HPL_dlaswp05N.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. /*
  2. * -- High Performance Computing Linpack Benchmark (HPL)
  3. * HPL - 2.0 - September 10, 2008
  4. * Antoine P. Petitet
  5. * University of Tennessee, Knoxville
  6. * Innovative Computing Laboratory
  7. * (C) Copyright 2000-2008 All Rights Reserved
  8. *
  9. * -- Copyright notice and Licensing terms:
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. *
  15. * 1. Redistributions of source code must retain the above copyright
  16. * notice, this list of conditions and the following disclaimer.
  17. *
  18. * 2. Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions, and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. *
  22. * 3. All advertising materials mentioning features or use of this
  23. * software must display the following acknowledgement:
  24. * This product includes software developed at the University of
  25. * Tennessee, Knoxville, Innovative Computing Laboratory.
  26. *
  27. * 4. The name of the University, the name of the Laboratory, or the
  28. * names of its contributors may not be used to endorse or promote
  29. * products derived from this software without specific written
  30. * permission.
  31. *
  32. * -- Disclaimer:
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
  38. * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. * ---------------------------------------------------------------------
  46. */
  47. /*
  48. * Include files
  49. */
  50. #include "hpl.h"
  /*
   * Define default value for unrolling factor.  HPL_LASWP05N_DEPTH is the
   * number of columns copied per pass of the unrolled loop below, and
   * HPL_LASWP05N_LOG2_DEPTH must be its base-2 logarithm.
   */
#ifndef HPL_LASWP05N_DEPTH
#define    HPL_LASWP05N_DEPTH        32
#define    HPL_LASWP05N_LOG2_DEPTH    5
#endif
/*
 * The unrolled code below only provides copies for depths 1, 2, 4, 8, 16
 * and 32, and the column blocking relies on DEPTH == 2^LOG2_DEPTH. Reject
 * any other combination at compile time rather than copying the wrong
 * columns at run time.
 */
#if ( ( HPL_LASWP05N_DEPTH ) != ( 1 << ( HPL_LASWP05N_LOG2_DEPTH ) ) ) || \
    ( ( HPL_LASWP05N_DEPTH ) > 32 )
#error "HPL_LASWP05N_DEPTH must equal 2^HPL_LASWP05N_LOG2_DEPTH and be <= 32"
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp05N
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   const double *                   U,
   const int                        LDU,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp05N
( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   const double *                   U;
   const int                        LDU;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp05N copies rows of U, selected by LINDXAU, into the rows of
 * A selected by LINDXA.  Both arrays are accessed column by column: the
 * element of row r in column c is read from U[r + c*LDU] and written to
 * A[r + c*LDA].
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies the number of rows of U that should be
 *         copied into A. M must be at least zero. Nothing is done when
 *         M is not positive.
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the rows of U that should
 *         be copied into A. N must be at least zero. Nothing is done
 *         when N is not positive.
 *
 * A       (local output)                double *
 *         On entry, A points to an array of dimension (LDA,N). On exit,
 *         the rows of this array specified by LINDXA are replaced by
 *         rows of U indicated by LINDXAU. Other rows are not written.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * U       (local input)                 const double *
 *         On entry, U  points  to an array of dimension (LDU,N).  This
 *         array contains the rows that are to be copied into A. It is
 *         only read.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,M).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M that contains the
 *         local row indexes of A that should be copied from U. Each
 *         entry is used as a zero-based offset into a column of A.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension  M that  contains
 *         the local row indexes of U that should be copied in A. Each
 *         entry is used as a zero-based offset into a column of U.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * U0 = U, * u0;
   double                     * a0;
/*
 * Distance, in elements, between two consecutive column blocks. Computed
 * in size_t so that LDA * DEPTH (resp. LDU * DEPTH) cannot be truncated
 * to the range of an int.
 */
   const size_t               incA = (size_t)(LDA) << HPL_LASWP05N_LOG2_DEPTH,
                              incU = (size_t)(LDU) << HPL_LASWP05N_LOG2_DEPTH;
   int                        nr, nu;
   int                        i, j;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * nu is N rounded down to a multiple of the unrolling depth, nr is the
 * number of trailing columns handled by the clean-up loop.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) <<
               HPL_LASWP05N_LOG2_DEPTH );
   nr = N - nu;
/*
 * Full blocks of HPL_LASWP05N_DEPTH columns: for every selected row, copy
 * one element per column of the block. The pointers are advanced before
 * each copy (not after) so that they never step past the last column that
 * is actually accessed.
 */
   for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A  + (size_t)(LINDXA[i]);
         u0 = U0 + (size_t)(LINDXAU[i]);

         *a0 = *u0;
#if ( HPL_LASWP05N_DEPTH >  1 )
         a0 += LDA; u0 += LDU; *a0 = *u0;
#endif
#if ( HPL_LASWP05N_DEPTH >  2 )
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
#endif
#if ( HPL_LASWP05N_DEPTH >  4 )
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
#endif
#if ( HPL_LASWP05N_DEPTH >  8 )
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
#endif
#if ( HPL_LASWP05N_DEPTH > 16 )
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
         a0 += LDA; u0 += LDU; *a0 = *u0;
#endif
      }
   }
/*
 * Clean-up: the nr (< HPL_LASWP05N_DEPTH) remaining columns. A and U0
 * already point to the first of these columns.
 */
   if( nr )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A  + (size_t)(LINDXA[i]);
         u0 = U0 + (size_t)(LINDXAU[i]);

         *a0 = *u0;
         for( j = 1; j < nr; j++ ) { a0 += LDA; u0 += LDU; *a0 = *u0; }
      }
   }
/*
 * End of HPL_dlaswp05N
 */
}