/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef STDC_HEADERS void HPL_plindx1 ( HPL_T_panel * PANEL, const int K, const int * IPID, int * IPA, int * LINDXA, int * LINDXAU, int * IPLEN, int * IPMAP, int * IPMAPM1, int * PERMU, int * IWORK ) #else void HPL_plindx1 ( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK ) HPL_T_panel * PANEL; const int K; const int * IPID; int * IPA; int * LINDXA; int * LINDXAU; int * IPLEN; int * IPMAP; int * IPMAPM1; int * PERMU; int * IWORK; #endif { /* * Purpose * ======= * * HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing * the local source and final destination position resulting from the * application of row interchanges. In addition, this function computes * three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic * mapping information for the spreading phase. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * K (global input) const int * On entry, K specifies the number of entries in IPID. K is at * least 2*N, and at most 4*N. * * IPID (global input) const int * * On entry, IPID is an array of length K. The first K entries * of that array contain the src and final destination resulting * from the application of the interchanges. * * IPA (global output) int * * On exit, IPA specifies the number of rows that the current * process row has that either belong to U or should be swapped * with remote rows of A. * * LINDXA (global output) int * * On entry, LINDXA is an array of dimension 2*N. On exit, this * array contains the local indexes of the rows of A I have that * should be copied into U. * * LINDXAU (global output) int * * On exit, LINDXAU is an array of dimension 2*N. On exit, this * array contains the local destination information encoded as * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) * of A should be locally copied into A(-LINDXAU(k),:). * * IPLEN (global output) int * * On entry, IPLEN is an array of dimension NPROW + 1. On exit, * this array is such that IPLEN[i] is the number of rows of A * in the processes before process IPMAP[i] after the sort * with the convention that IPLEN[nprow] is the total number of * rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the * local number of rows of A that should be moved to the process * IPMAP[i]. IPLEN is such that the number of rows of the source * process row can be computed as IPLEN[1] - IPLEN[0], and the * remaining entries of this array are sorted so that the * quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. * * IPMAP (global output) int * * On entry, IPMAP is an array of dimension NPROW. On exit, this * array contains the logarithmic mapping of the processes. In * other words, IPMAP[myrow] is the corresponding sorted process * coordinate. * * IPMAPM1 (global output) int * * On entry, IPMAPM1 is an array of dimension NPROW. On exit, * this array contains the inverse of the logarithmic mapping * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in * [0.. NPROCS) * * PERMU (global output) int * * On entry, PERMU is an array of dimension JB. On exit, PERMU * contains a sequence of permutations, that should be applied * in increasing order to permute in place the row panel U. * * IWORK (workspace) int * * On entry, IWORK is a workarray of dimension 2*JB. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int * iwork; int dst, dstrow, fndd, i, ia, icurrow, il, ip, ipU, iroff, j, jb, myrow, nb, nprow, src, srcrow; /* .. * .. Executable Statements .. */ /* * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 */ HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ); /* * Compute the local arrays LINDXA and LINDXAU containing the local * source and final destination position resulting from the application * of N interchanges. Compute LINDXA and LINDXAU in icurrow, and LINDXA * elsewhere and PERMU in every process. */ myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia; iroff = PANEL->ii; icurrow = PANEL->prow; iwork = IWORK + jb; if( myrow == icurrow ) { for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) { src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); if( srcrow == icurrow ) { dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); LINDXA[ip] = il - iroff; if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) { PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; IPLEN[il]++; ipU++; } else if( dstrow != icurrow ) { j = 0; do { fndd = ( dst == IPID[j] ); j+=2; } while( !fndd && ( j < K ) ); PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow]; j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; IPLEN[il]++; ipU++; } else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) ) { Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); LINDXAU[ip] = iroff - il; } ip++; } } *IPA = ip; } else { for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) { src = IPID[i ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); /* * LINDXA[i] is the local index of the row of A that belongs into U */ if( myrow == dstrow ) { Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); LINDXA[ip] = il - iroff; ip++; } /* * iwork[i] is the local (current) position index in U * PERMU[i] is the local (final) destination index in U */ if( srcrow == icurrow ) { if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) { PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; } else if( dstrow != icurrow ) { j = 0; do { fndd = ( dst == IPID[j] ); j+=2; } while( !fndd && ( j < K ) ); PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow]; iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; } } } *IPA = 0; } /* * Simplify iwork and PERMU, return in PERMU the sequence of permutation * that need to be apply to U after it has been broadcast. */ HPL_perm( jb, iwork, PERMU, IWORK ); /* * Reset IPLEN to its correct value */ for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1]; IPLEN[0] = 0; /* * End of HPL_plindx1 */ }