HPL_pdinfo.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149
  1. /*
  2. * -- High Performance Computing Linpack Benchmark (HPL)
  3. * HPL - 2.0 - September 10, 2008
  4. * Antoine P. Petitet
  5. * University of Tennessee, Knoxville
  6. * Innovative Computing Laboratory
  7. * (C) Copyright 2000-2008 All Rights Reserved
  8. *
  9. * -- Copyright notice and Licensing terms:
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. *
  15. * 1. Redistributions of source code must retain the above copyright
  16. * notice, this list of conditions and the following disclaimer.
  17. *
  18. * 2. Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions, and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. *
  22. * 3. All advertising materials mentioning features or use of this
  23. * software must display the following acknowledgement:
  24. * This product includes software developed at the University of
  25. * Tennessee, Knoxville, Innovative Computing Laboratory.
  26. *
  27. * 4. The name of the University, the name of the Laboratory, or the
  28. * names of its contributors may not be used to endorse or promote
  29. * products derived from this software without specific written
  30. * permission.
  31. *
  32. * -- Disclaimer:
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
  38. * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. * ---------------------------------------------------------------------
  46. */
  47. /*
  48. * Include files
  49. */
  50. #include "hpl.h"
  51. #include "RCCE.h"
  /*
   * Local prototypes for the MPI-named routines, declared here with
   * RCCE_COMM as the communicator type. Parameters are left unnamed.
   * NOTE(review): presumably these resolve to an RCCE-backed MPI shim
   * defined elsewhere, with the int arguments in the usual MPI order
   * (count, datatype, peer rank, tag) -- confirm against that
   * implementation, and consider moving them to a shared header.
   */
  52. int MPI_Send(void *, int, int, int, int, RCCE_COMM);
  53. int MPI_Recv(void *, int, int, int, int, RCCE_COMM, int *);
  54. int MPI_Comm_size(RCCE_COMM, int *);
  55. int MPI_Comm_rank(RCCE_COMM, int *);
  56. int MPI_Init(int *, char ***);
  57. int MPI_Finalize(void);
  58. #ifdef STDC_HEADERS
  59. void HPL_pdinfo
  60. (
  61. HPL_T_test * TEST,
  62. int * NS,
  63. int * N,
  64. int * NBS,
  65. int * NB,
  66. HPL_T_ORDER * PMAPPIN,
  67. int * NPQS,
  68. int * P,
  69. int * Q,
  70. int * NPFS,
  71. HPL_T_FACT * PF,
  72. int * NBMS,
  73. int * NBM,
  74. int * NDVS,
  75. int * NDV,
  76. int * NRFS,
  77. HPL_T_FACT * RF,
  78. int * NTPS,
  79. HPL_T_TOP * TP,
  80. int * NDHS,
  81. int * DH,
  82. HPL_T_SWAP * FSWAP,
  83. int * TSWAP,
  84. int * L1NOTRAN,
  85. int * UNOTRAN,
  86. int * EQUIL,
  87. int * ALIGN
  88. )
  89. #else
  90. void HPL_pdinfo
  91. ( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN )
  92. HPL_T_test * TEST;
  93. int * NS;
  94. int * N;
  95. int * NBS;
  96. int * NB;
  97. HPL_T_ORDER * PMAPPIN;
  98. int * NPQS;
  99. int * P;
  100. int * Q;
  101. int * NPFS;
  102. HPL_T_FACT * PF;
  103. int * NBMS;
  104. int * NBM;
  105. int * NDVS;
  106. int * NDV;
  107. int * NRFS;
  108. HPL_T_FACT * RF;
  109. int * NTPS;
  110. HPL_T_TOP * TP;
  111. int * NDHS;
  112. int * DH;
  113. HPL_T_SWAP * FSWAP;
  114. int * TSWAP;
  115. int * L1NOTRAN;
  116. int * UNOTRAN;
  117. int * EQUIL;
  118. int * ALIGN;
  119. #endif
  120. {
  121. /*
  122. * Purpose
  123. * =======
  124. *
  125. * HPL_pdinfo reads the startup information for the various tests and
  126. * transmits it to all processes.
  127. *
  128. * Arguments
  129. * =========
  130. *
  131. * TEST (global output) HPL_T_test *
  132. * On entry, TEST points to a testing data structure. On exit,
  133. * the fields of this data structure are initialized as follows:
  134. * TEST->outfp specifies the output file where the results will
  135. * be printed. It is only defined and used by the process 0 of
  136. * the grid. TEST->thrsh specifies the threshold value for the
  137. * test ratio. TEST->epsil is the relative machine precision of
  138. * the distributed computer. Finally the test counters, kfail,
  139. * kpass, kskip, ktest are initialized to zero.
  140. *
  141. * NS (global output) int *
  142. * On exit, NS specifies the number of different problem sizes
  143. * to be tested. NS is less than or equal to HPL_MAX_PARAM.
  144. *
  145. * N (global output) int *
  146. * On entry, N is an array of dimension HPL_MAX_PARAM. On exit,
  147. * the first NS entries of this array contain the problem sizes
  148. * to run the code with.
  149. *
  150. * NBS (global output) int *
  151. * On exit, NBS specifies the number of different distribution
  152. * blocking factors to be tested. NBS must be less than or equal
  153. * to HPL_MAX_PARAM.
  154. *
  155. * NB (global output) int *
  156. * On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
  157. * the first NBS entries of this array contain the values of the
  158. * various distribution blocking factors, to run the code with.
  159. *
  160. * PMAPPIN (global output) HPL_T_ORDER *
  161. * On exit, PMAPPIN specifies the process mapping onto the no-
  162. * des of the MPI machine configuration. PMAPPIN defaults to
  163. * row-major ordering.
  164. *
  165. * NPQS (global output) int *
  166. * On exit, NPQS specifies the number of different values that
  167. * can be used for P and Q, i.e., the number of process grids to
  168. * run the code with. NPQS must be less than or equal to
  169. * HPL_MAX_PARAM.
  170. *
  171. * P (global output) int *
  172. * On entry, P is an array of dimension HPL_MAX_PARAM. On exit,
  173. * the first NPQS entries of this array contain the values of P,
  174. * the number of process rows of the NPQS grids to run the code
  175. * with.
  176. *
  177. * Q (global output) int *
  178. * On entry, Q is an array of dimension HPL_MAX_PARAM. On exit,
  179. * the first NPQS entries of this array contain the values of Q,
  180. * the number of process columns of the NPQS grids to run the
  181. * code with.
  182. *
  183. * NPFS (global output) int *
  184. * On exit, NPFS specifies the number of different values that
  185. * can be used for PF : the panel factorization algorithm to run
  186. * the code with. NPFS is less than or equal to HPL_MAX_PARAM.
  187. *
  188. * PF (global output) HPL_T_FACT *
  189. * On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
  190. * the first NPFS entries of this array contain the various
  191. * panel factorization algorithms to run the code with.
  192. *
  193. * NBMS (global output) int *
  194. * On exit, NBMS specifies the number of various recursive
  195. * stopping criteria to be tested. NBMS must be less than or
  196. * equal to HPL_MAX_PARAM.
  197. *
  198. * NBM (global output) int *
  199. * On entry, NBM is an array of dimension HPL_MAX_PARAM. On
  200. * exit, the first NBMS entries of this array contain the values
  201. * of the various recursive stopping criteria to be tested.
  202. *
  203. * NDVS (global output) int *
  204. * On exit, NDVS specifies the number of various numbers of
  205. * panels in recursion to be tested. NDVS is less than or equal
  206. * to HPL_MAX_PARAM.
  207. *
  208. * NDV (global output) int *
  209. * On entry, NDV is an array of dimension HPL_MAX_PARAM. On
  210. * exit, the first NDVS entries of this array contain the values
  211. * of the various numbers of panels in recursion to be tested.
  212. *
  213. * NRFS (global output) int *
  214. * On exit, NRFS specifies the number of different values that
  215. * can be used for RF : the recursive factorization algorithm to
  216. * be tested. NRFS is less than or equal to HPL_MAX_PARAM.
  217. *
  218. * RF (global output) HPL_T_FACT *
  219. * On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
  220. * the first NRFS entries of this array contain the various
  221. * recursive factorization algorithms to run the code with.
  222. *
  223. * NTPS (global output) int *
  224. * On exit, NTPS specifies the number of different values that
  225. * can be used for the broadcast topologies to be tested. NTPS
  226. * is less than or equal to HPL_MAX_PARAM.
  227. *
  228. * TP (global output) HPL_T_TOP *
  229. * On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
  230. * the first NTPS entries of this array contain the various
  231. * broadcast (along rows) topologies to run the code with.
  232. *
  233. * NDHS (global output) int *
  234. * On exit, NDHS specifies the number of different values that
  235. * can be used for the lookahead depths to be tested. NDHS is
  236. * less than or equal to HPL_MAX_PARAM.
  237. *
  238. * DH (global output) int *
  239. * On entry, DH is an array of dimension HPL_MAX_PARAM. On
  240. * exit, the first NDHS entries of this array contain the values
  241. * of lookahead depths to run the code with. Such a value is at
  242. * least 0 (no-lookahead) or greater than zero.
  243. *
  244. * FSWAP (global output) HPL_T_SWAP *
  245. * On exit, FSWAP specifies the swapping algorithm to be used in
  246. * all tests.
  247. *
  248. * TSWAP (global output) int *
  249. * On exit, TSWAP specifies the swapping threshold as a number
  250. * of columns when the mixed swapping algorithm was chosen.
  251. *
  252. * L1NOTRAN (global output) int *
  253. * On exit, L1NOTRAN specifies whether the upper triangle of the
  254. * panels of columns should be stored in no-transposed form
  255. * (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
  256. *
  257. * UNOTRAN (global output) int *
  258. * On exit, UNOTRAN specifies whether the panels of rows should
  259. * be stored in no-transposed form (UNOTRAN=1) or transposed
  260. * form (UNOTRAN=0) during their broadcast.
  261. *
  262. * EQUIL (global output) int *
  263. * On exit, EQUIL specifies whether equilibration during the
  264. * swap-broadcast of the panel of rows should be performed
  265. * (EQUIL=1) or not (EQUIL=0).
  266. *
  267. * ALIGN (global output) int *
  268. * On exit, ALIGN specifies the alignment of the dynamically
  269. * allocated buffers in double precision words. ALIGN is greater
  270. * than zero.
  271. *
  272. * ---------------------------------------------------------------------
  273. */
  274. /*
  275. * .. Local Variables ..
  276. */
  277. char file[HPL_LINE_MAX], line[HPL_LINE_MAX],
  278. auth[HPL_LINE_MAX], num [HPL_LINE_MAX];
  279. FILE * infp;
  280. int * iwork = NULL;
  281. char * lineptr;
  282. int error=0, fid, i, j, lwork, maxp, nprocs,
  283. rank, size;
  284. /* ..
  285. * .. Executable Statements ..
  286. */
  287. MPI_Comm_rank( MPI_COMM_WORLD, &rank );
  288. MPI_Comm_size( MPI_COMM_WORLD, &size );
  289. /*
  290. * Initialize the TEST data structure with default values
  291. */
  292. TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0;
  293. TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0;
  294. /*
  295. * Process 0 reads the input data, broadcasts to other processes and
  296. * writes needed information to TEST->outfp.
  297. */
  298. if( rank == 0 )
  299. {
  300. /*
  301. * Open file and skip data file header
  302. */
  303. if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL )
  304. {
  305. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  306. "cannot open file HPL.dat" );
  307. error = 1; goto label_error;
  308. }
  309. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  310. (void) fgets( auth, HPL_LINE_MAX - 2, infp );
  311. /*
  312. * Read name and unit number for summary output file
  313. */
  314. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  315. (void) sscanf( line, "%s", file );
  316. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  317. (void) sscanf( line, "%s", num );
  318. fid = atoi( num );
  319. if ( fid == 6 ) TEST->outfp = stdout;
  320. else if( fid == 7 ) TEST->outfp = stderr;
  321. else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL )
  322. {
  323. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.",
  324. file );
  325. error = 1; goto label_error;
  326. }
  327. /*
  328. * Read and check the parameter values for the tests.
  329. *
  330. * Problem size (>=0) (N)
  331. */
  332. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  333. (void) sscanf( line, "%s", num ); *NS = atoi( num );
  334. if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) )
  335. {
  336. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d",
  337. "Number of values of N is less than 1 or greater than",
  338. HPL_MAX_PARAM );
  339. error = 1; goto label_error;
  340. }
  341. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  342. for( i = 0; i < *NS; i++ )
  343. {
  344. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  345. if( ( N[ i ] = atoi( num ) ) < 0 )
  346. {
  347. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  348. "Value of N less than 0" );
  349. error = 1; goto label_error;
  350. }
  351. }
  352. /*
  353. * Block size (>=1) (NB)
  354. */
  355. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  356. (void) sscanf( line, "%s", num ); *NBS = atoi( num );
  357. if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) )
  358. {
  359. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
  360. "Number of values of NB is less than 1 or",
  361. "greater than", HPL_MAX_PARAM );
  362. error = 1; goto label_error;
  363. }
  364. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  365. for( i = 0; i < *NBS; i++ )
  366. {
  367. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  368. if( ( NB[ i ] = atoi( num ) ) < 1 )
  369. {
  370. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  371. "Value of NB less than 1" );
  372. error = 1; goto label_error;
  373. }
  374. }
  375. /*
  376. * Process grids, mapping, (>=1) (P, Q)
  377. */
  378. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  379. (void) sscanf( line, "%s", num );
  380. *PMAPPIN = ( atoi( num ) == 1 ? HPL_COLUMN_MAJOR : HPL_ROW_MAJOR );
  381. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  382. (void) sscanf( line, "%s", num ); *NPQS = atoi( num );
  383. if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) )
  384. {
  385. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
  386. "Number of values of grids is less",
  387. "than 1 or greater than", HPL_MAX_PARAM );
  388. error = 1; goto label_error;
  389. }
  390. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  391. for( i = 0; i < *NPQS; i++ )
  392. {
  393. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  394. if( ( P[ i ] = atoi( num ) ) < 1 )
  395. {
  396. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  397. "Value of P less than 1" );
  398. error = 1; goto label_error;
  399. }
  400. }
  401. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  402. for( i = 0; i < *NPQS; i++ )
  403. {
  404. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  405. if( ( Q[ i ] = atoi( num ) ) < 1 )
  406. {
  407. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  408. "Value of Q less than 1" );
  409. error = 1; goto label_error;
  410. }
  411. }
  412. /*
  413. * Check for enough processes in machine configuration
  414. */
  415. // maxp = 0;
  416. // for( i = 0; i < *NPQS; i++ )
  417. // { nprocs = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); }
  418. // if( maxp > size )
  419. // {
  420. // HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  421. // "Need at least %d processes for these tests", maxp );
  422. // error = 1; goto label_error;
  423. // }
  424. /* IN RCCE WE INSIST THAT THE NUMBER OF UES MUST EQUAL THE PRODUCT OF P AND Q */
  425. for( i = 0; i < *NPQS; i++ )
  426. if( P[i] * Q[i] != size )
  427. {
  428. printf(
  429. "Need EXACTLY %d processes, but P*Q = %d", size, P[i]*Q[i] );
  430. error = 1; goto label_error;
  431. }
  432. /*
  433. * Checking threshold value (TEST->thrsh)
  434. */
  435. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  436. (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num );
  437. /*
  438. * Panel factorization algorithm (PF)
  439. */
  440. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  441. (void) sscanf( line, "%s", num ); *NPFS = atoi( num );
  442. if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) )
  443. {
  444. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
  445. "number of values of PFACT",
  446. "is less than 1 or greater than", HPL_MAX_PARAM );
  447. error = 1; goto label_error;
  448. }
  449. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  450. for( i = 0; i < *NPFS; i++ )
  451. {
  452. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  453. j = atoi( num );
  454. if( j == 0 ) PF[ i ] = HPL_LEFT_LOOKING;
  455. else if( j == 1 ) PF[ i ] = HPL_CROUT;
  456. else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING;
  457. else PF[ i ] = HPL_RIGHT_LOOKING;
  458. }
  459. /*
  460. * Recursive stopping criterion (>=1) (NBM)
  461. */
  462. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  463. (void) sscanf( line, "%s", num ); *NBMS = atoi( num );
  464. if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) )
  465. {
  466. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
  467. "Number of values of NBMIN",
  468. "is less than 1 or greater than", HPL_MAX_PARAM );
  469. error = 1; goto label_error;
  470. }
  471. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  472. for( i = 0; i < *NBMS; i++ )
  473. {
  474. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  475. if( ( NBM[ i ] = atoi( num ) ) < 1 )
  476. {
  477. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  478. "Value of NBMIN less than 1" );
  479. error = 1; goto label_error;
  480. }
  481. }
  482. /*
  483. * Number of panels in recursion (>=2) (NDV)
  484. */
  485. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  486. (void) sscanf( line, "%s", num ); *NDVS = atoi( num );
  487. if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) )
  488. {
  489. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
  490. "Number of values of NDIV",
  491. "is less than 1 or greater than", HPL_MAX_PARAM );
  492. error = 1; goto label_error;
  493. }
  494. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  495. for( i = 0; i < *NDVS; i++ )
  496. {
  497. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  498. if( ( NDV[ i ] = atoi( num ) ) < 2 )
  499. {
  500. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  501. "Value of NDIV less than 2" );
  502. error = 1; goto label_error;
  503. }
  504. }
  505. /*
  506. * Recursive panel factorization (RF)
  507. */
  508. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  509. (void) sscanf( line, "%s", num ); *NRFS = atoi( num );
  510. if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) )
  511. {
  512. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
  513. "Number of values of RFACT",
  514. "is less than 1 or greater than", HPL_MAX_PARAM );
  515. error = 1; goto label_error;
  516. }
  517. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  518. for( i = 0; i < *NRFS; i++ )
  519. {
  520. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  521. j = atoi( num );
  522. if( j == 0 ) RF[ i ] = HPL_LEFT_LOOKING;
  523. else if( j == 1 ) RF[ i ] = HPL_CROUT;
  524. else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING;
  525. else RF[ i ] = HPL_RIGHT_LOOKING;
  526. }
  527. /*
  528. * Broadcast topology (TP) (0=1rg, 1=1rgM, 2=2rg, 3=2rgM, 4=Lng, 5=LnM)
  529. */
  530. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  531. (void) sscanf( line, "%s", num ); *NTPS = atoi( num );
  532. if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) )
  533. {
  534. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
  535. "Number of values of BCAST",
  536. "is less than 1 or greater than", HPL_MAX_PARAM );
  537. error = 1; goto label_error;
  538. }
  539. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  540. for( i = 0; i < *NTPS; i++ )
  541. {
  542. (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
  543. j = atoi( num );
  544. if( j == 0 ) TP[ i ] = HPL_1RING;
  545. else if( j == 1 ) TP[ i ] = HPL_1RING_M;
  546. else if( j == 2 ) TP[ i ] = HPL_2RING;
  547. else if( j == 3 ) TP[ i ] = HPL_2RING_M;
  548. else if( j == 4 ) TP[ i ] = HPL_BLONG;
  549. else if( j == 5 ) TP[ i ] = HPL_BLONG_M;
  550. else TP[ i ] = HPL_1RING_M;
  551. }
  552. /*
  553. * Lookahead depth (>=0) (NDH)
  554. */
  555. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  556. (void) sscanf( line, "%s", num ); *NDHS = atoi( num );
  557. if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) )
  558. {
  559. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
  560. "Number of values of DEPTH",
  561. "is less than 1 or greater than", HPL_MAX_PARAM );
  562. error = 1; goto label_error;
  563. }
  564. (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
  565. for( i = 0; i < *NDHS; i++ )
  566. {
  567. (void) sscanf( lineptr, "%s", num );
  568. lineptr += strlen( num ) + 1;
  569. if( ( DH[ i ] = atoi( num ) ) < 0 )
  570. {
  571. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  572. "Value of DEPTH less than 0" );
  573. error = 1; goto label_error;
  574. }
  575. }
  576. /*
  577. * Swapping algorithm (0,1 or 2) (FSWAP)
  578. */
  579. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  580. (void) sscanf( line, "%s", num ); j = atoi( num );
  581. if( j == 0 ) *FSWAP = HPL_SWAP00;
  582. else if( j == 1 ) *FSWAP = HPL_SWAP01;
  583. else if( j == 2 ) *FSWAP = HPL_SW_MIX;
  584. else *FSWAP = HPL_SWAP01;
  585. /*
  586. * Swapping threshold (>=0) (TSWAP)
  587. */
  588. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  589. (void) sscanf( line, "%s", num ); *TSWAP = atoi( num );
  590. if( *TSWAP <= 0 ) *TSWAP = 0;
  591. /*
  592. * L1 in (no-)transposed form (0 or 1)
  593. */
  594. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  595. (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num );
  596. if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0;
  597. /*
  598. * U in (no-)transposed form (0 or 1)
  599. */
  600. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  601. (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num );
  602. if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0;
  603. /*
  604. * Equilibration (0=no, 1=yes)
  605. */
  606. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  607. (void) sscanf( line, "%s", num ); *EQUIL = atoi( num );
  608. if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1;
  609. /*
  610. * Memory alignment in double precision words (> 0) (ALIGN)
  611. */
  612. (void) fgets( line, HPL_LINE_MAX - 2, infp );
  613. (void) sscanf( line, "%s", num ); *ALIGN = atoi( num );
  614. if( *ALIGN <= 0 ) *ALIGN = 4;
  615. /*
  616. * Close input file
  617. */
  618. label_error:
  619. (void) fclose( infp );
  620. }
  621. else { TEST->outfp = NULL; }
  622. /*
  623. * Check for error on reading input file
  624. */
  625. (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max,
  626. MPI_COMM_WORLD );
  627. if( error )
  628. {
  629. if( rank == 0 )
  630. HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
  631. "Illegal input in file HPL.dat. Exiting ..." );
  632. MPI_Finalize();
  633. #ifdef HPL_CALL_VSIPL
  634. (void) vsip_finalize( NULL );
  635. #endif
  636. exit( 1 );
  637. }
  638. /*
  639. * Compute and broadcast machine epsilon
  640. */
  641. TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS );
  642. /*
  643. * Pack information arrays and broadcast
  644. */
  645. (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0,
  646. MPI_COMM_WORLD );
  647. /*
  648. * Broadcast array sizes
  649. */
  650. iwork = (int *)malloc( (size_t)(15) * sizeof( int ) );
  651. if( rank == 0 )
  652. {
  653. iwork[ 0] = *NS; iwork[ 1] = *NBS;
  654. iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 );
  655. iwork[ 3] = *NPQS; iwork[ 4] = *NPFS; iwork[ 5] = *NBMS;
  656. iwork[ 6] = *NDVS; iwork[ 7] = *NRFS; iwork[ 8] = *NTPS;
  657. iwork[ 9] = *NDHS; iwork[10] = *TSWAP; iwork[11] = *L1NOTRAN;
  658. iwork[12] = *UNOTRAN; iwork[13] = *EQUIL; iwork[14] = *ALIGN;
  659. }
  660. (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD );
  661. if( rank != 0 )
  662. {
  663. *NS = iwork[ 0]; *NBS = iwork[ 1];
  664. *PMAPPIN = ( iwork[ 2] == 0 ? HPL_ROW_MAJOR : HPL_COLUMN_MAJOR );
  665. *NPQS = iwork[ 3]; *NPFS = iwork[ 4]; *NBMS = iwork[ 5];
  666. *NDVS = iwork[ 6]; *NRFS = iwork[ 7]; *NTPS = iwork[ 8];
  667. *NDHS = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11];
  668. *UNOTRAN = iwork[12]; *EQUIL = iwork[13]; *ALIGN = iwork[14];
  669. }
  670. if( iwork ) free( iwork );
  671. /*
  672. * Pack information arrays and broadcast
  673. */
  674. lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) +
  675. (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1;
  676. iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
  677. if( rank == 0 )
  678. {
  679. j = 0;
  680. for( i = 0; i < *NS; i++ ) { iwork[j] = N [i]; j++; }
  681. for( i = 0; i < *NBS; i++ ) { iwork[j] = NB[i]; j++; }
  682. for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; }
  683. for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; }
  684. for( i = 0; i < *NPFS; i++ )
  685. {
  686. if( PF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0;
  687. else if( PF[i] == HPL_CROUT ) iwork[j] = 1;
  688. else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
  689. j++;
  690. }
  691. for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; }
  692. for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; }
  693. for( i = 0; i < *NRFS; i++ )
  694. {
  695. if( RF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0;
  696. else if( RF[i] == HPL_CROUT ) iwork[j] = 1;
  697. else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
  698. j++;
  699. }
  700. for( i = 0; i < *NTPS; i++ )
  701. {
  702. if( TP[i] == HPL_1RING ) iwork[j] = 0;
  703. else if( TP[i] == HPL_1RING_M ) iwork[j] = 1;
  704. else if( TP[i] == HPL_2RING ) iwork[j] = 2;
  705. else if( TP[i] == HPL_2RING_M ) iwork[j] = 3;
  706. else if( TP[i] == HPL_BLONG ) iwork[j] = 4;
  707. else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5;
  708. j++;
  709. }
  710. for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; }
  711. if( *FSWAP == HPL_SWAP00 ) iwork[j] = 0;
  712. else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1;
  713. else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2;
  714. j++;
  715. }
  716. (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0,
  717. MPI_COMM_WORLD );
  718. if( rank != 0 )
  719. {
  720. j = 0;
  721. for( i = 0; i < *NS; i++ ) { N [i] = iwork[j]; j++; }
  722. for( i = 0; i < *NBS; i++ ) { NB[i] = iwork[j]; j++; }
  723. for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; }
  724. for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; }
  725. for( i = 0; i < *NPFS; i++ )
  726. {
  727. if( iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING;
  728. else if( iwork[j] == 1 ) PF[i] = HPL_CROUT;
  729. else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING;
  730. j++;
  731. }
  732. for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; }
  733. for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; }
  734. for( i = 0; i < *NRFS; i++ )
  735. {
  736. if( iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING;
  737. else if( iwork[j] == 1 ) RF[i] = HPL_CROUT;
  738. else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING;
  739. j++;
  740. }
  741. for( i = 0; i < *NTPS; i++ )
  742. {
  743. if( iwork[j] == 0 ) TP[i] = HPL_1RING;
  744. else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M;
  745. else if( iwork[j] == 2 ) TP[i] = HPL_2RING;
  746. else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M;
  747. else if( iwork[j] == 4 ) TP[i] = HPL_BLONG;
  748. else if( iwork[j] == 5 ) TP[i] = HPL_BLONG_M;
  749. j++;
  750. }
  751. for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; }
  752. if( iwork[j] == 0 ) *FSWAP = HPL_SWAP00;
  753. else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01;
  754. else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX;
  755. j++;
  756. }
  757. if( iwork ) free( iwork );
  758. /*
  759. * regurgitate input
  760. */
  761. if( rank == 0 )
  762. {
  763. HPL_fprintf( TEST->outfp, "%s%s\n",
  764. "========================================",
  765. "========================================" );
  766. HPL_fprintf( TEST->outfp, "%s%s\n",
  767. "HPLinpack 2.0 -- High-Performance Linpack benchmark -- ",
  768. " September 10, 2008" );
  769. HPL_fprintf( TEST->outfp, "%s%s\n",
  770. "Written by A. Petitet and R. Clint Whaley, ",
  771. "Innovative Computing Laboratory, UTK" );
  772. HPL_fprintf( TEST->outfp, "%s%s\n",
  773. "Modified by Piotr Luszczek, ",
  774. "Innovative Computing Laboratory, UTK" );
  775. HPL_fprintf( TEST->outfp, "%s%s\n",
  776. "Modified by Julien Langou, ",
  777. "University of Colorado Denver");
  778. HPL_fprintf( TEST->outfp, "%s%s\n",
  779. "========================================",
  780. "========================================" );
  781. HPL_fprintf( TEST->outfp, "\n%s\n",
  782. "An explanation of the input/output parameters follows:" );
  783. HPL_fprintf( TEST->outfp, "%s\n",
  784. "T/V : Wall time / encoded variant." );
  785. HPL_fprintf( TEST->outfp, "%s\n",
  786. "N : The order of the coefficient matrix A." );
  787. HPL_fprintf( TEST->outfp, "%s\n",
  788. "NB : The partitioning blocking factor." );
  789. HPL_fprintf( TEST->outfp, "%s\n",
  790. "P : The number of process rows." );
  791. HPL_fprintf( TEST->outfp, "%s\n",
  792. "Q : The number of process columns." );
  793. HPL_fprintf( TEST->outfp, "%s\n",
  794. "Time : Time in seconds to solve the linear system." );
  795. HPL_fprintf( TEST->outfp, "%s\n\n",
  796. "Gflops : Rate of execution for solving the linear system." );
  797. HPL_fprintf( TEST->outfp, "%s\n",
  798. "The following parameter values will be used:" );
  799. /*
  800. * Problem size
  801. */
  802. HPL_fprintf( TEST->outfp, "\nN :" );
  803. for( i = 0; i < Mmin( 8, *NS ); i++ )
  804. HPL_fprintf( TEST->outfp, "%8d ", N[i] );
  805. if( *NS > 8 )
  806. {
  807. HPL_fprintf( TEST->outfp, "\n " );
  808. for( i = 8; i < Mmin( 16, *NS ); i++ )
  809. HPL_fprintf( TEST->outfp, "%8d ", N[i] );
  810. if( *NS > 16 )
  811. {
  812. HPL_fprintf( TEST->outfp, "\n " );
  813. for( i = 16; i < *NS; i++ )
  814. HPL_fprintf( TEST->outfp, "%8d ", N[i] );
  815. }
  816. }
  817. /*
  818. * Distribution blocking factor
  819. */
  820. HPL_fprintf( TEST->outfp, "\nNB :" );
  821. for( i = 0; i < Mmin( 8, *NBS ); i++ )
  822. HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
  823. if( *NBS > 8 )
  824. {
  825. HPL_fprintf( TEST->outfp, "\n " );
  826. for( i = 8; i < Mmin( 16, *NBS ); i++ )
  827. HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
  828. if( *NBS > 16 )
  829. {
  830. HPL_fprintf( TEST->outfp, "\n " );
  831. for( i = 16; i < *NBS; i++ )
  832. HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
  833. }
  834. }
  835. /*
  836. * Process mapping
  837. */
  838. HPL_fprintf( TEST->outfp, "\nPMAP :" );
  839. if( *PMAPPIN == HPL_ROW_MAJOR )
  840. HPL_fprintf( TEST->outfp, " Row-major process mapping" );
  841. else if( *PMAPPIN == HPL_COLUMN_MAJOR )
  842. HPL_fprintf( TEST->outfp, " Column-major process mapping" );
  843. /*
  844. * Process grid
  845. */
  846. HPL_fprintf( TEST->outfp, "\nP :" );
  847. for( i = 0; i < Mmin( 8, *NPQS ); i++ )
  848. HPL_fprintf( TEST->outfp, "%8d ", P[i] );
  849. if( *NPQS > 8 )
  850. {
  851. HPL_fprintf( TEST->outfp, "\n " );
  852. for( i = 8; i < Mmin( 16, *NPQS ); i++ )
  853. HPL_fprintf( TEST->outfp, "%8d ", P[i] );
  854. if( *NPQS > 16 )
  855. {
  856. HPL_fprintf( TEST->outfp, "\n " );
  857. for( i = 16; i < *NPQS; i++ )
  858. HPL_fprintf( TEST->outfp, "%8d ", P[i] );
  859. }
  860. }
  861. HPL_fprintf( TEST->outfp, "\nQ :" );
  862. for( i = 0; i < Mmin( 8, *NPQS ); i++ )
  863. HPL_fprintf( TEST->outfp, "%8d ", Q[i] );
  864. if( *NPQS > 8 )
  865. {
  866. HPL_fprintf( TEST->outfp, "\n " );
  867. for( i = 8; i < Mmin( 16, *NPQS ); i++ )
  868. HPL_fprintf( TEST->outfp, "%8d ", Q[i] );
  869. if( *NPQS > 16 )
  870. {
  871. HPL_fprintf( TEST->outfp, "\n " );
  872. for( i = 16; i < *NPQS; i++ )
  873. HPL_fprintf( TEST->outfp, "%8d ", Q[i] );
  874. }
  875. }
  876. /*
  877. * Panel Factorization
  878. */
  879. HPL_fprintf( TEST->outfp, "\nPFACT :" );
  880. for( i = 0; i < Mmin( 8, *NPFS ); i++ )
  881. {
  882. if( PF[i] == HPL_LEFT_LOOKING )
  883. HPL_fprintf( TEST->outfp, " Left " );
  884. else if( PF[i] == HPL_CROUT )
  885. HPL_fprintf( TEST->outfp, " Crout " );
  886. else if( PF[i] == HPL_RIGHT_LOOKING )
  887. HPL_fprintf( TEST->outfp, " Right " );
  888. }
  889. if( *NPFS > 8 )
  890. {
  891. HPL_fprintf( TEST->outfp, "\n " );
  892. for( i = 8; i < Mmin( 16, *NPFS ); i++ )
  893. {
  894. if( PF[i] == HPL_LEFT_LOOKING )
  895. HPL_fprintf( TEST->outfp, " Left " );
  896. else if( PF[i] == HPL_CROUT )
  897. HPL_fprintf( TEST->outfp, " Crout " );
  898. else if( PF[i] == HPL_RIGHT_LOOKING )
  899. HPL_fprintf( TEST->outfp, " Right " );
  900. }
  901. if( *NPFS > 16 )
  902. {
  903. HPL_fprintf( TEST->outfp, "\n " );
  904. for( i = 16; i < *NPFS; i++ )
  905. {
  906. if( PF[i] == HPL_LEFT_LOOKING )
  907. HPL_fprintf( TEST->outfp, " Left " );
  908. else if( PF[i] == HPL_CROUT )
  909. HPL_fprintf( TEST->outfp, " Crout " );
  910. else if( PF[i] == HPL_RIGHT_LOOKING )
  911. HPL_fprintf( TEST->outfp, " Right " );
  912. }
  913. }
  914. }
  915. /*
  916. * Recursive stopping criterion
  917. */
  918. HPL_fprintf( TEST->outfp, "\nNBMIN :" );
  919. for( i = 0; i < Mmin( 8, *NBMS ); i++ )
  920. HPL_fprintf( TEST->outfp, "%8d ", NBM[i] );
  921. if( *NBMS > 8 )
  922. {
  923. HPL_fprintf( TEST->outfp, "\n " );
  924. for( i = 8; i < Mmin( 16, *NBMS ); i++ )
  925. HPL_fprintf( TEST->outfp, "%8d ", NBM[i] );
  926. if( *NBMS > 16 )
  927. {
  928. HPL_fprintf( TEST->outfp, "\n " );
  929. for( i = 16; i < *NBMS; i++ )
  930. HPL_fprintf( TEST->outfp, "%8d ", NBM[i] );
  931. }
  932. }
  933. /*
  934. * Number of panels in recursion
  935. */
  936. HPL_fprintf( TEST->outfp, "\nNDIV :" );
  937. for( i = 0; i < Mmin( 8, *NDVS ); i++ )
  938. HPL_fprintf( TEST->outfp, "%8d ", NDV[i] );
  939. if( *NDVS > 8 )
  940. {
  941. HPL_fprintf( TEST->outfp, "\n " );
  942. for( i = 8; i < Mmin( 16, *NDVS ); i++ )
  943. HPL_fprintf( TEST->outfp, "%8d ", NDV[i] );
  944. if( *NDVS > 16 )
  945. {
  946. HPL_fprintf( TEST->outfp, "\n " );
  947. for( i = 16; i < *NDVS; i++ )
  948. HPL_fprintf( TEST->outfp, "%8d ", NDV[i] );
  949. }
  950. }
  951. /*
  952. * Recursive Factorization
  953. */
  954. HPL_fprintf( TEST->outfp, "\nRFACT :" );
  955. for( i = 0; i < Mmin( 8, *NRFS ); i++ )
  956. {
  957. if( RF[i] == HPL_LEFT_LOOKING )
  958. HPL_fprintf( TEST->outfp, " Left " );
  959. else if( RF[i] == HPL_CROUT )
  960. HPL_fprintf( TEST->outfp, " Crout " );
  961. else if( RF[i] == HPL_RIGHT_LOOKING )
  962. HPL_fprintf( TEST->outfp, " Right " );
  963. }
  964. if( *NRFS > 8 )
  965. {
  966. HPL_fprintf( TEST->outfp, "\n " );
  967. for( i = 8; i < Mmin( 16, *NRFS ); i++ )
  968. {
  969. if( RF[i] == HPL_LEFT_LOOKING )
  970. HPL_fprintf( TEST->outfp, " Left " );
  971. else if( RF[i] == HPL_CROUT )
  972. HPL_fprintf( TEST->outfp, " Crout " );
  973. else if( RF[i] == HPL_RIGHT_LOOKING )
  974. HPL_fprintf( TEST->outfp, " Right " );
  975. }
  976. if( *NRFS > 16 )
  977. {
  978. HPL_fprintf( TEST->outfp, "\n " );
  979. for( i = 16; i < *NRFS; i++ )
  980. {
  981. if( RF[i] == HPL_LEFT_LOOKING )
  982. HPL_fprintf( TEST->outfp, " Left " );
  983. else if( RF[i] == HPL_CROUT )
  984. HPL_fprintf( TEST->outfp, " Crout " );
  985. else if( RF[i] == HPL_RIGHT_LOOKING )
  986. HPL_fprintf( TEST->outfp, " Right " );
  987. }
  988. }
  989. }
  990. /*
  991. * Broadcast topology
  992. */
  993. HPL_fprintf( TEST->outfp, "\nBCAST :" );
  994. for( i = 0; i < Mmin( 8, *NTPS ); i++ )
  995. {
  996. if( TP[i] == HPL_1RING )
  997. HPL_fprintf( TEST->outfp, " 1ring " );
  998. else if( TP[i] == HPL_1RING_M )
  999. HPL_fprintf( TEST->outfp, " 1ringM " );
  1000. else if( TP[i] == HPL_2RING )
  1001. HPL_fprintf( TEST->outfp, " 2ring " );
  1002. else if( TP[i] == HPL_2RING_M )
  1003. HPL_fprintf( TEST->outfp, " 2ringM " );
  1004. else if( TP[i] == HPL_BLONG )
  1005. HPL_fprintf( TEST->outfp, " Blong " );
  1006. else if( TP[i] == HPL_BLONG_M )
  1007. HPL_fprintf( TEST->outfp, " BlongM " );
  1008. }
  1009. if( *NTPS > 8 )
  1010. {
  1011. HPL_fprintf( TEST->outfp, "\n " );
  1012. for( i = 8; i < Mmin( 16, *NTPS ); i++ )
  1013. {
  1014. if( TP[i] == HPL_1RING )
  1015. HPL_fprintf( TEST->outfp, " 1ring " );
  1016. else if( TP[i] == HPL_1RING_M )
  1017. HPL_fprintf( TEST->outfp, " 1ringM " );
  1018. else if( TP[i] == HPL_2RING )
  1019. HPL_fprintf( TEST->outfp, " 2ring " );
  1020. else if( TP[i] == HPL_2RING_M )
  1021. HPL_fprintf( TEST->outfp, " 2ringM " );
  1022. else if( TP[i] == HPL_BLONG )
  1023. HPL_fprintf( TEST->outfp, " Blong " );
  1024. else if( TP[i] == HPL_BLONG_M )
  1025. HPL_fprintf( TEST->outfp, " BlongM " );
  1026. }
  1027. if( *NTPS > 16 )
  1028. {
  1029. HPL_fprintf( TEST->outfp, "\n " );
  1030. for( i = 16; i < *NTPS; i++ )
  1031. {
  1032. if( TP[i] == HPL_1RING )
  1033. HPL_fprintf( TEST->outfp, " 1ring " );
  1034. else if( TP[i] == HPL_1RING_M )
  1035. HPL_fprintf( TEST->outfp, " 1ringM " );
  1036. else if( TP[i] == HPL_2RING )
  1037. HPL_fprintf( TEST->outfp, " 2ring " );
  1038. else if( TP[i] == HPL_2RING_M )
  1039. HPL_fprintf( TEST->outfp, " 2ringM " );
  1040. else if( TP[i] == HPL_BLONG )
  1041. HPL_fprintf( TEST->outfp, " Blong " );
  1042. else if( TP[i] == HPL_BLONG_M )
  1043. HPL_fprintf( TEST->outfp, " BlongM " );
  1044. }
  1045. }
  1046. }
  1047. /*
  1048. * Lookahead depths
  1049. */
  1050. HPL_fprintf( TEST->outfp, "\nDEPTH :" );
  1051. for( i = 0; i < Mmin( 8, *NDHS ); i++ )
  1052. HPL_fprintf( TEST->outfp, "%8d ", DH[i] );
  1053. if( *NDHS > 8 )
  1054. {
  1055. HPL_fprintf( TEST->outfp, "\n " );
  1056. for( i = 8; i < Mmin( 16, *NDHS ); i++ )
  1057. HPL_fprintf( TEST->outfp, "%8d ", DH[i] );
  1058. if( *NDHS > 16 )
  1059. {
  1060. HPL_fprintf( TEST->outfp, "\n " );
  1061. for( i = 16; i < *NDHS; i++ )
  1062. HPL_fprintf( TEST->outfp, "%8d ", DH[i] );
  1063. }
  1064. }
  1065. /*
  1066. * Swapping algorithm
  1067. */
  1068. HPL_fprintf( TEST->outfp, "\nSWAP :" );
  1069. if( *FSWAP == HPL_SWAP00 )
  1070. HPL_fprintf( TEST->outfp, " Binary-exchange" );
  1071. else if( *FSWAP == HPL_SWAP01 )
  1072. HPL_fprintf( TEST->outfp, " Spread-roll (long)" );
  1073. else if( *FSWAP == HPL_SW_MIX )
  1074. HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP );
  1075. /*
  1076. * L1 storage form
  1077. */
  1078. HPL_fprintf( TEST->outfp, "\nL1 :" );
  1079. if( *L1NOTRAN != 0 )
  1080. HPL_fprintf( TEST->outfp, " no-transposed form" );
  1081. else
  1082. HPL_fprintf( TEST->outfp, " transposed form" );
  1083. /*
  1084. * U storage form
  1085. */
  1086. HPL_fprintf( TEST->outfp, "\nU :" );
  1087. if( *UNOTRAN != 0 )
  1088. HPL_fprintf( TEST->outfp, " no-transposed form" );
  1089. else
  1090. HPL_fprintf( TEST->outfp, " transposed form" );
  1091. /*
  1092. * Equilibration
  1093. */
  1094. HPL_fprintf( TEST->outfp, "\nEQUIL :" );
  1095. if( *EQUIL != 0 )
  1096. HPL_fprintf( TEST->outfp, " yes" );
  1097. else
  1098. HPL_fprintf( TEST->outfp, " no" );
  1099. /*
  1100. * Alignment
  1101. */
  1102. HPL_fprintf( TEST->outfp, "\nALIGN : %d double precision words",
  1103. *ALIGN );
  1104. HPL_fprintf( TEST->outfp, "\n\n" );
  1105. /*
  1106. * For testing only
  1107. */
  1108. if( TEST->thrsh > HPL_rzero )
  1109. {
  1110. HPL_fprintf( TEST->outfp, "%s%s\n\n",
  1111. "----------------------------------------",
  1112. "----------------------------------------" );
  1113. HPL_fprintf( TEST->outfp, "%s\n",
  1114. "- The matrix A is randomly generated for each test." );
  1115. HPL_fprintf( TEST->outfp, "%s\n",
  1116. "- The following scaled residual check will be computed:" );
  1117. HPL_fprintf( TEST->outfp, "%s\n",
  1118. " ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" );
  1119. HPL_fprintf( TEST->outfp, "%s %21.6e\n",
  1120. "- The relative machine precision (eps) is taken to be ",
  1121. TEST->epsil );
  1122. HPL_fprintf( TEST->outfp, "%s %11.1f\n\n",
  1123. "- Computational tests pass if scaled residuals are less than ",
  1124. TEST->thrsh );
  1125. }
  1126. }
  1127. /*
  1128. * End of HPL_pdinfo
  1129. */
  1130. }