/* File: hpl_pauxil.h */

/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions, and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 * software must display the following acknowledgement:
 * This product includes software developed at the University of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the University, the name of the Laboratory, or the
 * names of its contributors may not be used to endorse or promote
 * products derived from this software without specific written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
  46. #ifndef HPL_PAUXIL_H
  47. #define HPL_PAUXIL_H
  48. /*
  49. * ---------------------------------------------------------------------
  50. * Include files
  51. * ---------------------------------------------------------------------
  52. */
  53. #include "hpl_misc.h"
  54. #include "hpl_blas.h"
  55. #include "hpl_auxil.h"
  56. #include "hpl_pmisc.h"
  57. #include "hpl_grid.h"
/*
 * ---------------------------------------------------------------------
 * #define macro definitions
 * ---------------------------------------------------------------------
 */
  63. /*
  64. * Mindxg2p returns the process coodinate owning the entry globally in-
  65. * dexed by ig_.
  66. */
  67. #define Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ ) \
  68. { \
  69. if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) && \
  70. ( (nprocs_) > 1 ) ) \
  71. { \
  72. proc_ = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_); \
  73. proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \
  74. } \
  75. else \
  76. { \
  77. proc_ = (src_); \
  78. } \
  79. }
  80. #define Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ ) \
  81. { \
  82. if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \
  83. ( (nprocs_) == 1 ) ) { il_ = (ig_); } \
  84. else \
  85. { \
  86. int i__, j__; \
  87. j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
  88. il_ = (nb_)*( j__ - i__ ) + \
  89. ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \
  90. (ig_) - (inb_) : (ig_) ); \
  91. } \
  92. }
  93. #define Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ ) \
  94. { \
  95. if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \
  96. ( (nprocs_) == 1 ) ) \
  97. { il_ = (ig_); proc_ = (src_); } \
  98. else \
  99. { \
  100. int i__, j__; \
  101. j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
  102. il_ = (nb_)*(j__-i__) + \
  103. ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \
  104. (ig_) - (inb_) : (ig_) ); \
  105. proc_ = (src_) + 1 + i__; \
  106. proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \
  107. } \
  108. }
  109. /*
  110. * Mindxl2g computes the global index ig_ corresponding to the local
  111. * index il_ in process proc_.
  112. */
  113. #define Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ ) \
  114. { \
  115. if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \
  116. { \
  117. if( (proc_) == (src_) ) \
  118. { \
  119. if( (il_) < (inb_) ) ig_ = (il_); \
  120. else ig_ = (il_) + \
  121. (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
  122. } \
  123. else if( (proc_) < (src_) ) \
  124. { \
  125. ig_ = (il_) + (inb_) + \
  126. (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \
  127. (proc_)-(src_)-1+(nprocs_) ); \
  128. } \
  129. else \
  130. { \
  131. ig_ = (il_) + (inb_) + \
  132. (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \
  133. (proc_)-(src_)-1 ); \
  134. } \
  135. } \
  136. else \
  137. { \
  138. ig_ = (il_); \
  139. } \
  140. }
  141. /*
  142. * MnumrocI computes the # of local indexes np_ residing in the process
  143. * of coordinate proc_ corresponding to the interval of global indexes
  144. * i_:i_+n_-1 assuming that the global index 0 resides in the process
  145. * src_, and that the indexes are distributed from src_ using the para-
  146. * meters inb_, nb_ and nprocs_.
  147. */
  148. #define MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ ) \
  149. { \
  150. if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \
  151. { \
  152. int inb__, mydist__, n__, nblk__, quot__, src__; \
  153. if( ( inb__ = (inb_) - (i_) ) <= 0 ) \
  154. { \
  155. nblk__ = (-inb__) / (nb_) + 1; \
  156. src__ = (src_) + nblk__; \
  157. src__ -= ( src__ / (nprocs_) ) * (nprocs_); \
  158. inb__ += nblk__*(nb_); \
  159. if( ( n__ = (n_) - inb__ ) <= 0 ) \
  160. { \
  161. if( (proc_) == src__ ) np_ = (n_); \
  162. else np_ = 0; \
  163. } \
  164. else \
  165. { \
  166. if( ( mydist__ = (proc_) - src__ ) < 0 ) \
  167. mydist__ += (nprocs_); \
  168. nblk__ = n__ / (nb_) + 1; \
  169. mydist__ -= nblk__ - \
  170. (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
  171. if( mydist__ < 0 ) \
  172. { \
  173. if( (proc_) != src__ ) \
  174. np_ = (nb_) + (nb_) * quot__; \
  175. else \
  176. np_ = inb__ + (nb_) * quot__; \
  177. } \
  178. else if( mydist__ > 0 ) \
  179. { \
  180. np_ = (nb_) * quot__; \
  181. } \
  182. else \
  183. { \
  184. if( (proc_) != src__ ) \
  185. np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
  186. else \
  187. np_ = (n_)+ (nb_)*(quot__ - nblk__); \
  188. } \
  189. } \
  190. } \
  191. else \
  192. { \
  193. if( ( n__ = (n_) - inb__ ) <= 0 ) \
  194. { \
  195. if( (proc_) == (src_) ) np_ = (n_); \
  196. else np_ = 0; \
  197. } \
  198. else \
  199. { \
  200. if( ( mydist__ = (proc_) - (src_) ) < 0 ) \
  201. mydist__ += (nprocs_); \
  202. nblk__ = n__ / (nb_) + 1; \
  203. mydist__ -= nblk__ - \
  204. ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
  205. if( mydist__ < 0 ) \
  206. { \
  207. if( (proc_) != (src_) ) \
  208. np_ = (nb_) + (nb_) * quot__; \
  209. else \
  210. np_ = inb__ + (nb_) * quot__; \
  211. } \
  212. else if( mydist__ > 0 ) \
  213. { \
  214. np_ = (nb_) * quot__; \
  215. } \
  216. else \
  217. { \
  218. if( (proc_) != (src_) ) \
  219. np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
  220. else \
  221. np_ = (n_)+ (nb_)*(quot__ - nblk__); \
  222. } \
  223. } \
  224. } \
  225. } \
  226. else \
  227. { \
  228. np_ = (n_); \
  229. } \
  230. }
  231. #define Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ ) \
  232. MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ )
/*
 * ---------------------------------------------------------------------
 * Function prototypes
 * ---------------------------------------------------------------------
 */
  238. void HPL_indxg2lp
  239. STDC_ARGS( (
  240. int *,
  241. int *,
  242. const int,
  243. const int,
  244. const int,
  245. const int,
  246. const int
  247. ) );
  248. int HPL_indxg2l
  249. STDC_ARGS( (
  250. const int,
  251. const int,
  252. const int,
  253. const int,
  254. const int
  255. ) );
  256. int HPL_indxg2p
  257. STDC_ARGS( (
  258. const int,
  259. const int,
  260. const int,
  261. const int,
  262. const int
  263. ) );
  264. int HPL_indxl2g
  265. STDC_ARGS( (
  266. const int,
  267. const int,
  268. const int,
  269. const int,
  270. const int,
  271. const int
  272. ) );
  273. void HPL_infog2l
  274. STDC_ARGS( (
  275. int,
  276. int,
  277. const int,
  278. const int,
  279. const int,
  280. const int,
  281. const int,
  282. const int,
  283. const int,
  284. const int,
  285. const int,
  286. const int,
  287. int *,
  288. int *,
  289. int *,
  290. int *
  291. ) );
  292. int HPL_numroc
  293. STDC_ARGS( (
  294. const int,
  295. const int,
  296. const int,
  297. const int,
  298. const int,
  299. const int
  300. ) );
  301. int HPL_numrocI
  302. STDC_ARGS( (
  303. const int,
  304. const int,
  305. const int,
  306. const int,
  307. const int,
  308. const int,
  309. const int
  310. ) );
  311. void HPL_dlaswp00N
  312. STDC_ARGS( (
  313. const int,
  314. const int,
  315. double *,
  316. const int,
  317. const int *
  318. ) );
  319. void HPL_dlaswp10N
  320. STDC_ARGS( (
  321. const int,
  322. const int,
  323. double *,
  324. const int,
  325. const int *
  326. ) );
  327. void HPL_dlaswp01N
  328. STDC_ARGS( (
  329. const int,
  330. const int,
  331. double *,
  332. const int,
  333. double *,
  334. const int,
  335. const int *,
  336. const int *
  337. ) );
  338. void HPL_dlaswp01T
  339. STDC_ARGS( (
  340. const int,
  341. const int,
  342. double *,
  343. const int,
  344. double *,
  345. const int,
  346. const int *,
  347. const int *
  348. ) );
  349. void HPL_dlaswp02N
  350. STDC_ARGS( (
  351. const int,
  352. const int,
  353. const double *,
  354. const int,
  355. double *,
  356. double *,
  357. const int,
  358. const int *,
  359. const int *
  360. ) );
  361. void HPL_dlaswp03N
  362. STDC_ARGS( (
  363. const int,
  364. const int,
  365. double *,
  366. const int,
  367. const double *,
  368. const double *,
  369. const int
  370. ) );
  371. void HPL_dlaswp03T
  372. STDC_ARGS( (
  373. const int,
  374. const int,
  375. double *,
  376. const int,
  377. const double *,
  378. const double *,
  379. const int
  380. ) );
  381. void HPL_dlaswp04N
  382. STDC_ARGS( (
  383. const int,
  384. const int,
  385. const int,
  386. double *,
  387. const int,
  388. double *,
  389. const int,
  390. const double *,
  391. const double *,
  392. const int,
  393. const int *,
  394. const int *
  395. ) );
  396. void HPL_dlaswp04T
  397. STDC_ARGS( (
  398. const int,
  399. const int,
  400. const int,
  401. double *,
  402. const int,
  403. double *,
  404. const int,
  405. const double *,
  406. const double *,
  407. const int,
  408. const int *,
  409. const int *
  410. ) );
  411. void HPL_dlaswp05N
  412. STDC_ARGS( (
  413. const int,
  414. const int,
  415. double *,
  416. const int,
  417. const double *,
  418. const int,
  419. const int *,
  420. const int *
  421. ) );
  422. void HPL_dlaswp05T
  423. STDC_ARGS( (
  424. const int,
  425. const int,
  426. double *,
  427. const int,
  428. const double *,
  429. const int,
  430. const int *,
  431. const int *
  432. ) );
  433. void HPL_dlaswp06N
  434. STDC_ARGS( (
  435. const int,
  436. const int,
  437. double *,
  438. const int,
  439. double *,
  440. const int,
  441. const int *
  442. ) );
  443. void HPL_dlaswp06T
  444. STDC_ARGS( (
  445. const int,
  446. const int,
  447. double *,
  448. const int,
  449. double *,
  450. const int,
  451. const int *
  452. ) );
  453. void HPL_pabort
  454. STDC_ARGS( (
  455. int,
  456. const char *,
  457. const char *,
  458. ...
  459. ) );
  460. void HPL_pwarn
  461. STDC_ARGS( (
  462. FILE *,
  463. int,
  464. const char *,
  465. const char *,
  466. ...
  467. ) );
  468. void HPL_pdlaprnt
  469. STDC_ARGS( (
  470. const HPL_T_grid *,
  471. const int,
  472. const int,
  473. const int,
  474. double *,
  475. const int,
  476. const int,
  477. const int,
  478. const char *
  479. ) );
  480. double HPL_pdlamch
  481. STDC_ARGS( (
  482. MPI_Comm,
  483. const HPL_T_MACH
  484. ) );
  485. double HPL_pdlange
  486. STDC_ARGS( (
  487. const HPL_T_grid *,
  488. const HPL_T_NORM,
  489. const int,
  490. const int,
  491. const int,
  492. const double *,
  493. const int
  494. ) );
  495. #endif
  496. /*
  497. * End of hpl_pauxil.h
  498. */