heat.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767
  1. /*
  2. * StarPU
  3. * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include "heat.h"
  17. /* default values */
  18. static unsigned ntheta = 32+2;
  19. static unsigned nthick = 32+2;
  20. static unsigned nblocks = 16;
  21. static unsigned shape = 0;
  22. static unsigned pinned = 0;
  23. static unsigned version = 2;
  24. static unsigned use_cg = 0; /* use a LU decomposition of CG ? */
  25. static int argc_;
  26. static char **argv_;
  27. extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
  28. unsigned nrow, uint32_t *colind, uint32_t *rowptr);
  29. static void parse_args(int argc, char **argv)
  30. {
  31. int i;
  32. for (i = 1; i < argc; i++) {
  33. if (strcmp(argv[i], "-cg") == 0) {
  34. use_cg = 1;
  35. }
  36. if (strcmp(argv[i], "-shape") == 0) {
  37. char *argptr;
  38. shape = strtol(argv[++i], &argptr, 10);
  39. }
  40. if (strcmp(argv[i], "-nthick") == 0) {
  41. char *argptr;
  42. nthick = strtol(argv[++i], &argptr, 10);
  43. }
  44. if (strcmp(argv[i], "-ntheta") == 0) {
  45. char *argptr;
  46. ntheta = strtol(argv[++i], &argptr, 10);
  47. }
  48. if (strcmp(argv[i], "-nblocks") == 0) {
  49. char *argptr;
  50. nblocks = strtol(argv[++i], &argptr, 10);
  51. }
  52. if (strcmp(argv[i], "-v1") == 0) {
  53. version = 1;
  54. }
  55. if (strcmp(argv[i], "-v2") == 0) {
  56. version = 2;
  57. }
  58. if (strcmp(argv[i], "-v3") == 0) {
  59. version = 3;
  60. }
  61. if (strcmp(argv[i], "-pin") == 0) {
  62. pinned = 1;
  63. }
  64. if (strcmp(argv[i], "-h") == 0) {
  65. printf("usage : %s [-v1|-v2|-v3] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg]\n", argv[0]);
  66. }
  67. }
  68. }
  69. /*
  70. * The Finite element method code
  71. *
  72. * B C
  73. * **********
  74. * * 0 * *
  75. * * * *
  76. * * * 1 *
  77. * **********
  78. * A D
  79. */
  80. static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  81. unsigned theta_psi, unsigned thick_psi, unsigned xy, point *pmesh)
  82. {
  83. float xa,ya,xb,yb,xc,yc;
  84. float tmp;
  85. assert(theta_tr + 2 <= ntheta);
  86. assert(thick_tr + 2 <= nthick);
  87. /* A */
  88. xa = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x;
  89. ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
  90. /* B */
  91. if (side_tr) {
  92. /* lower D is actually B here */
  93. xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
  94. yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
  95. } else {
  96. /* upper */
  97. xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
  98. yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
  99. }
  100. xc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
  101. yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
  102. /* now look for the actual psi node */
  103. if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)) {
  104. /* A nothing to do */
  105. } else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)) {
  106. /* psi matches C */
  107. /* swap A and C coordinates */
  108. tmp = xa; xa = xc; xc = tmp;
  109. tmp = ya; ya = yc; yc = tmp;
  110. } else if
  111. (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))) {
  112. /* psi is D (that was stored in C) XXX */
  113. tmp = xa; xa = xb; xb = tmp;
  114. tmp = ya; ya = yb; yb = tmp;
  115. } else if
  116. (!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))) {
  117. /* psi is C */
  118. tmp = xa; xa = xb; xb = tmp;
  119. tmp = ya; ya = yb; yb = tmp;
  120. } else {
  121. /* the psi node is not a node of the current triangle */
  122. return 0.0f;
  123. }
  124. /* now the triangle should have A as the psi node */
  125. float denom;
  126. float value;
  127. denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb);
  128. switch (xy) {
  129. case X:
  130. value = (yc - yb)/denom;
  131. break;
  132. case Y:
  133. value = -(xc - xb)/denom;
  134. break;
  135. default:
  136. assert(0);
  137. }
  138. return value;
  139. }
  140. static inline float diff_y_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  141. unsigned theta_psi, unsigned thick_psi, point *pmesh)
  142. {
  143. return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, Y, pmesh);
  144. }
  145. static inline float diff_x_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  146. unsigned theta_psi, unsigned thick_psi, point *pmesh)
  147. {
  148. return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, X, pmesh);
  149. }
  150. static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, point *pmesh)
  151. {
  152. float surface;
  153. float tmp;
  154. float xi, xj, xk, yi, yj, yk;
  155. STARPU_ASSERT(theta_tr + 2 <= ntheta);
  156. STARPU_ASSERT(thick_tr + 2 <= nthick);
  157. xi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x;
  158. yi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
  159. xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
  160. yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
  161. if (side_tr) {
  162. /* lower */
  163. xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
  164. yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
  165. } else {
  166. xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
  167. yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
  168. }
  169. tmp = (xi - xj)*(yk -yj) - (xk - xj)*(yi -yj);
  170. surface = 0.5*fabs(tmp);
  171. return surface;
  172. }
  173. static inline float integral_triangle(int theta_tr, int thick_tr, unsigned side_tr,
  174. unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh)
  175. {
  176. float surface;
  177. float value;
  178. float dxi, dxj, dyi, dyj;
  179. if (theta_tr < 0) return 0.0f;
  180. if (theta_tr + 2 > (int)ntheta) return 0.0f;
  181. if (thick_tr < 0) return 0.0f;
  182. if (thick_tr + 2 > (int)nthick) return 0.0f;
  183. dxi = diff_x_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh);
  184. dyi = diff_y_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh);
  185. dxj = diff_x_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh);
  186. dyj = diff_y_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh);
  187. surface = surface_triangle(theta_tr, thick_tr, side_tr, pmesh);
  188. value = (dxi*dxj + dyi*dyj)*surface;
  189. return value;
  190. }
  191. static inline float integrale_sum(unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh)
  192. {
  193. float integral = 0.0f;
  194. integral += integral_triangle(theta_i - 1, thick_i - 1, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  195. integral += integral_triangle(theta_i - 1, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  196. integral += integral_triangle(theta_i - 1, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  197. integral += integral_triangle(theta_i, thick_i, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  198. integral += integral_triangle(theta_i, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  199. integral += integral_triangle(theta_i, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  200. return integral;
  201. }
  202. static float compute_A_value(unsigned i, unsigned j, point *pmesh)
  203. {
  204. float value = 0.0f;
  205. unsigned thick_i, thick_j;
  206. unsigned theta_i, theta_j;
  207. /* add all contributions from all connex triangles */
  208. thick_i = NODE_TO_THICK(i);
  209. thick_j = NODE_TO_THICK(j);
  210. theta_i = NODE_TO_THETA(i);
  211. theta_j = NODE_TO_THETA(j);
  212. /* Compute the Sum of all the integral over all triangles */
  213. if ( (abs(thick_i - thick_j) <= 1) && (abs(theta_i - theta_j) <= 1) )
  214. {
  215. if ( (theta_j == theta_i -1) && (thick_j == thick_i +1))
  216. goto done;
  217. if ( (theta_j == theta_i + 1) && (thick_j == thick_i - 1))
  218. goto done;
  219. /* this may not be a null entry */
  220. value += integrale_sum(theta_i, thick_i, theta_j, thick_j, pmesh);
  221. }
  222. done:
  223. return value;
  224. }
  225. #define TRANSLATE(k) (RefArray[(k)])
  226. #define TRANSLATEBACK(k) (RefArrayBack[(k)])
  227. static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B)
  228. {
  229. unsigned i;
  230. /* solve the actual problem LU X = B */
  231. /* solve LX' = Y with X' = UX */
  232. /* solve UX = X' */
  233. fprintf(stderr, "Solving the problem ...\n");
  234. #ifdef CHECK_RESULTS
  235. float *savedB = malloc(subsize*sizeof(float));
  236. memcpy(savedB, B, subsize*sizeof(float));
  237. float *LUB = malloc(subsize*sizeof(float));
  238. #endif
  239. /* L */
  240. STRSV("L", "N", "N", subsize, A, subsize, B, 1);
  241. /* U */
  242. STRSV("U", "N", "U", subsize, A, subsize, B, 1);
  243. STARPU_ASSERT(DIM == size);
  244. #ifdef CHECK_RESULTS
  245. /* compute the error on (LUB - savedB) which should be 0 */
  246. /* LUB = B */
  247. memcpy(LUB, B, subsize*sizeof(float));
  248. /* LUB = U * LUB */
  249. STRMV("U", "N", "U", subsize, A, subsize, LUB, 1);
  250. /* LUB = L * LUB */
  251. STRMV("L", "N", "N", subsize, A, subsize, LUB, 1);
  252. /* LUB -= B */
  253. SAXPY(subsize, -1.0f, savedB, 1, LUB, 1);
  254. /* check if LUB is close to the 0 vector */
  255. int maxind = ISAMAX(subsize, LUB, 1);
  256. fprintf(stderr, "max (LUX - B) = %f\n",LUB[maxind - 1]);
  257. free(LUB);
  258. free(savedB);
  259. #endif
  260. /* now display back the ACTUAL result */
  261. for (i = 0; i < subsize; i++)
  262. {
  263. result[TRANSLATE(i)] = B[i];
  264. }
  265. for (i = subsize ; i < size; i++)
  266. {
  267. result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
  268. }
  269. }
  270. unsigned compute_pivot_array(int *RefArray, int *RefArrayBack, unsigned size)
  271. {
  272. unsigned k;
  273. unsigned index = 0;
  274. unsigned theta, thick;
  275. unsigned newsize;
  276. for (k = 0; k < size; k++)
  277. {
  278. RefArray[k] = k;
  279. RefArrayBack[k] = k;
  280. }
  281. /* first inner nodes */
  282. for (theta = 1; theta < ntheta - 1 ; theta++)
  283. {
  284. for (thick = 1; thick < nthick - 1; thick++)
  285. {
  286. /* inner nodes are unknown */
  287. RefArrayBack[NODE_NUMBER(theta, thick)] = index;
  288. RefArray[index] = NODE_NUMBER(theta, thick);
  289. index++;
  290. }
  291. }
  292. newsize = index;
  293. for (theta=0; theta < ntheta; theta++)
  294. {
  295. /* Lower boundary "South" */
  296. RefArrayBack[NODE_NUMBER(theta, 0)] = index;
  297. RefArray[index++] = NODE_NUMBER(theta, 0);
  298. /* Upper boundary "North" */
  299. RefArrayBack[NODE_NUMBER(theta, nthick-1)] = index;
  300. RefArray[index++] = NODE_NUMBER(theta, nthick-1);
  301. }
  302. for (thick = 1; thick < nthick -1; thick++)
  303. {
  304. /* "West "*/
  305. RefArrayBack[NODE_NUMBER(0, thick)] = index;
  306. RefArray[index++] = NODE_NUMBER(0, thick);
  307. /* "East" */
  308. RefArrayBack[NODE_NUMBER(ntheta-1, thick)] = index;
  309. RefArray[index++] = NODE_NUMBER(ntheta-1, thick);
  310. }
  311. assert(index == size);
  312. return newsize;
  313. }
  314. void build_mesh(point *mesh)
  315. {
  316. unsigned theta, thick;
  317. /* first build the mesh by determining all points positions */
  318. for (theta = 0; theta < ntheta; theta++)
  319. {
  320. float angle;
  321. angle = (ntheta - 1 - theta) * Pi/(ntheta-1);
  322. for (thick = 0; thick < nthick; thick++)
  323. {
  324. float r;
  325. r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN;
  326. switch (shape) {
  327. default:
  328. case 0:
  329. mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle);
  330. mesh[NODE_NUMBER(theta,thick)].y = r*sinf(angle);
  331. break;
  332. case 1:
  333. mesh[NODE_NUMBER(theta,thick)].x =
  334. -100 + RMIN+((RMAX-RMIN)*theta)/(ntheta - 1);
  335. mesh[NODE_NUMBER(theta,thick)].y =
  336. RMIN+((RMAX-RMIN)*thick)/(nthick - 1);
  337. break;
  338. case 2:
  339. mesh[NODE_NUMBER(theta,thick)].x = r*(2.0f*theta/(ntheta - 1)- 1.0f);
  340. mesh[NODE_NUMBER(theta,thick)].y = r*(2.0f*thick/(nthick - 1)- 1.0f);
  341. break;
  342. }
  343. }
  344. }
  345. }
  346. static unsigned build_neighbour_vector(unsigned *neighbours, unsigned node, int *RefArray, int *RefArrayBack)
  347. {
  348. /* where is that point in the former space ? */
  349. int former = TRANSLATE(node);
  350. int former_thick, former_theta;
  351. former_thick= (int)NODE_TO_THICK(former);
  352. former_theta = (int)NODE_TO_THETA(former);
  353. /* do a list of all the possible neighbours */
  354. unsigned nneighbours = 0;
  355. int dtheta, dthick;
  356. for (dthick = -1; dthick <= 1; dthick++)
  357. {
  358. if ((former_thick + dthick) >= 0 && (former_thick + dthick) <= (int)nthick )
  359. {
  360. for (dtheta = -1; dtheta <= 1; dtheta++)
  361. {
  362. if ((former_theta + dtheta) >= 0 && (former_theta + dtheta) <= (int)ntheta )
  363. {
  364. /* we got a possible neighbour */
  365. unsigned node =
  366. NODE_NUMBER((former_theta + dtheta), (former_thick + dthick));
  367. neighbours[nneighbours++] = TRANSLATEBACK(node);
  368. }
  369. }
  370. }
  371. }
  372. unsigned i;
  373. /* order that list */
  374. for (i = 0; i < nneighbours; i++)
  375. {
  376. /* find the i^th smallest entry for position i */
  377. unsigned index;
  378. unsigned min , min_index;
  379. min = neighbours[i];
  380. min_index = i;
  381. for (index = i+1; index < nneighbours; index++)
  382. {
  383. STARPU_ASSERT(neighbours[i] != neighbours[index]);
  384. if (neighbours[index] < min)
  385. {
  386. min = neighbours[index];
  387. min_index = index;
  388. }
  389. }
  390. /* swap values */
  391. neighbours[min_index] = neighbours[i];
  392. neighbours[i] = min;
  393. }
  394. return nneighbours;
  395. }
  396. static void build_sparse_stiffness_matrix_B(point *pmesh, float *B, float *Bformer, unsigned size, unsigned newsize, int *RefArray, int *RefArrayBack)
  397. {
  398. unsigned i,j;
  399. /* first give the value of known nodes (at boundaries) */
  400. for (i = 0; i < size; i++)
  401. {
  402. Bformer[i] = 0.0f;
  403. }
  404. for (i = 0; i < nthick; i++)
  405. {
  406. Bformer[i] = 200.0f;
  407. Bformer[size-1-i] = 200.0f;
  408. }
  409. for (i = 1; i < ntheta-1; i++)
  410. {
  411. Bformer[i*nthick] = 200.0f;
  412. Bformer[(i+1)*nthick-1] = 100.0f;
  413. }
  414. /* now the actual stiffness (reordered) matrix*/
  415. for (j = 0 ; j < newsize ; j++)
  416. {
  417. unsigned neighbour;
  418. unsigned nneighbours;
  419. unsigned neighbours[9];
  420. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  421. B[j] = Bformer[TRANSLATE(j)];
  422. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  423. {
  424. unsigned i = neighbours[neighbour];
  425. if (i >= newsize)
  426. {
  427. B[j] -= compute_A_value(TRANSLATE(i), TRANSLATE(j), pmesh)*Bformer[TRANSLATE(i)];
  428. }
  429. }
  430. }
  431. }
  432. static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uint32_t **colind,
  433. uint32_t *rowptr, unsigned newsize, int *RefArray, int *RefArrayBack)
  434. {
  435. unsigned j;
  436. unsigned pos = 0;
  437. *nzval = NULL;
  438. *colind = NULL;
  439. /* now the actual stiffness (reordered) matrix*/
  440. for (j = 0 ; j < newsize ; j++)
  441. {
  442. rowptr[j] = pos;
  443. unsigned neighbour;
  444. unsigned nneighbours;
  445. unsigned neighbours[9];
  446. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  447. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  448. {
  449. float val;
  450. unsigned nodeneighbour = neighbours[neighbour];
  451. if (nodeneighbour < newsize) {
  452. val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
  453. if (val != 0.0f) {
  454. *nzval = realloc(*nzval, (pos+1)*sizeof(float));
  455. *colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
  456. (*nzval)[pos] = val;
  457. (*colind)[pos] = nodeneighbour;
  458. pos++;
  459. }
  460. }
  461. }
  462. }
  463. rowptr[newsize] = pos;
  464. return pos;
  465. }
  466. static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned newsize, int *RefArray, int *RefArrayBack)
  467. {
  468. unsigned j;
  469. /* touch all the memory */
  470. memset(A, 0, newsize*newsize*sizeof(float));
  471. /* now the actual stiffness (reordered) matrix*/
  472. for (j = 0 ; j < newsize ; j++)
  473. {
  474. unsigned neighbour;
  475. unsigned nneighbours;
  476. unsigned neighbours[9];
  477. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  478. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  479. {
  480. unsigned nodeneighbour = neighbours[neighbour];
  481. if (nodeneighbour < newsize) {
  482. float val;
  483. val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
  484. A[j+ newsize*nodeneighbour] = val;
  485. }
  486. }
  487. }
  488. }
  489. int main(int argc, char **argv)
  490. {
  491. float *A;
  492. float *B;
  493. unsigned newsize;
  494. float *result;
  495. int *RefArray, *RefArrayBack;
  496. point *pmesh;
  497. float *Bformer;
  498. argc_ = argc;
  499. argv_ = argv;
  500. #ifdef USE_MARCEL
  501. marcel_init(&argc, argv);
  502. #endif
  503. parse_args(argc, argv);
  504. pmesh = malloc(DIM*sizeof(point));
  505. RefArray = malloc(DIM*sizeof(int));
  506. RefArrayBack = malloc(DIM*sizeof(int));
  507. Bformer = malloc(DIM*sizeof(float));
  508. result = malloc(DIM*sizeof(float));
  509. build_mesh(pmesh);
  510. #ifdef USE_POSTSCRIPT
  511. postscript_gen();
  512. #endif
  513. /* now simplify that problem given the boundary conditions
  514. * to do so, we remove the already known variables from the system
  515. * by pivoting the various know variable, RefArray keep track of that
  516. * pivoting */
  517. newsize = compute_pivot_array(RefArray, RefArrayBack, DIM);
  518. /* we can either use a direct method (LU decomposition here) or an
  519. * iterative method (conjugate gradient here) */
  520. if (use_cg) {
  521. unsigned nnz;
  522. float *nzval;
  523. uint32_t *colind;
  524. uint32_t *rowptr;
  525. rowptr = malloc((newsize+1)*sizeof(uint32_t));
  526. B = malloc(newsize*sizeof(float));
  527. build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack);
  528. nnz = build_sparse_stiffness_matrix_A(pmesh, &nzval, &colind, rowptr, newsize, RefArray, RefArrayBack);
  529. #if 0
  530. printf("nnz : %d\n", nnz);
  531. fprintf(stdout, "MUMPS FORMAT BEGIN\n");
  532. FILE *fm = fopen("input_mumps", "w+");
  533. fprintf(fm, "%d\t:N\n%d\t:NZ\n", newsize, nnz);
  534. unsigned r;
  535. for (r = 0; r < newsize; r++)
  536. {
  537. int first_ind = rowptr[r];
  538. int last_ind = rowptr[r+1];
  539. int ind;
  540. for (ind = first_ind; ind < last_ind; ind++)
  541. {
  542. fprintf(fm, "%d %d %f\n", colind[ind]+1, r+1, nzval[ind]);
  543. }
  544. }
  545. for (r = 0; r < newsize; r++)
  546. {
  547. fprintf(fm, "%f\n", B[r]);
  548. }
  549. fclose(fm);
  550. fprintf(stdout, "MUMPS FORMAT END\n");
  551. #endif
  552. do_conjugate_gradient(nzval, B, result, nnz, newsize, colind, rowptr);
  553. /* XXX */
  554. memcpy(B, result, newsize*sizeof(float));
  555. /* now display back the ACTUAL result */
  556. unsigned i;
  557. for (i = 0; i < newsize; i++)
  558. {
  559. result[TRANSLATE(i)] = B[i];
  560. }
  561. for (i = newsize ; i < DIM; i++)
  562. {
  563. result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
  564. }
  565. }
  566. else {
  567. /* unfortunately CUDA does not allow late memory registration,
  568. * we need to do the malloc using CUDA itself ... */
  569. initialize_system(&A, &B, newsize, pinned);
  570. /* then build the stiffness matrix A */
  571. build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack);
  572. build_dense_stiffness_matrix_A(pmesh, A, newsize, RefArray, RefArrayBack);
  573. fprintf(stderr, "Problem size : %dx%d (%dx%d)\n", newsize, newsize, DIM, DIM);
  574. if (version < 3) {
  575. dw_factoLU(A, newsize, newsize, nblocks, version);
  576. }
  577. else {
  578. dw_factoLU_tag(A, newsize, newsize, nblocks);
  579. }
  580. display_stat_heat();
  581. solve_system(DIM, newsize, result, RefArray, Bformer, A, B);
  582. starpu_shutdown();
  583. }
  584. #ifdef OPENGL_RENDER
  585. opengl_render(ntheta, nthick, result, pmesh, argc, argv);
  586. #endif
  587. free(pmesh);
  588. free(RefArray);
  589. free(RefArrayBack);
  590. free(Bformer);
  591. free(result);
  592. return 0;
  593. }