heat.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772
  1. /*
  2. * StarPU
  3. * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include "heat.h"
  17. /* default values */
  18. static unsigned ntheta = 32+2;
  19. static unsigned nthick = 32+2;
  20. static unsigned nblocks = 16;
  21. static unsigned nbigblocks = 8;
  22. static unsigned shape = 0;
  23. static unsigned pinned = 0;
  24. static unsigned check = 0;
  25. static unsigned version = 2;
  26. static unsigned use_cg = 0; /* use a LU decomposition of CG ? */
  27. static unsigned no_prio = 0;
  28. extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
  29. unsigned nrow, uint32_t *colind, uint32_t *rowptr);
  30. static void parse_args(int argc, char **argv)
  31. {
  32. int i;
  33. for (i = 1; i < argc; i++) {
  34. if (strcmp(argv[i], "-cg") == 0) {
  35. use_cg = 1;
  36. }
  37. if (strcmp(argv[i], "-shape") == 0) {
  38. char *argptr;
  39. shape = strtol(argv[++i], &argptr, 10);
  40. }
  41. if (strcmp(argv[i], "-nthick") == 0) {
  42. char *argptr;
  43. nthick = strtol(argv[++i], &argptr, 10);
  44. }
  45. if (strcmp(argv[i], "-ntheta") == 0) {
  46. char *argptr;
  47. ntheta = strtol(argv[++i], &argptr, 10);
  48. }
  49. if (strcmp(argv[i], "-nblocks") == 0) {
  50. char *argptr;
  51. nblocks = strtol(argv[++i], &argptr, 10);
  52. }
  53. if (strcmp(argv[i], "-nbigblocks") == 0) {
  54. char *argptr;
  55. nbigblocks = strtol(argv[++i], &argptr, 10);
  56. }
  57. if (strcmp(argv[i], "-v1") == 0) {
  58. version = 1;
  59. }
  60. if (strcmp(argv[i], "-v2") == 0) {
  61. version = 2;
  62. }
  63. if (strcmp(argv[i], "-v3") == 0) {
  64. version = 3;
  65. }
  66. if (strcmp(argv[i], "-v4") == 0) {
  67. version = 4;
  68. }
  69. if (strcmp(argv[i], "-pin") == 0) {
  70. pinned = 1;
  71. }
  72. if (strcmp(argv[i], "-check") == 0) {
  73. check = 1;
  74. }
  75. if (strcmp(argv[i], "-no-prio") == 0) {
  76. no_prio = 1;
  77. }
  78. if (strcmp(argv[i], "-size") == 0) {
  79. char *argptr;
  80. unsigned size = strtol(argv[++i], &argptr, 10);
  81. nthick = 130;
  82. ntheta = (size/128) + 2;
  83. STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size);
  84. }
  85. if (strcmp(argv[i], "-h") == 0) {
  86. printf("usage : %s [-v1|-v2|-v3] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]);
  87. }
  88. }
  89. }
  90. /*
  91. * The Finite element method code
  92. *
  93. * B C
  94. * **********
  95. * * 0 * *
  96. * * * *
  97. * * * 1 *
  98. * **********
  99. * A D
  100. */
  101. static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  102. unsigned theta_psi, unsigned thick_psi, unsigned xy, point *pmesh)
  103. {
  104. float xa,ya,xb,yb,xc,yc;
  105. float tmp;
  106. assert(theta_tr + 2 <= ntheta);
  107. assert(thick_tr + 2 <= nthick);
  108. /* A */
  109. xa = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x;
  110. ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
  111. /* B */
  112. if (side_tr) {
  113. /* lower D is actually B here */
  114. xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
  115. yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
  116. } else {
  117. /* upper */
  118. xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
  119. yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
  120. }
  121. xc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
  122. yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
  123. /* now look for the actual psi node */
  124. if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)) {
  125. /* A nothing to do */
  126. } else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)) {
  127. /* psi matches C */
  128. /* swap A and C coordinates */
  129. tmp = xa; xa = xc; xc = tmp;
  130. tmp = ya; ya = yc; yc = tmp;
  131. } else if
  132. (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))) {
  133. /* psi is D (that was stored in C) XXX */
  134. tmp = xa; xa = xb; xb = tmp;
  135. tmp = ya; ya = yb; yb = tmp;
  136. } else if
  137. (!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))) {
  138. /* psi is C */
  139. tmp = xa; xa = xb; xb = tmp;
  140. tmp = ya; ya = yb; yb = tmp;
  141. } else {
  142. /* the psi node is not a node of the current triangle */
  143. return 0.0f;
  144. }
  145. /* now the triangle should have A as the psi node */
  146. float denom;
  147. float value;
  148. denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb);
  149. switch (xy) {
  150. case X:
  151. value = (yc - yb)/denom;
  152. break;
  153. case Y:
  154. value = -(xc - xb)/denom;
  155. break;
  156. default:
  157. assert(0);
  158. }
  159. return value;
  160. }
  161. static inline float diff_y_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  162. unsigned theta_psi, unsigned thick_psi, point *pmesh)
  163. {
  164. return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, Y, pmesh);
  165. }
  166. static inline float diff_x_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  167. unsigned theta_psi, unsigned thick_psi, point *pmesh)
  168. {
  169. return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, X, pmesh);
  170. }
  171. static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, point *pmesh)
  172. {
  173. float surface;
  174. float tmp;
  175. float xi, xj, xk, yi, yj, yk;
  176. STARPU_ASSERT(theta_tr + 2 <= ntheta);
  177. STARPU_ASSERT(thick_tr + 2 <= nthick);
  178. xi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x;
  179. yi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
  180. xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
  181. yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
  182. if (side_tr) {
  183. /* lower */
  184. xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
  185. yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
  186. } else {
  187. xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
  188. yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
  189. }
  190. tmp = (xi - xj)*(yk -yj) - (xk - xj)*(yi -yj);
  191. surface = 0.5*fabs(tmp);
  192. return surface;
  193. }
  194. static inline float integral_triangle(int theta_tr, int thick_tr, unsigned side_tr,
  195. unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh)
  196. {
  197. float surface;
  198. float value;
  199. float dxi, dxj, dyi, dyj;
  200. if (theta_tr < 0) return 0.0f;
  201. if (theta_tr + 2 > (int)ntheta) return 0.0f;
  202. if (thick_tr < 0) return 0.0f;
  203. if (thick_tr + 2 > (int)nthick) return 0.0f;
  204. dxi = diff_x_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh);
  205. dyi = diff_y_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh);
  206. dxj = diff_x_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh);
  207. dyj = diff_y_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh);
  208. surface = surface_triangle(theta_tr, thick_tr, side_tr, pmesh);
  209. value = (dxi*dxj + dyi*dyj)*surface;
  210. return value;
  211. }
  212. static inline float integrale_sum(unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh)
  213. {
  214. float integral = 0.0f;
  215. integral += integral_triangle(theta_i - 1, thick_i - 1, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  216. integral += integral_triangle(theta_i - 1, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  217. integral += integral_triangle(theta_i - 1, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  218. integral += integral_triangle(theta_i, thick_i, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  219. integral += integral_triangle(theta_i, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  220. integral += integral_triangle(theta_i, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  221. return integral;
  222. }
  223. static float compute_A_value(unsigned i, unsigned j, point *pmesh)
  224. {
  225. float value = 0.0f;
  226. unsigned thick_i, thick_j;
  227. unsigned theta_i, theta_j;
  228. /* add all contributions from all connex triangles */
  229. thick_i = NODE_TO_THICK(i);
  230. thick_j = NODE_TO_THICK(j);
  231. theta_i = NODE_TO_THETA(i);
  232. theta_j = NODE_TO_THETA(j);
  233. /* Compute the Sum of all the integral over all triangles */
  234. if ( (abs(thick_i - thick_j) <= 1) && (abs(theta_i - theta_j) <= 1) )
  235. {
  236. if ( (theta_j == theta_i -1) && (thick_j == thick_i +1))
  237. goto done;
  238. if ( (theta_j == theta_i + 1) && (thick_j == thick_i - 1))
  239. goto done;
  240. /* this may not be a null entry */
  241. value += integrale_sum(theta_i, thick_i, theta_j, thick_j, pmesh);
  242. }
  243. done:
  244. return value;
  245. }
  246. #define TRANSLATE(k) (RefArray[(k)])
  247. #define TRANSLATEBACK(k) (RefArrayBack[(k)])
  248. static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B)
  249. {
  250. unsigned i;
  251. /* solve the actual problem LU X = B */
  252. /* solve LX' = Y with X' = UX */
  253. /* solve UX = X' */
  254. fprintf(stderr, "Solving the problem ...\n");
  255. float *savedB;
  256. float *LUB;
  257. if (check)
  258. {
  259. savedB = malloc(subsize*sizeof(float));
  260. memcpy(savedB, B, subsize*sizeof(float));
  261. LUB = malloc(subsize*sizeof(float));
  262. }
  263. /* L */
  264. STRSV("L", "N", "N", subsize, A, subsize, B, 1);
  265. /* U */
  266. STRSV("U", "N", "U", subsize, A, subsize, B, 1);
  267. STARPU_ASSERT(DIM == size);
  268. if (check)
  269. {
  270. /* compute the error on (LUB - savedB) which should be 0 */
  271. /* LUB = B */
  272. memcpy(LUB, B, subsize*sizeof(float));
  273. /* LUB = U * LUB */
  274. STRMV("U", "N", "U", subsize, A, subsize, LUB, 1);
  275. /* LUB = L * LUB */
  276. STRMV("L", "N", "N", subsize, A, subsize, LUB, 1);
  277. /* LUB -= B */
  278. SAXPY(subsize, -1.0f, savedB, 1, LUB, 1);
  279. /* check if LUB is close to the 0 vector */
  280. int maxind = ISAMAX(subsize, LUB, 1);
  281. fprintf(stderr, "max error (LUX - B) = %e\n",LUB[maxind - 1]);
  282. float sum = SASUM(subsize, LUB, 1);
  283. fprintf(stderr,"avg. error %e\n", sum/subsize);
  284. free(LUB);
  285. free(savedB);
  286. }
  287. /* now display back the ACTUAL result */
  288. for (i = 0; i < subsize; i++)
  289. {
  290. result[TRANSLATE(i)] = B[i];
  291. }
  292. for (i = subsize ; i < size; i++)
  293. {
  294. result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
  295. }
  296. }
  297. unsigned compute_pivot_array(int *RefArray, int *RefArrayBack, unsigned size)
  298. {
  299. unsigned k;
  300. unsigned index = 0;
  301. unsigned theta, thick;
  302. unsigned newsize;
  303. for (k = 0; k < size; k++)
  304. {
  305. RefArray[k] = k;
  306. RefArrayBack[k] = k;
  307. }
  308. /* first inner nodes */
  309. for (theta = 1; theta < ntheta - 1 ; theta++)
  310. {
  311. for (thick = 1; thick < nthick - 1; thick++)
  312. {
  313. /* inner nodes are unknown */
  314. RefArrayBack[NODE_NUMBER(theta, thick)] = index;
  315. RefArray[index] = NODE_NUMBER(theta, thick);
  316. index++;
  317. }
  318. }
  319. newsize = index;
  320. for (theta=0; theta < ntheta; theta++)
  321. {
  322. /* Lower boundary "South" */
  323. RefArrayBack[NODE_NUMBER(theta, 0)] = index;
  324. RefArray[index++] = NODE_NUMBER(theta, 0);
  325. /* Upper boundary "North" */
  326. RefArrayBack[NODE_NUMBER(theta, nthick-1)] = index;
  327. RefArray[index++] = NODE_NUMBER(theta, nthick-1);
  328. }
  329. for (thick = 1; thick < nthick -1; thick++)
  330. {
  331. /* "West "*/
  332. RefArrayBack[NODE_NUMBER(0, thick)] = index;
  333. RefArray[index++] = NODE_NUMBER(0, thick);
  334. /* "East" */
  335. RefArrayBack[NODE_NUMBER(ntheta-1, thick)] = index;
  336. RefArray[index++] = NODE_NUMBER(ntheta-1, thick);
  337. }
  338. assert(index == size);
  339. return newsize;
  340. }
  341. void build_mesh(point *mesh)
  342. {
  343. unsigned theta, thick;
  344. /* first build the mesh by determining all points positions */
  345. for (theta = 0; theta < ntheta; theta++)
  346. {
  347. float angle;
  348. angle = (ntheta - 1 - theta) * Pi/(ntheta-1);
  349. for (thick = 0; thick < nthick; thick++)
  350. {
  351. float r;
  352. r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN;
  353. switch (shape) {
  354. default:
  355. case 0:
  356. mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle);
  357. mesh[NODE_NUMBER(theta,thick)].y = r*sinf(angle);
  358. break;
  359. case 1:
  360. mesh[NODE_NUMBER(theta,thick)].x =
  361. -100 + RMIN+((RMAX-RMIN)*theta)/(ntheta - 1);
  362. mesh[NODE_NUMBER(theta,thick)].y =
  363. RMIN+((RMAX-RMIN)*thick)/(nthick - 1);
  364. break;
  365. case 2:
  366. mesh[NODE_NUMBER(theta,thick)].x = r*(2.0f*theta/(ntheta - 1)- 1.0f);
  367. mesh[NODE_NUMBER(theta,thick)].y = r*(2.0f*thick/(nthick - 1)- 1.0f);
  368. break;
  369. }
  370. }
  371. }
  372. }
  373. static unsigned long build_neighbour_vector(unsigned long*neighbours, unsigned node, int *RefArray, int *RefArrayBack)
  374. {
  375. /* where is that point in the former space ? */
  376. int former = TRANSLATE(node);
  377. int former_thick, former_theta;
  378. former_thick= (int)NODE_TO_THICK(former);
  379. former_theta = (int)NODE_TO_THETA(former);
  380. /* do a list of all the possible neighbours */
  381. unsigned nneighbours = 0;
  382. int dtheta, dthick;
  383. for (dthick = -1; dthick <= 1; dthick++)
  384. {
  385. if ((former_thick + dthick) >= 0 && (former_thick + dthick) <= (int)nthick )
  386. {
  387. for (dtheta = -1; dtheta <= 1; dtheta++)
  388. {
  389. if ((former_theta + dtheta) >= 0 && (former_theta + dtheta) <= (int)ntheta )
  390. {
  391. /* we got a possible neighbour */
  392. unsigned node =
  393. NODE_NUMBER((former_theta + dtheta), (former_thick + dthick));
  394. neighbours[nneighbours++] = TRANSLATEBACK(node);
  395. }
  396. }
  397. }
  398. }
  399. unsigned i;
  400. /* order that list */
  401. for (i = 0; i < nneighbours; i++)
  402. {
  403. /* find the i^th smallest entry for position i */
  404. unsigned index;
  405. unsigned min , min_index;
  406. min = neighbours[i];
  407. min_index = i;
  408. for (index = i+1; index < nneighbours; index++)
  409. {
  410. STARPU_ASSERT(neighbours[i] != neighbours[index]);
  411. if (neighbours[index] < min)
  412. {
  413. min = neighbours[index];
  414. min_index = index;
  415. }
  416. }
  417. /* swap values */
  418. neighbours[min_index] = neighbours[i];
  419. neighbours[i] = min;
  420. }
  421. return nneighbours;
  422. }
  423. static void build_sparse_stiffness_matrix_B(point *pmesh, float *B, float *Bformer, unsigned size, unsigned newsize, int *RefArray, int *RefArrayBack)
  424. {
  425. unsigned i,j;
  426. /* first give the value of known nodes (at boundaries) */
  427. for (i = 0; i < size; i++)
  428. {
  429. Bformer[i] = 0.0f;
  430. }
  431. for (i = 0; i < nthick; i++)
  432. {
  433. Bformer[i] = 200.0f;
  434. Bformer[size-1-i] = 200.0f;
  435. }
  436. for (i = 1; i < ntheta-1; i++)
  437. {
  438. Bformer[i*nthick] = 200.0f;
  439. Bformer[(i+1)*nthick-1] = 100.0f;
  440. }
  441. /* now the actual stiffness (reordered) matrix*/
  442. for (j = 0 ; j < newsize ; j++)
  443. {
  444. unsigned long neighbour;
  445. unsigned long nneighbours;
  446. unsigned long neighbours[9];
  447. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  448. B[j] = Bformer[TRANSLATE(j)];
  449. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  450. {
  451. unsigned i = neighbours[neighbour];
  452. if (i >= newsize)
  453. {
  454. B[j] -= compute_A_value(TRANSLATE(i), TRANSLATE(j), pmesh)*Bformer[TRANSLATE(i)];
  455. }
  456. }
  457. }
  458. }
  459. static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uint32_t **colind,
  460. uint32_t *rowptr, unsigned newsize, int *RefArray, int *RefArrayBack)
  461. {
  462. unsigned j;
  463. unsigned pos = 0;
  464. *nzval = NULL;
  465. *colind = NULL;
  466. /* now the actual stiffness (reordered) matrix*/
  467. for (j = 0 ; j < newsize ; j++)
  468. {
  469. rowptr[j] = pos;
  470. unsigned long neighbour;
  471. unsigned long nneighbours;
  472. unsigned long neighbours[9];
  473. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  474. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  475. {
  476. float val;
  477. unsigned nodeneighbour = neighbours[neighbour];
  478. if (nodeneighbour < newsize) {
  479. val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
  480. if (val != 0.0f) {
  481. *nzval = realloc(*nzval, (pos+1)*sizeof(float));
  482. *colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
  483. (*nzval)[pos] = val;
  484. (*colind)[pos] = nodeneighbour;
  485. pos++;
  486. }
  487. }
  488. }
  489. }
  490. rowptr[newsize] = pos;
  491. return pos;
  492. }
  493. static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned newsize, int *RefArray, int *RefArrayBack)
  494. {
  495. unsigned long j;
  496. /* touch all the memory */
  497. memset(A, 0, newsize*newsize*sizeof(float));
  498. /* now the actual stiffness (reordered) matrix*/
  499. for (j = 0 ; j < newsize ; j++)
  500. {
  501. unsigned long neighbour;
  502. unsigned long nneighbours;
  503. unsigned long neighbours[9];
  504. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  505. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  506. {
  507. unsigned long nodeneighbour = neighbours[neighbour];
  508. if (nodeneighbour < newsize) {
  509. float val;
  510. val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
  511. A[j+ (unsigned long)newsize*nodeneighbour] = val;
  512. }
  513. }
  514. }
  515. }
  516. int main(int argc, char **argv)
  517. {
  518. float *A;
  519. float *B;
  520. unsigned newsize;
  521. float *result;
  522. int *RefArray, *RefArrayBack;
  523. point *pmesh;
  524. float *Bformer;
  525. parse_args(argc, argv);
  526. pmesh = malloc(DIM*sizeof(point));
  527. RefArray = malloc(DIM*sizeof(int));
  528. RefArrayBack = malloc(DIM*sizeof(int));
  529. Bformer = malloc(DIM*sizeof(float));
  530. result = malloc(DIM*sizeof(float));
  531. build_mesh(pmesh);
  532. /* now simplify that problem given the boundary conditions
  533. * to do so, we remove the already known variables from the system
  534. * by pivoting the various know variable, RefArray keep track of that
  535. * pivoting */
  536. newsize = compute_pivot_array(RefArray, RefArrayBack, DIM);
  537. /* we can either use a direct method (LU decomposition here) or an
  538. * iterative method (conjugate gradient here) */
  539. if (use_cg) {
  540. unsigned nnz;
  541. float *nzval;
  542. uint32_t *colind;
  543. uint32_t *rowptr;
  544. rowptr = malloc((newsize+1)*sizeof(uint32_t));
  545. B = malloc(newsize*sizeof(float));
  546. build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack);
  547. nnz = build_sparse_stiffness_matrix_A(pmesh, &nzval, &colind, rowptr, newsize, RefArray, RefArrayBack);
  548. do_conjugate_gradient(nzval, B, result, nnz, newsize, colind, rowptr);
  549. /* XXX */
  550. memcpy(B, result, newsize*sizeof(float));
  551. /* now display back the ACTUAL result */
  552. unsigned i;
  553. for (i = 0; i < newsize; i++)
  554. {
  555. result[TRANSLATE(i)] = B[i];
  556. }
  557. for (i = newsize ; i < DIM; i++)
  558. {
  559. result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
  560. }
  561. }
  562. else {
  563. /* unfortunately CUDA does not allow late memory registration,
  564. * we need to do the malloc using CUDA itself ... */
  565. initialize_system(&A, &B, newsize, pinned);
  566. /* then build the stiffness matrix A */
  567. build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack);
  568. build_dense_stiffness_matrix_A(pmesh, A, newsize, RefArray, RefArrayBack);
  569. fprintf(stderr, "Problem size : %dx%d (%dx%d) (%ld MB)\n", newsize, newsize, DIM, DIM, ((unsigned long)newsize*newsize*4UL)/(1024*1024));
  570. STARPU_ASSERT(newsize % nblocks == 0);
  571. switch (version) {
  572. case 1:
  573. case 2:
  574. dw_factoLU(A, newsize, newsize, nblocks, version, no_prio);
  575. break;
  576. case 3:
  577. dw_factoLU_tag(A, newsize, newsize, nblocks, no_prio);
  578. break;
  579. case 4:
  580. dw_factoLU_grain(A, newsize, newsize, nblocks, nbigblocks);
  581. break;
  582. default:
  583. STARPU_ABORT();
  584. }
  585. display_stat_heat();
  586. if (check)
  587. solve_system(DIM, newsize, result, RefArray, Bformer, A, B);
  588. starpu_helper_init_cublas();
  589. starpu_shutdown();
  590. }
  591. #ifdef STARPU_OPENGL_RENDER
  592. opengl_render(ntheta, nthick, result, pmesh, argc, argv);
  593. #endif
  594. free(pmesh);
  595. free(RefArray);
  596. free(RefArrayBack);
  597. free(Bformer);
  598. free(result);
  599. return 0;
  600. }