heat.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009, 2010 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include "heat.h"
  18. /* default values */
  19. static unsigned ntheta = 32+2;
  20. static unsigned nthick = 32+2;
  21. static unsigned nblocks = 16;
  22. static unsigned nbigblocks = 8;
  23. static unsigned shape = 0;
  24. static unsigned pinned = 0;
  25. static unsigned check = 0;
  26. static unsigned version = 2;
  27. static unsigned use_cg = 0; /* use a LU decomposition of CG ? */
  28. static unsigned no_prio = 0;
  29. extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
  30. unsigned nrow, uint32_t *colind, uint32_t *rowptr);
  31. static void parse_args(int argc, char **argv)
  32. {
  33. int i;
  34. for (i = 1; i < argc; i++)
  35. {
  36. if (strcmp(argv[i], "-cg") == 0)
  37. {
  38. use_cg = 1;
  39. }
  40. if (strcmp(argv[i], "-shape") == 0)
  41. {
  42. char *argptr;
  43. shape = strtol(argv[++i], &argptr, 10);
  44. }
  45. if (strcmp(argv[i], "-nthick") == 0)
  46. {
  47. char *argptr;
  48. nthick = strtol(argv[++i], &argptr, 10);
  49. }
  50. if (strcmp(argv[i], "-ntheta") == 0)
  51. {
  52. char *argptr;
  53. ntheta = strtol(argv[++i], &argptr, 10);
  54. }
  55. if (strcmp(argv[i], "-nblocks") == 0)
  56. {
  57. char *argptr;
  58. nblocks = strtol(argv[++i], &argptr, 10);
  59. }
  60. if (strcmp(argv[i], "-nbigblocks") == 0)
  61. {
  62. char *argptr;
  63. nbigblocks = strtol(argv[++i], &argptr, 10);
  64. }
  65. if (strcmp(argv[i], "-v1") == 0)
  66. {
  67. version = 1;
  68. }
  69. if (strcmp(argv[i], "-v2") == 0)
  70. {
  71. version = 2;
  72. }
  73. if (strcmp(argv[i], "-v3") == 0)
  74. {
  75. version = 3;
  76. }
  77. if (strcmp(argv[i], "-v4") == 0)
  78. {
  79. version = 4;
  80. }
  81. if (strcmp(argv[i], "-pin") == 0)
  82. {
  83. pinned = 1;
  84. }
  85. if (strcmp(argv[i], "-check") == 0)
  86. {
  87. check = 1;
  88. }
  89. if (strcmp(argv[i], "-no-prio") == 0)
  90. {
  91. no_prio = 1;
  92. }
  93. if (strcmp(argv[i], "-size") == 0)
  94. {
  95. char *argptr;
  96. unsigned size = strtol(argv[++i], &argptr, 10);
  97. nthick = 130;
  98. ntheta = (size/128) + 2;
  99. STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size);
  100. }
  101. if (strcmp(argv[i], "-h") == 0)
  102. {
  103. printf("usage : %s [-v1|-v2|-v3] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]);
  104. }
  105. }
  106. }
  107. /*
  108. * The Finite element method code
  109. *
  110. * B C
  111. * **********
  112. * * 0 * *
  113. * * * *
  114. * * * 1 *
  115. * **********
  116. * A D
  117. */
  118. static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  119. unsigned theta_psi, unsigned thick_psi, unsigned xy, point *pmesh)
  120. {
  121. float xa,ya,xb,yb,xc,yc;
  122. float tmp;
  123. assert(theta_tr + 2 <= ntheta);
  124. assert(thick_tr + 2 <= nthick);
  125. /* A */
  126. xa = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x;
  127. ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
  128. /* B */
  129. if (side_tr)
  130. {
  131. /* lower D is actually B here */
  132. xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
  133. yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
  134. }
  135. else
  136. {
  137. /* upper */
  138. xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
  139. yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
  140. }
  141. xc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
  142. yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
  143. /* now look for the actual psi node */
  144. if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))
  145. {
  146. /* A nothing to do */
  147. }
  148. else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))
  149. {
  150. /* psi matches C */
  151. /* swap A and C coordinates */
  152. tmp = xa; xa = xc; xc = tmp;
  153. tmp = ya; ya = yc; yc = tmp;
  154. }
  155. else if (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)))
  156. {
  157. /* psi is D (that was stored in C) XXX */
  158. tmp = xa; xa = xb; xb = tmp;
  159. tmp = ya; ya = yb; yb = tmp;
  160. }
  161. else if (!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)))
  162. {
  163. /* psi is C */
  164. tmp = xa; xa = xb; xb = tmp;
  165. tmp = ya; ya = yb; yb = tmp;
  166. }
  167. else
  168. {
  169. /* the psi node is not a node of the current triangle */
  170. return 0.0f;
  171. }
  172. /* now the triangle should have A as the psi node */
  173. float denom;
  174. float value;
  175. denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb);
  176. switch (xy)
  177. {
  178. case X:
  179. value = (yc - yb)/denom;
  180. break;
  181. case Y:
  182. value = -(xc - xb)/denom;
  183. break;
  184. default:
  185. assert(0);
  186. }
  187. return value;
  188. }
  189. static inline float diff_y_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  190. unsigned theta_psi, unsigned thick_psi, point *pmesh)
  191. {
  192. return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, Y, pmesh);
  193. }
  194. static inline float diff_x_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
  195. unsigned theta_psi, unsigned thick_psi, point *pmesh)
  196. {
  197. return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, X, pmesh);
  198. }
  199. static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, point *pmesh)
  200. {
  201. float surface;
  202. float tmp;
  203. float xi, xj, xk, yi, yj, yk;
  204. STARPU_ASSERT(theta_tr + 2 <= ntheta);
  205. STARPU_ASSERT(thick_tr + 2 <= nthick);
  206. xi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x;
  207. yi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
  208. xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
  209. yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
  210. if (side_tr)
  211. {
  212. /* lower */
  213. xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
  214. yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
  215. }
  216. else
  217. {
  218. xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
  219. yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
  220. }
  221. tmp = (xi - xj)*(yk -yj) - (xk - xj)*(yi -yj);
  222. surface = 0.5*fabs(tmp);
  223. return surface;
  224. }
  225. static inline float integral_triangle(int theta_tr, int thick_tr, unsigned side_tr,
  226. unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh)
  227. {
  228. float surface;
  229. float value;
  230. float dxi, dxj, dyi, dyj;
  231. if (theta_tr < 0) return 0.0f;
  232. if (theta_tr + 2 > (int)ntheta) return 0.0f;
  233. if (thick_tr < 0) return 0.0f;
  234. if (thick_tr + 2 > (int)nthick) return 0.0f;
  235. dxi = diff_x_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh);
  236. dyi = diff_y_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh);
  237. dxj = diff_x_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh);
  238. dyj = diff_y_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh);
  239. surface = surface_triangle(theta_tr, thick_tr, side_tr, pmesh);
  240. value = (dxi*dxj + dyi*dyj)*surface;
  241. return value;
  242. }
  243. static inline float integrale_sum(unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh)
  244. {
  245. float integral = 0.0f;
  246. integral += integral_triangle(theta_i - 1, thick_i - 1, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  247. integral += integral_triangle(theta_i - 1, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  248. integral += integral_triangle(theta_i - 1, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  249. integral += integral_triangle(theta_i, thick_i, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  250. integral += integral_triangle(theta_i, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
  251. integral += integral_triangle(theta_i, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
  252. return integral;
  253. }
  254. static float compute_A_value(unsigned i, unsigned j, point *pmesh)
  255. {
  256. float value = 0.0f;
  257. unsigned thick_i, thick_j;
  258. unsigned theta_i, theta_j;
  259. /* add all contributions from all connex triangles */
  260. thick_i = NODE_TO_THICK(i);
  261. thick_j = NODE_TO_THICK(j);
  262. theta_i = NODE_TO_THETA(i);
  263. theta_j = NODE_TO_THETA(j);
  264. /* Compute the Sum of all the integral over all triangles */
  265. if ( (abs(thick_i - thick_j) <= 1) && (abs(theta_i - theta_j) <= 1) )
  266. {
  267. if ( (theta_j == theta_i -1) && (thick_j == thick_i +1))
  268. goto done;
  269. if ( (theta_j == theta_i + 1) && (thick_j == thick_i - 1))
  270. goto done;
  271. /* this may not be a null entry */
  272. value += integrale_sum(theta_i, thick_i, theta_j, thick_j, pmesh);
  273. }
  274. done:
  275. return value;
  276. }
  277. #define TRANSLATE(k) (RefArray[(k)])
  278. #define TRANSLATEBACK(k) (RefArrayBack[(k)])
  279. static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B)
  280. {
  281. unsigned i;
  282. /* solve the actual problem LU X = B */
  283. /* solve LX' = Y with X' = UX */
  284. /* solve UX = X' */
  285. FPRINTF(stderr, "Solving the problem ...\n");
  286. float *savedB = NULL;
  287. float *LUB = NULL;
  288. if (check)
  289. {
  290. savedB = malloc(subsize*sizeof(float));
  291. memcpy(savedB, B, subsize*sizeof(float));
  292. LUB = malloc(subsize*sizeof(float));
  293. }
  294. /* L */
  295. STRSV("L", "N", "N", subsize, A, subsize, B, 1);
  296. /* U */
  297. STRSV("U", "N", "U", subsize, A, subsize, B, 1);
  298. STARPU_ASSERT(DIM == size);
  299. if (check)
  300. {
  301. /* compute the error on (LUB - savedB) which should be 0 */
  302. /* LUB = B */
  303. memcpy(LUB, B, subsize*sizeof(float));
  304. /* LUB = U * LUB */
  305. STRMV("U", "N", "U", subsize, A, subsize, LUB, 1);
  306. /* LUB = L * LUB */
  307. STRMV("L", "N", "N", subsize, A, subsize, LUB, 1);
  308. /* LUB -= B */
  309. SAXPY(subsize, -1.0f, savedB, 1, LUB, 1);
  310. /* check if LUB is close to the 0 vector */
  311. int maxind = ISAMAX(subsize, LUB, 1);
  312. FPRINTF(stderr, "max error (LUX - B) = %e\n",LUB[maxind - 1]);
  313. float sum = SASUM(subsize, LUB, 1);
  314. FPRINTF(stderr,"avg. error %e\n", sum/subsize);
  315. free(LUB);
  316. free(savedB);
  317. }
  318. /* now display back the ACTUAL result */
  319. for (i = 0; i < subsize; i++)
  320. {
  321. result[TRANSLATE(i)] = B[i];
  322. }
  323. for (i = subsize ; i < size; i++)
  324. {
  325. result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
  326. }
  327. }
  328. unsigned compute_pivot_array(int *RefArray, int *RefArrayBack, unsigned size)
  329. {
  330. unsigned k;
  331. unsigned index = 0;
  332. unsigned theta, thick;
  333. unsigned newsize;
  334. for (k = 0; k < size; k++)
  335. {
  336. RefArray[k] = k;
  337. RefArrayBack[k] = k;
  338. }
  339. /* first inner nodes */
  340. for (theta = 1; theta < ntheta - 1 ; theta++)
  341. {
  342. for (thick = 1; thick < nthick - 1; thick++)
  343. {
  344. /* inner nodes are unknown */
  345. RefArrayBack[NODE_NUMBER(theta, thick)] = index;
  346. RefArray[index] = NODE_NUMBER(theta, thick);
  347. index++;
  348. }
  349. }
  350. newsize = index;
  351. for (theta=0; theta < ntheta; theta++)
  352. {
  353. /* Lower boundary "South" */
  354. RefArrayBack[NODE_NUMBER(theta, 0)] = index;
  355. RefArray[index++] = NODE_NUMBER(theta, 0);
  356. /* Upper boundary "North" */
  357. RefArrayBack[NODE_NUMBER(theta, nthick-1)] = index;
  358. RefArray[index++] = NODE_NUMBER(theta, nthick-1);
  359. }
  360. for (thick = 1; thick < nthick -1; thick++)
  361. {
  362. /* "West "*/
  363. RefArrayBack[NODE_NUMBER(0, thick)] = index;
  364. RefArray[index++] = NODE_NUMBER(0, thick);
  365. /* "East" */
  366. RefArrayBack[NODE_NUMBER(ntheta-1, thick)] = index;
  367. RefArray[index++] = NODE_NUMBER(ntheta-1, thick);
  368. }
  369. assert(index == size);
  370. return newsize;
  371. }
  372. void build_mesh(point *mesh)
  373. {
  374. unsigned theta, thick;
  375. /* first build the mesh by determining all points positions */
  376. for (theta = 0; theta < ntheta; theta++)
  377. {
  378. float angle;
  379. angle = (ntheta - 1 - theta) * Pi/(ntheta-1);
  380. for (thick = 0; thick < nthick; thick++)
  381. {
  382. float r;
  383. r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN;
  384. switch (shape)
  385. {
  386. default:
  387. case 0:
  388. mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle);
  389. mesh[NODE_NUMBER(theta,thick)].y = r*sinf(angle);
  390. break;
  391. case 1:
  392. mesh[NODE_NUMBER(theta,thick)].x =
  393. -100 + RMIN+((RMAX-RMIN)*theta)/(ntheta - 1);
  394. mesh[NODE_NUMBER(theta,thick)].y =
  395. RMIN+((RMAX-RMIN)*thick)/(nthick - 1);
  396. break;
  397. case 2:
  398. mesh[NODE_NUMBER(theta,thick)].x = r*(2.0f*theta/(ntheta - 1)- 1.0f);
  399. mesh[NODE_NUMBER(theta,thick)].y = r*(2.0f*thick/(nthick - 1)- 1.0f);
  400. break;
  401. }
  402. }
  403. }
  404. }
  405. static unsigned long build_neighbour_vector(unsigned long*neighbours, unsigned node, int *RefArray, int *RefArrayBack)
  406. {
  407. /* where is that point in the former space ? */
  408. int former = TRANSLATE(node);
  409. int former_thick, former_theta;
  410. former_thick= (int)NODE_TO_THICK(former);
  411. former_theta = (int)NODE_TO_THETA(former);
  412. /* do a list of all the possible neighbours */
  413. unsigned nneighbours = 0;
  414. int dtheta, dthick;
  415. for (dthick = -1; dthick <= 1; dthick++)
  416. {
  417. if ((former_thick + dthick) >= 0 && (former_thick + dthick) <= (int)nthick )
  418. {
  419. for (dtheta = -1; dtheta <= 1; dtheta++)
  420. {
  421. if ((former_theta + dtheta) >= 0 && (former_theta + dtheta) <= (int)ntheta )
  422. {
  423. /* we got a possible neighbour */
  424. unsigned pnode =
  425. NODE_NUMBER((former_theta + dtheta), (former_thick + dthick));
  426. neighbours[nneighbours++] = TRANSLATEBACK(pnode);
  427. }
  428. }
  429. }
  430. }
  431. unsigned i;
  432. /* order that list */
  433. for (i = 0; i < nneighbours; i++)
  434. {
  435. /* find the i^th smallest entry for position i */
  436. unsigned index;
  437. unsigned min , min_index;
  438. min = neighbours[i];
  439. min_index = i;
  440. for (index = i+1; index < nneighbours; index++)
  441. {
  442. STARPU_ASSERT(neighbours[i] != neighbours[index]);
  443. if (neighbours[index] < min)
  444. {
  445. min = neighbours[index];
  446. min_index = index;
  447. }
  448. }
  449. /* swap values */
  450. neighbours[min_index] = neighbours[i];
  451. neighbours[i] = min;
  452. }
  453. return nneighbours;
  454. }
  455. static void build_sparse_stiffness_matrix_B(point *pmesh, float *B, float *Bformer, unsigned size, unsigned newsize, int *RefArray, int *RefArrayBack)
  456. {
  457. unsigned i,j;
  458. /* first give the value of known nodes (at boundaries) */
  459. for (i = 0; i < size; i++)
  460. {
  461. Bformer[i] = 0.0f;
  462. }
  463. for (i = 0; i < nthick; i++)
  464. {
  465. Bformer[i] = 200.0f;
  466. Bformer[size-1-i] = 200.0f;
  467. }
  468. for (i = 1; i < ntheta-1; i++)
  469. {
  470. Bformer[i*nthick] = 200.0f;
  471. Bformer[(i+1)*nthick-1] = 100.0f;
  472. }
  473. /* now the actual stiffness (reordered) matrix*/
  474. for (j = 0 ; j < newsize ; j++)
  475. {
  476. unsigned long neighbour;
  477. unsigned long nneighbours;
  478. unsigned long neighbours[9];
  479. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  480. B[j] = Bformer[TRANSLATE(j)];
  481. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  482. {
  483. unsigned n = neighbours[neighbour];
  484. if (n >= newsize)
  485. {
  486. B[j] -= compute_A_value(TRANSLATE(n), TRANSLATE(j), pmesh)*Bformer[TRANSLATE(n)];
  487. }
  488. }
  489. }
  490. }
  491. static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uint32_t **colind,
  492. uint32_t *rowptr, unsigned newsize, int *RefArray, int *RefArrayBack)
  493. {
  494. unsigned j;
  495. unsigned pos = 0;
  496. *nzval = NULL;
  497. *colind = NULL;
  498. /* now the actual stiffness (reordered) matrix*/
  499. for (j = 0 ; j < newsize ; j++)
  500. {
  501. rowptr[j] = pos;
  502. unsigned long neighbour;
  503. unsigned long nneighbours;
  504. unsigned long neighbours[9];
  505. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  506. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  507. {
  508. float val;
  509. unsigned nodeneighbour = neighbours[neighbour];
  510. if (nodeneighbour < newsize)
  511. {
  512. val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
  513. if (val != 0.0f)
  514. {
  515. *nzval = realloc(*nzval, (pos+1)*sizeof(float));
  516. *colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
  517. (*nzval)[pos] = val;
  518. (*colind)[pos] = nodeneighbour;
  519. pos++;
  520. }
  521. }
  522. }
  523. }
  524. rowptr[newsize] = pos;
  525. return pos;
  526. }
  527. static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned newsize, int *RefArray, int *RefArrayBack)
  528. {
  529. unsigned long j;
  530. /* touch all the memory */
  531. memset(A, 0, newsize*newsize*sizeof(float));
  532. /* now the actual stiffness (reordered) matrix*/
  533. for (j = 0 ; j < newsize ; j++)
  534. {
  535. unsigned long neighbour;
  536. unsigned long nneighbours;
  537. unsigned long neighbours[9];
  538. nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
  539. for (neighbour = 0; neighbour < nneighbours; neighbour++)
  540. {
  541. unsigned long nodeneighbour = neighbours[neighbour];
  542. if (nodeneighbour < newsize)
  543. {
  544. float val;
  545. val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
  546. A[j+ (unsigned long)newsize*nodeneighbour] = val;
  547. }
  548. }
  549. }
  550. }
  551. int main(int argc, char **argv)
  552. {
  553. float *A;
  554. float *B;
  555. unsigned newsize;
  556. float *result;
  557. int *RefArray, *RefArrayBack;
  558. point *pmesh;
  559. float *Bformer;
  560. parse_args(argc, argv);
  561. pmesh = malloc(DIM*sizeof(point));
  562. RefArray = malloc(DIM*sizeof(int));
  563. RefArrayBack = malloc(DIM*sizeof(int));
  564. Bformer = malloc(DIM*sizeof(float));
  565. result = malloc(DIM*sizeof(float));
  566. build_mesh(pmesh);
  567. /* now simplify that problem given the boundary conditions
  568. * to do so, we remove the already known variables from the system
  569. * by pivoting the various know variable, RefArray keep track of that
  570. * pivoting */
  571. newsize = compute_pivot_array(RefArray, RefArrayBack, DIM);
  572. /* we can either use a direct method (LU decomposition here) or an
  573. * iterative method (conjugate gradient here) */
  574. if (use_cg)
  575. {
  576. unsigned nnz;
  577. float *nzval;
  578. uint32_t *colind;
  579. uint32_t *rowptr;
  580. rowptr = malloc((newsize+1)*sizeof(uint32_t));
  581. B = malloc(newsize*sizeof(float));
  582. build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack);
  583. nnz = build_sparse_stiffness_matrix_A(pmesh, &nzval, &colind, rowptr, newsize, RefArray, RefArrayBack);
  584. do_conjugate_gradient(nzval, B, result, nnz, newsize, colind, rowptr);
  585. /* XXX */
  586. memcpy(B, result, newsize*sizeof(float));
  587. /* now display back the ACTUAL result */
  588. unsigned i;
  589. for (i = 0; i < newsize; i++)
  590. {
  591. result[TRANSLATE(i)] = B[i];
  592. }
  593. for (i = newsize ; i < DIM; i++)
  594. {
  595. result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
  596. }
  597. }
  598. else
  599. {
  600. /* unfortunately CUDA does not allow late memory registration,
  601. * we need to do the malloc using CUDA itself ... */
  602. initialize_system(&A, &B, newsize, pinned);
  603. /* then build the stiffness matrix A */
  604. build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack);
  605. build_dense_stiffness_matrix_A(pmesh, A, newsize, RefArray, RefArrayBack);
  606. FPRINTF(stderr, "Problem size : %ux%u (%ux%u) (%lu MB)\n", newsize, newsize, DIM, DIM, ((unsigned long)newsize*newsize*4UL)/(1024*1024));
  607. STARPU_ASSERT(newsize % nblocks == 0);
  608. switch (version)
  609. {
  610. case 1:
  611. case 2:
  612. dw_factoLU(A, newsize, newsize, nblocks, version, no_prio);
  613. break;
  614. case 3:
  615. dw_factoLU_tag(A, newsize, newsize, nblocks, no_prio);
  616. break;
  617. case 4:
  618. dw_factoLU_grain(A, newsize, newsize, nblocks, nbigblocks);
  619. break;
  620. default:
  621. STARPU_ABORT();
  622. }
  623. display_stat_heat();
  624. if (check)
  625. solve_system(DIM, newsize, result, RefArray, Bformer, A, B);
  626. starpu_shutdown();
  627. }
  628. #ifdef STARPU_OPENGL_RENDER
  629. opengl_render(ntheta, nthick, result, pmesh, argc, argv);
  630. #endif
  631. free(pmesh);
  632. free(RefArray);
  633. free(RefArrayBack);
  634. free(Bformer);
  635. free(result);
  636. return 0;
  637. }