123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819 |
- /* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2008-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
- /*
- * heat propagation simulation through either direct LU factorization or
- * iterative conjugate gradient.
- */
- #include "heat.h"
- /* default values */
- #ifdef STARPU_QUICK_CHECK
- static unsigned ntheta = 8+2;
- static unsigned nthick = 8+2;
- #else
- static unsigned ntheta = 32+2;
- static unsigned nthick = 32+2;
- #endif
- static unsigned nblocks = 16;
- static unsigned nbigblocks = 8;
- static unsigned shape = 0;
- static unsigned pinned = 0;
- static unsigned check = 0;
- static unsigned version = 2;
- static unsigned use_cg = 0; /* use a LU decomposition of CG ? */
- static unsigned no_prio = 0;
- extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
- unsigned nrow, uint32_t *colind, uint32_t *rowptr);
- static void parse_args(int argc, char **argv)
- {
- int i;
- for (i = 1; i < argc; i++)
- {
- if (strcmp(argv[i], "-cg") == 0)
- {
- use_cg = 1;
- }
- if (strcmp(argv[i], "-shape") == 0)
- {
- char *argptr;
- shape = strtol(argv[++i], &argptr, 10);
- }
- if (strcmp(argv[i], "-nthick") == 0)
- {
- char *argptr;
- nthick = strtol(argv[++i], &argptr, 10);
- }
- if (strcmp(argv[i], "-ntheta") == 0)
- {
- char *argptr;
- ntheta = strtol(argv[++i], &argptr, 10);
- }
- if (strcmp(argv[i], "-nblocks") == 0)
- {
- char *argptr;
- nblocks = strtol(argv[++i], &argptr, 10);
- }
- if (strcmp(argv[i], "-nbigblocks") == 0)
- {
- char *argptr;
- nbigblocks = strtol(argv[++i], &argptr, 10);
- }
- if (strcmp(argv[i], "-v1") == 0)
- {
- version = 1;
- }
- if (strcmp(argv[i], "-v2") == 0)
- {
- version = 2;
- }
- if (strcmp(argv[i], "-v3") == 0)
- {
- version = 3;
- }
- if (strcmp(argv[i], "-v4") == 0)
- {
- version = 4;
- }
- if (strcmp(argv[i], "-pin") == 0)
- {
- pinned = 1;
- }
- if (strcmp(argv[i], "-check") == 0)
- {
- check = 1;
- }
- if (strcmp(argv[i], "-no-prio") == 0)
- {
- no_prio = 1;
- }
- if (strcmp(argv[i], "-size") == 0)
- {
- char *argptr;
- unsigned size = strtol(argv[++i], &argptr, 10);
- nthick = 130;
- ntheta = (size/128) + 2;
- STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size);
- }
- if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0)
- {
- printf("usage : %s [-v1|-v2|-v3|-v4] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]);
- }
- }
- }
- /*
- * The Finite element method code
- *
- * B C
- * **********
- * * 0 * *
- * * * *
- * * * 1 *
- * **********
- * A D
- */
- static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
- unsigned theta_psi, unsigned thick_psi, unsigned xy, point *pmesh)
- {
- float xa,ya,xb,yb,xc,yc;
- float tmp;
- assert(theta_tr + 2 <= ntheta);
- assert(thick_tr + 2 <= nthick);
- /* A */
- xa = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x;
- ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
- /* B */
- if (side_tr)
- {
- /* lower D is actually B here */
- xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
- yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
- }
- else
- {
- /* upper */
- xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
- yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
- }
- xc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
- yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
- /* now look for the actual psi node */
- if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))
- {
- /* A nothing to do */
- }
- else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))
- {
- /* psi matches C */
- /* swap A and C coordinates */
- tmp = xa; xa = xc; xc = tmp;
- tmp = ya; ya = yc; yc = tmp;
- }
- else if (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)))
- {
- /* psi is D (that was stored in C) XXX */
- tmp = xa; xa = xb; xb = tmp;
- tmp = ya; ya = yb; yb = tmp;
- }
- else if (!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)))
- {
- /* psi is C */
- tmp = xa; xa = xb; xb = tmp;
- tmp = ya; ya = yb; yb = tmp;
- }
- else
- {
- /* the psi node is not a node of the current triangle */
- return 0.0f;
- }
- /* now the triangle should have A as the psi node */
- float denom;
- float value;
- denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb);
- switch (xy)
- {
- case X:
- value = (yc - yb)/denom;
- break;
- case Y:
- value = -(xc - xb)/denom;
- break;
- default:
- assert(0);
- }
- return value;
- }
- static inline float diff_y_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
- unsigned theta_psi, unsigned thick_psi, point *pmesh)
- {
- return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, Y, pmesh);
- }
- static inline float diff_x_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr,
- unsigned theta_psi, unsigned thick_psi, point *pmesh)
- {
- return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, X, pmesh);
- }
- static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, point *pmesh)
- {
- float surface;
- float tmp;
- float xi, xj, xk, yi, yj, yk;
- STARPU_ASSERT(theta_tr + 2 <= ntheta);
- STARPU_ASSERT(thick_tr + 2 <= nthick);
- xi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x;
- yi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
- xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
- yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
- if (side_tr)
- {
- /* lower */
- xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
- yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
- }
- else
- {
- xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
- yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
- }
- tmp = (xi - xj)*(yk -yj) - (xk - xj)*(yi -yj);
- surface = 0.5*fabs(tmp);
- return surface;
- }
- static inline float integral_triangle(int theta_tr, int thick_tr, unsigned side_tr,
- unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh)
- {
- float surface;
- float value;
- float dxi, dxj, dyi, dyj;
- if (theta_tr < 0) return 0.0f;
- if (theta_tr + 2 > (int)ntheta) return 0.0f;
- if (thick_tr < 0) return 0.0f;
- if (thick_tr + 2 > (int)nthick) return 0.0f;
- dxi = diff_x_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh);
- dyi = diff_y_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh);
- dxj = diff_x_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh);
- dyj = diff_y_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh);
- surface = surface_triangle(theta_tr, thick_tr, side_tr, pmesh);
- value = (dxi*dxj + dyi*dyj)*surface;
- return value;
- }
- static inline float integrale_sum(unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh)
- {
- float integral = 0.0f;
- integral += integral_triangle(theta_i - 1, thick_i - 1, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
- integral += integral_triangle(theta_i - 1, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
- integral += integral_triangle(theta_i - 1, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
- integral += integral_triangle(theta_i, thick_i, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
- integral += integral_triangle(theta_i, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh);
- integral += integral_triangle(theta_i, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh);
- return integral;
- }
- static float compute_A_value(unsigned i, unsigned j, point *pmesh)
- {
- float value = 0.0f;
- unsigned thick_i, thick_j;
- unsigned theta_i, theta_j;
- /* add all contributions from all connex triangles */
- thick_i = NODE_TO_THICK(i);
- thick_j = NODE_TO_THICK(j);
- theta_i = NODE_TO_THETA(i);
- theta_j = NODE_TO_THETA(j);
- /* Compute the Sum of all the integral over all triangles */
- if ( (abs((int)thick_i - (int)thick_j) <= 1) && (abs((int)theta_i - (int)theta_j) <= 1) )
- {
- if ( (theta_j == theta_i -1) && (thick_j == thick_i +1))
- goto done;
- if ( (theta_j == theta_i + 1) && (thick_j == thick_i - 1))
- goto done;
- /* this may not be a null entry */
- value += integrale_sum(theta_i, thick_i, theta_j, thick_j, pmesh);
- }
- done:
- return value;
- }
- #define TRANSLATE(k) (RefArray[(k)])
- #define TRANSLATEBACK(k) (RefArrayBack[(k)])
- static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B)
- {
- unsigned i;
- /* solve the actual problem LU X = B */
- /* solve LX' = Y with X' = UX */
- /* solve UX = X' */
- FPRINTF(stderr, "Solving the problem ...\n");
- float *savedB = NULL;
- float *LUB = NULL;
- if (check)
- {
- savedB = malloc(subsize*sizeof(float));
- memcpy(savedB, B, subsize*sizeof(float));
- LUB = malloc(subsize*sizeof(float));
- }
- /* L */
- STARPU_STRSV("L", "N", "N", subsize, A, subsize, B, 1);
- /* U */
- STARPU_STRSV("U", "N", "U", subsize, A, subsize, B, 1);
- STARPU_ASSERT(DIM == size);
- if (check)
- {
- /* compute the error on (LUB - savedB) which should be 0 */
- /* LUB = B */
- memcpy(LUB, B, subsize*sizeof(float));
- /* LUB = U * LUB */
- STARPU_STRMV("U", "N", "U", subsize, A, subsize, LUB, 1);
- /* LUB = L * LUB */
- STARPU_STRMV("L", "N", "N", subsize, A, subsize, LUB, 1);
- /* LUB -= B */
- STARPU_SAXPY(subsize, -1.0f, savedB, 1, LUB, 1);
- /* check if LUB is close to the 0 vector */
- int maxind = STARPU_ISAMAX(subsize, LUB, 1);
- FPRINTF(stderr, "max error (LUX - B) = %e\n",LUB[maxind - 1]);
- float sum = STARPU_SASUM(subsize, LUB, 1);
- FPRINTF(stderr,"avg. error %e\n", sum/subsize);
- free(LUB);
- free(savedB);
- }
- /* now display back the ACTUAL result */
- for (i = 0; i < subsize; i++)
- {
- result[TRANSLATE(i)] = B[i];
- }
- for (i = subsize ; i < size; i++)
- {
- result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
- }
- }
- unsigned compute_pivot_array(int *RefArray, int *RefArrayBack, unsigned size)
- {
- unsigned k;
- unsigned index = 0;
- unsigned theta, thick;
- unsigned newsize;
- for (k = 0; k < size; k++)
- {
- RefArray[k] = k;
- RefArrayBack[k] = k;
- }
- /* first inner nodes */
- for (theta = 1; theta < ntheta - 1 ; theta++)
- {
- for (thick = 1; thick < nthick - 1; thick++)
- {
- /* inner nodes are unknown */
- RefArrayBack[NODE_NUMBER(theta, thick)] = index;
- RefArray[index] = NODE_NUMBER(theta, thick);
- index++;
- }
- }
- newsize = index;
- for (theta=0; theta < ntheta; theta++)
- {
- /* Lower boundary "South" */
- RefArrayBack[NODE_NUMBER(theta, 0)] = index;
- RefArray[index++] = NODE_NUMBER(theta, 0);
- /* Upper boundary "North" */
- RefArrayBack[NODE_NUMBER(theta, nthick-1)] = index;
- RefArray[index++] = NODE_NUMBER(theta, nthick-1);
- }
- for (thick = 1; thick < nthick -1; thick++)
- {
- /* "West "*/
- RefArrayBack[NODE_NUMBER(0, thick)] = index;
- RefArray[index++] = NODE_NUMBER(0, thick);
- /* "East" */
- RefArrayBack[NODE_NUMBER(ntheta-1, thick)] = index;
- RefArray[index++] = NODE_NUMBER(ntheta-1, thick);
- }
- assert(index == size);
- return newsize;
- }
- void build_mesh(point *mesh)
- {
- unsigned theta, thick;
- /* first build the mesh by determining all points positions */
- for (theta = 0; theta < ntheta; theta++)
- {
- float angle;
- angle = (ntheta - 1 - theta) * Pi/(ntheta-1);
- for (thick = 0; thick < nthick; thick++)
- {
- float r;
- r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN;
- switch (shape)
- {
- default:
- case 0:
- mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle);
- mesh[NODE_NUMBER(theta,thick)].y = r*sinf(angle);
- break;
- case 1:
- mesh[NODE_NUMBER(theta,thick)].x =
- -100 + RMIN+((RMAX-RMIN)*theta)/(ntheta - 1);
- mesh[NODE_NUMBER(theta,thick)].y =
- RMIN+((RMAX-RMIN)*thick)/(nthick - 1);
- break;
- case 2:
- mesh[NODE_NUMBER(theta,thick)].x = r*(2.0f*theta/(ntheta - 1)- 1.0f);
- mesh[NODE_NUMBER(theta,thick)].y = r*(2.0f*thick/(nthick - 1)- 1.0f);
- break;
- }
- }
- }
- }
- static unsigned long build_neighbour_vector(unsigned long*neighbours, unsigned node, int *RefArray, int *RefArrayBack)
- {
- /* where is that point in the former space ? */
- int former = TRANSLATE(node);
- int former_thick, former_theta;
- former_thick= (int)NODE_TO_THICK(former);
- former_theta = (int)NODE_TO_THETA(former);
- /* do a list of all the possible neighbours */
- unsigned nneighbours = 0;
- int dtheta, dthick;
- for (dthick = -1; dthick <= 1; dthick++)
- {
- if ((former_thick + dthick) >= 0 && (former_thick + dthick) <= (int)nthick )
- {
- for (dtheta = -1; dtheta <= 1; dtheta++)
- {
- if ((former_theta + dtheta) >= 0 && (former_theta + dtheta) <= (int)ntheta )
- {
- /* we got a possible neighbour */
- unsigned pnode =
- NODE_NUMBER((former_theta + dtheta), (former_thick + dthick));
- neighbours[nneighbours++] = TRANSLATEBACK(pnode);
- }
- }
- }
- }
- unsigned i;
- /* order that list */
- for (i = 0; i < nneighbours; i++)
- {
- /* find the i^th smallest entry for position i */
- unsigned index;
- unsigned min , min_index;
- min = neighbours[i];
- min_index = i;
- for (index = i+1; index < nneighbours; index++)
- {
- STARPU_ASSERT(neighbours[i] != neighbours[index]);
- if (neighbours[index] < min)
- {
- min = neighbours[index];
- min_index = index;
- }
- }
- /* swap values */
- neighbours[min_index] = neighbours[i];
- neighbours[i] = min;
- }
- return nneighbours;
- }
- static void build_sparse_stiffness_matrix_B(point *pmesh, float *B, float *Bformer, unsigned size, unsigned newsize, int *RefArray, int *RefArrayBack)
- {
- unsigned i,j;
- /* first give the value of known nodes (at boundaries) */
- for (i = 0; i < size; i++)
- {
- Bformer[i] = 0.0f;
- }
- for (i = 0; i < nthick; i++)
- {
- Bformer[i] = 200.0f;
- Bformer[size-1-i] = 200.0f;
- }
- for (i = 1; i < ntheta-1; i++)
- {
- Bformer[i*nthick] = 200.0f;
- Bformer[(i+1)*nthick-1] = 100.0f;
- }
- /* now the actual stiffness (reordered) matrix*/
- for (j = 0 ; j < newsize ; j++)
- {
- unsigned long neighbour;
- unsigned long nneighbours;
- unsigned long neighbours[9];
- nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
- B[j] = Bformer[TRANSLATE(j)];
- for (neighbour = 0; neighbour < nneighbours; neighbour++)
- {
- unsigned n = neighbours[neighbour];
- if (n >= newsize)
- {
- B[j] -= compute_A_value(TRANSLATE(n), TRANSLATE(j), pmesh)*Bformer[TRANSLATE(n)];
- }
- }
- }
- }
- static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uint32_t **colind,
- uint32_t *rowptr, unsigned newsize, int *RefArray, int *RefArrayBack)
- {
- unsigned j;
- unsigned pos = 0;
- *nzval = NULL;
- *colind = NULL;
- /* now the actual stiffness (reordered) matrix*/
- for (j = 0 ; j < newsize ; j++)
- {
- rowptr[j] = pos;
- unsigned long neighbour;
- unsigned long nneighbours;
- unsigned long neighbours[9];
- nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
- for (neighbour = 0; neighbour < nneighbours; neighbour++)
- {
- unsigned nodeneighbour = neighbours[neighbour];
- if (nodeneighbour < newsize)
- {
- float val;
- val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
- if (val != 0.0f)
- {
- *nzval = realloc(*nzval, (pos+1)*sizeof(float));
- *colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
- (*nzval)[pos] = val;
- (*colind)[pos] = nodeneighbour;
- pos++;
- }
- }
- }
- }
- rowptr[newsize] = pos;
- return pos;
- }
- static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned newsize, int *RefArray, int *RefArrayBack)
- {
- unsigned long j;
- /* touch all the memory */
- memset(A, 0, newsize*newsize*sizeof(float));
- /* now the actual stiffness (reordered) matrix*/
- for (j = 0 ; j < newsize ; j++)
- {
- unsigned long neighbour;
- unsigned long nneighbours;
- unsigned long neighbours[9];
- nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack);
- for (neighbour = 0; neighbour < nneighbours; neighbour++)
- {
- unsigned long nodeneighbour = neighbours[neighbour];
- if (nodeneighbour < newsize)
- {
- float val;
- val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
- A[j+ (unsigned long)newsize*nodeneighbour] = val;
- }
- }
- }
- }
- int main(int argc, char **argv)
- {
- float *A;
- float *B;
- unsigned newsize;
- float *result;
- int *RefArray, *RefArrayBack;
- point *pmesh;
- float *Bformer;
- parse_args(argc, argv);
- pmesh = malloc(DIM*sizeof(point));
- RefArray = malloc(DIM*sizeof(int));
- RefArrayBack = malloc(DIM*sizeof(int));
- Bformer = malloc(DIM*sizeof(float));
- result = calloc(DIM, sizeof(float));
- build_mesh(pmesh);
- /* now simplify that problem given the boundary conditions
- * to do so, we remove the already known variables from the system
- * by pivoting the various know variable, RefArray keep track of that
- * pivoting */
- newsize = compute_pivot_array(RefArray, RefArrayBack, DIM);
- /* we can either use a direct method (LU decomposition here) or an
- * iterative method (conjugate gradient here) */
- if (use_cg)
- {
- unsigned nnz;
- float *nzval;
- uint32_t *colind;
- uint32_t *rowptr;
- rowptr = malloc((newsize+1)*sizeof(uint32_t));
- B = malloc(newsize*sizeof(float));
- build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack);
- nnz = build_sparse_stiffness_matrix_A(pmesh, &nzval, &colind, rowptr, newsize, RefArray, RefArrayBack);
- do_conjugate_gradient(nzval, B, result, nnz, newsize, colind, rowptr);
- /* XXX */
- memcpy(B, result, newsize*sizeof(float));
- /* now display back the ACTUAL result */
- unsigned i;
- for (i = 0; i < newsize; i++)
- {
- result[TRANSLATE(i)] = B[i];
- }
- for (i = newsize ; i < DIM; i++)
- {
- result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
- }
- free(nzval);
- free(colind);
- free(rowptr);
- free(B);
- }
- else
- {
- /* unfortunately CUDA does not allow late memory registration,
- * we need to do the malloc using CUDA itself ... */
- initialize_system(&A, &B, newsize, pinned);
- /* then build the stiffness matrix A */
- build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack);
- build_dense_stiffness_matrix_A(pmesh, A, newsize, RefArray, RefArrayBack);
- FPRINTF(stderr, "Problem size : %ux%u (%ux%u) (%lu MB)\n", newsize, newsize, DIM, DIM, ((unsigned long)newsize*newsize*4UL)/(1024*1024));
- STARPU_ASSERT(newsize % nblocks == 0);
- switch (version)
- {
- case 1:
- case 2:
- dw_factoLU(A, newsize, newsize, nblocks, version, no_prio);
- break;
- case 3:
- dw_factoLU_tag(A, newsize, newsize, nblocks, no_prio);
- break;
- case 4:
- dw_factoLU_grain(A, newsize, newsize, nblocks, nbigblocks);
- break;
- default:
- STARPU_ABORT();
- }
- display_stat_heat();
- if (check)
- solve_system(DIM, newsize, result, RefArray, Bformer, A, B);
- starpu_cublas_shutdown();
- starpu_shutdown();
- free_system(A, B, newsize, pinned);
- }
- #ifdef STARPU_OPENGL_RENDER
- const char *display = getenv("DISPLAY");
- if (display && display[0])
- opengl_render(ntheta, nthick, result, pmesh, argc, argv);
- #endif
- free(pmesh);
- free(RefArray);
- free(RefArrayBack);
- free(Bformer);
- free(result);
- return 0;
- }
|