| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424 | 
							- /*
 
-  * StarPU
 
-  * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
 
-  *
 
-  * This program is free software; you can redistribute it and/or modify
 
-  * it under the terms of the GNU Lesser General Public License as published by
 
-  * the Free Software Foundation; either version 2.1 of the License, or (at
 
-  * your option) any later version.
 
-  *
 
-  * This program is distributed in the hope that it will be useful, but
 
-  * WITHOUT ANY WARRANTY; without even the implied warranty of
 
-  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
-  *
 
-  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-  */
 
- #include "dw_sparse_cg.h"
 
- /*
 
-  *	Algorithm :
 
-  *		
 
-  *		i = 0
 
-  *		r = b - A x
 
-  *			( d = A x ; r = r - d )
 
-  *		d = r
 
-  *		delta_new = trans(r) r
 
-  *		delta_0 = delta_new
 
-  *
 
-  * 		while (i < i_max && delta_new > eps^2 delta_0) 
 
-  * 		{
 
-  *			q = A d
 
-  *			alpha = delta_new / ( trans(d) q )
 
-  *			x = x + alpha d
 
-  *			if ( i is divisible by 50 )
 
-  *				r = b - A x
 
-  *			else 
 
-  *				r = r - alpha q
 
-  *			delta_old = delta_new
 
-  *			delta_new = trans(r) r
 
-  *			beta = delta_new / delta_old
 
-  *			d = r + beta d
 
-  *			i = i + 1
 
-  * 		}
 
-  */
 
- /*
 
-  *	compute r = b - A x
 
-  *
 
-  *		descr[0] = A, descr[1] = x, descr [2] = r, descr[3] = b
 
-  */
 
- void core_codelet_func_1(void *descr[], __attribute__((unused)) void *arg)
 
- {
 
- 	float *nzval = (float *)GET_CSR_NZVAL(descr[0]);
 
- 	uint32_t *colind = GET_CSR_COLIND(descr[0]);
 
- 	uint32_t *rowptr = GET_CSR_ROWPTR(descr[0]);
 
- 	uint32_t firstentry = GET_CSR_ELEMSIZE(descr[0]);
 
- 	float *vecx = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	float *vecr = (float *)GET_VECTOR_PTR(descr[2]);
 
- 	float *vecb = (float *)GET_VECTOR_PTR(descr[3]);
 
- 	uint32_t nnz;
 
- 	uint32_t nrow;
 
- 	nnz = GET_CSR_NNZ(descr[0]);
 
- 	nrow = GET_CSR_NROW(descr[0]);
 
- 	unsigned row;
 
- 	for (row = 0; row < nrow; row++)
 
- 	{
 
- 		float tmp = 0.0f;
 
- 		unsigned index;
 
- 		unsigned firstindex = rowptr[row] - firstentry;
 
- 		unsigned lastindex = rowptr[row+1] - firstentry;
 
- 		for (index = firstindex; index < lastindex; index++)
 
- 		{
 
- 			unsigned col;
 
- 			col = colind[index];
 
- 			tmp += nzval[index]*vecx[col];
 
- 		}
 
- 		vecr[row] = vecb[row] - tmp;
 
- 	}
 
- }
 
- /*
 
-  *	compute d = r
 
-  *		descr[0] = d, descr[1] = r
 
-  */
 
- void core_codelet_func_2(void *descr[], __attribute__((unused)) void *arg)
 
- {
 
- 	/* simply copy r into d */
 
- 	uint32_t nx = GET_VECTOR_NX(descr[0]);
 
- 	size_t elemsize = GET_VECTOR_ELEMSIZE(descr[0]);
 
- 	STARPU_ASSERT(GET_VECTOR_NX(descr[0]) == GET_VECTOR_NX(descr[1]));
 
- 	STARPU_ASSERT(GET_VECTOR_ELEMSIZE(descr[0]) == GET_VECTOR_ELEMSIZE(descr[1]));
 
- 	float *src = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	float *dst = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	memcpy(dst, src, nx*elemsize);
 
- }
 
- /*
 
-  *	compute delta_new = trans(r) r
 
-  *		delta_0   = delta_new
 
-  *
 
-  *		args = &delta_new, &delta_0
 
-  */
 
- void core_codelet_func_3(void *descr[], void *arg)
 
- {
 
- 	struct cg_problem *pb = arg;
 
- 	float dot;
 
- 	float *vec;
 
- 	int size;
 
- 	
 
- 	/* get the vector */
 
- 	vec = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	size = (int)GET_VECTOR_NX(descr[0]);
 
- 	dot = SDOT(size, vec, 1, vec, 1);
 
- 	fprintf(stderr, "func 3 : DOT = %f\n", dot);
 
- 	pb->delta_new = dot;
 
- 	pb->delta_0 = dot;
 
- }
 
- #ifdef USE_CUDA
 
- void cublas_codelet_func_3(void *descr[], void *arg)
 
- {
 
- 	struct cg_problem *pb = arg;
 
- 	float dot;
 
- 	float *vec;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vec = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	dot = cublasSdot (size, vec, 1, vec, 1);
 
- 	pb->delta_new = dot;
 
- 	pb->delta_0 = dot;
 
- }
 
- #endif
 
- /*
 
-  *	compute q with : q = A d
 
-  *
 
-  *		descr[0] = A, descr[1] = d, descr [2] = q
 
-  */
 
- void core_codelet_func_4(void *descr[], __attribute__((unused)) void *arg)
 
- {
 
- 	float *nzval = (float *)GET_CSR_NZVAL(descr[0]);
 
- 	uint32_t *colind = GET_CSR_COLIND(descr[0]);
 
- 	uint32_t *rowptr = GET_CSR_ROWPTR(descr[0]);
 
- 	uint32_t firstentry = GET_CSR_FIRSTENTRY(descr[0]);
 
- 	float *vecd = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	float *vecq = (float *)GET_VECTOR_PTR(descr[2]);
 
- 	uint32_t nnz;
 
- 	uint32_t nrow;
 
- 	nnz = GET_CSR_NNZ(descr[0]);
 
- 	nrow = GET_CSR_NROW(descr[0]);
 
- 	unsigned row;
 
- 	for (row = 0; row < nrow; row++)
 
- 	{
 
- 		float tmp = 0.0f;
 
- 		unsigned index;
 
- 		unsigned firstindex = rowptr[row] - firstentry;
 
- 		unsigned lastindex = rowptr[row+1] - firstentry;
 
- 		for (index = firstindex; index < lastindex; index++)
 
- 		{
 
- 			unsigned col;
 
- 			col = colind[index];
 
- 			tmp += nzval[index]*vecd[col];
 
- 		}
 
- 		vecq[row] = tmp;
 
- 	}
 
- }
 
- /* 
 
-  *	compute alpha = delta_new / ( trans(d) q )
 
-  *
 
-  * 		descr[0] = d, descr[1] = q
 
-  *		args = &alpha, &delta_new
 
-  */
 
- void core_codelet_func_5(void *descr[], void *arg)
 
- {
 
- 	float dot;
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecd, *vecq;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecd = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	vecq = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	STARPU_ASSERT(GET_VECTOR_NX(descr[0]) == GET_VECTOR_NX(descr[1]));
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	dot = SDOT(size, vecd, 1, vecq, 1);
 
- 	pb->alpha = pb->delta_new / dot;
 
- }
 
- #ifdef USE_CUDA
 
- void cublas_codelet_func_5(void *descr[], void *arg)
 
- {
 
- 	float dot;
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecd, *vecq;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecd = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	vecq = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	STARPU_ASSERT(GET_VECTOR_NX(descr[0]) == GET_VECTOR_NX(descr[1]));
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	dot = cublasSdot (size, vecd, 1, vecq, 1);
 
- 	pb->alpha = pb->delta_new / dot;
 
- }
 
- #endif
 
- /*
 
-  *	compute x = x + alpha d
 
-  *
 
-  * 		descr[0] : x, descr[1] : d
 
-  *		args = &alpha
 
-  */
 
- void core_codelet_func_6(void *descr[], void *arg)
 
- {
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecx, *vecd;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecx = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	vecd = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	SAXPY(size, pb->alpha, vecd, 1, vecx, 1);
 
- }
 
- #ifdef USE_CUDA
 
- void cublas_codelet_func_6(void *descr[], void *arg)
 
- {
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecx, *vecd;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecx = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	vecd = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	cublasSaxpy (size, pb->alpha, vecd, 1, vecx, 1);
 
- }
 
- #endif
 
- /*
 
-  *	compute r = r - alpha q
 
-  *
 
-  * 		descr[0] : r, descr[1] : q
 
-  *		args = &alpha
 
-  */
 
- void core_codelet_func_7(void *descr[], void *arg)
 
- {
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecr, *vecq;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecr = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	vecq = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	SAXPY(size, -pb->alpha, vecq, 1, vecr, 1);
 
- }
 
- #ifdef USE_CUDA
 
- void cublas_codelet_func_7(void *descr[], void *arg)
 
- {
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecr, *vecq;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecr = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	vecq = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	cublasSaxpy (size, -pb->alpha, vecq, 1, vecr, 1);
 
- }
 
- #endif
 
- /*
 
-  *	compute delta_old = delta_new
 
-  *		delta_new = trans(r) r
 
-  *		beta = delta_new / delta_old
 
-  *
 
-  * 		descr[0] = r
 
-  *		args = &delta_old, &delta_new, &beta
 
-  */
 
- void core_codelet_func_8(void *descr[], void *arg)
 
- {
 
- 	float dot;
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecr;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecr = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	dot = SDOT(size, vecr, 1, vecr, 1);
 
- 	pb->delta_old = pb->delta_new;
 
- 	pb->delta_new = dot;
 
- 	pb->beta = pb->delta_new/pb->delta_old;
 
- }
 
- #ifdef USE_CUDA
 
- void cublas_codelet_func_8(void *descr[], void *arg)
 
- {
 
- 	float dot;
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecr;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecr = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	dot = cublasSdot (size, vecr, 1, vecr, 1);
 
- 	pb->delta_old = pb->delta_new;
 
- 	pb->delta_new = dot;
 
- 	pb->beta = pb->delta_new/pb->delta_old;
 
- }
 
- #endif
 
- /*
 
-  *	compute d = r + beta d
 
-  *
 
-  * 		descr[0] : d, descr[1] : r
 
-  *		args = &beta
 
-  *
 
-  */
 
- void core_codelet_func_9(void *descr[], void *arg)
 
- {
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecd, *vecr;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecd = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	vecr = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	/* d = beta d */
 
- 	SSCAL(size, pb->beta, vecd, 1);
 
- 	/* d = r + d */
 
- 	SAXPY (size, 1.0f, vecr, 1, vecd, 1);
 
- }
 
- #ifdef USE_CUDA
 
- void cublas_codelet_func_9(void *descr[], void *arg)
 
- {
 
- 	struct cg_problem *pb = arg;
 
- 	float *vecd, *vecr;
 
- 	uint32_t size;
 
- 	
 
- 	/* get the vector */
 
- 	vecd = (float *)GET_VECTOR_PTR(descr[0]);
 
- 	vecr = (float *)GET_VECTOR_PTR(descr[1]);
 
- 	size = GET_VECTOR_NX(descr[0]);
 
- 	/* d = beta d */
 
- 	cublasSscal(size, pb->beta, vecd, 1);
 
- 	/* d = r + d */
 
- 	cublasSaxpy (size, 1.0f, vecr, 1, vecd, 1);
 
- }
 
- #endif
 
 
  |