Переглянути джерело

examples: report error on cuda call failure and fix invalid printf argument type

Nathalie Furmento 8 роки тому
батько
коміт
77bdab7d9d

+ 24 - 21
examples/cholesky/cholesky_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2012, 2014-2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -35,7 +35,7 @@
 #endif
 
 /*
- *   U22 
+ *   U22
  */
 
 #if defined(STARPU_USE_CUDA)
@@ -65,7 +65,7 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STAR
 		if (worker_size == 1)
 		{
 			/* Sequential CPU kernel */
-			STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
+			STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21,
 				right, ld12, 1.0f, center, ld22);
 		}
 		else
@@ -75,11 +75,11 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STAR
 
 			unsigned block_size = (dx + worker_size - 1)/worker_size;
 			unsigned new_dx = STARPU_MIN(dx, block_size*(rank+1)) - block_size*rank;
-			
+
 			float *new_left = &left[block_size*rank];
 			float *new_center = &center[block_size*rank];
 
-			STARPU_SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, 
+			STARPU_SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21,
 				right, ld12, 1.0f, new_center, ld22);
 		}
 	}
@@ -88,8 +88,8 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STAR
 		/* CUDA kernel */
 #ifdef STARPU_USE_CUDA
 		cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(),
-				CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, 
-				&m1, left, ld21, right, ld12, 
+				CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz,
+				&m1, left, ld21, right, ld12,
 				&p1, center, ld22);
 		if (status != CUBLAS_STATUS_SUCCESS)
 			STARPU_CUBLAS_REPORT_ERROR(status);
@@ -110,7 +110,7 @@ void chol_cublas_codelet_update_u22(void *descr[], void *_args)
 }
 #endif /* STARPU_USE_CUDA */
 
-/* 
+/*
  * U21
  */
 
@@ -163,18 +163,18 @@ void chol_cublas_codelet_update_u21(void *descr[], void *_args)
 {
 	chol_common_codelet_update_u21(descr, 1, _args);
 }
-#endif 
+#endif
 
 /*
  *	U11
  */
 
-static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args) 
+static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args)
 {
 /*	printf("11\n"); */
 	float *sub11;
 
-	sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); 
+	sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]);
 
 	unsigned nx = STARPU_MATRIX_GET_NY(descr[0]);
 	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
@@ -201,10 +201,10 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_A
 				sub11[z+z*ld] = lambda11;
 
 				STARPU_ASSERT(lambda11 != 0.0f);
-		
+
 				STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
-		
-				STARPU_SSYR("L", nx - z - 1, -1.0f, 
+
+				STARPU_SSYR("L", nx - z - 1, -1.0f,
 							&sub11[(z+1)+z*ld], 1,
 							&sub11[(z+1)+(z+1)*ld], ld);
 			}
@@ -247,12 +247,11 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_A
 
 			for (z = 0; z < nx; z++)
 			{
-				
 				cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream);
 				cudaStreamSynchronize(stream);
 
 				STARPU_ASSERT(*lambda11 != 0.0f);
-				
+
 				*lambda11 = sqrt(*lambda11);
 
 /*				cublasSetVector(1, sizeof(float), lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float)); */
@@ -260,13 +259,17 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_A
 				float scal = 1.0f/(*lambda11);
 
 				status = cublasSscal(handle,
-						nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1);
+						     nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1);
+				if (status != CUBLAS_STATUS_SUCCESS)
+					STARPU_CUBLAS_REPORT_ERROR(status);
 
 				status = cublasSsyr(handle,
-							CUBLAS_FILL_MODE_UPPER,
-							nx - z - 1, &m1,
-							&sub11[(z+1)+z*ld], 1,
-							&sub11[(z+1)+(z+1)*ld], ld);
+						    CUBLAS_FILL_MODE_UPPER,
+						    nx - z - 1, &m1,
+						    &sub11[(z+1)+z*ld], 1,
+						    &sub11[(z+1)+(z+1)*ld], ld);
+				if (status != CUBLAS_STATUS_SUCCESS)
+					STARPU_CUBLAS_REPORT_ERROR(status);
 			}
 
 			cudaStreamSynchronize(stream);

+ 10 - 6
examples/heat/dw_factolu_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2012, 2014-2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -366,13 +366,17 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, STARPU_ATT
 				float scal = 1.0f/pivot;
 
 				status = cublasSscal(starpu_cublas_get_local_handle(),
-						nx - z - 1, &scal, &sub11[z+(z+1)*ld], ld);
+						     nx - z - 1, &scal, &sub11[z+(z+1)*ld], ld);
+				if (status != CUBLAS_STATUS_SUCCESS)
+					STARPU_CUBLAS_REPORT_ERROR(status);
 
 				status = cublasSger(starpu_cublas_get_local_handle(),
-						nx - z - 1, nx - z - 1, &m1,
-								&sub11[z+(z+1)*ld], ld,
-								&sub11[(z+1)+z*ld], 1,
-								&sub11[(z+1) + (z+1)*ld],ld);
+						    nx - z - 1, nx - z - 1, &m1,
+						    &sub11[z+(z+1)*ld], ld,
+						    &sub11[(z+1)+z*ld], 1,
+						    &sub11[(z+1) + (z+1)*ld],ld);
+				if (status != CUBLAS_STATUS_SUCCESS)
+					STARPU_CUBLAS_REPORT_ERROR(status);
 			}
 
 			cudaStreamSynchronize(stream);

+ 19 - 18
examples/heat/dw_sparse_cg_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
+ * Copyright (C) 2010, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,7 +23,7 @@
 
 /*
  *	Algorithm :
- *		
+ *
  *		i = 0
  *		r = b - A x
  *			( d = A x ; r = r - d )
@@ -31,14 +31,14 @@
  *		delta_new = trans(r) r
  *		delta_0 = delta_new
  *
- * 		while (i < i_max && delta_new > eps^2 delta_0) 
+ * 		while (i < i_max && delta_new > eps^2 delta_0)
  * 		{
  *			q = A d
  *			alpha = delta_new / ( trans(d) q )
  *			x = x + alpha d
  *			if ( i is divisible by 50 )
  *				r = b - A x
- *			else 
+ *			else
  *				r = r - alpha q
  *			delta_old = delta_new
  *			delta_new = trans(r) r
@@ -125,7 +125,7 @@ void cpu_codelet_func_3(void *descr[], void *arg)
 	float dot;
 	float *vec;
 	int size;
-	
+
 	/* get the vector */
 	vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	size = (int)STARPU_VECTOR_GET_NX(descr[0]);
@@ -145,7 +145,7 @@ void cublas_codelet_func_3(void *descr[], void *arg)
 	float dot;
 	float *vec;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	size = STARPU_VECTOR_GET_NX(descr[0]);
@@ -204,7 +204,7 @@ void cpu_codelet_func_4(void *descr[], STARPU_ATTRIBUTE_UNUSED void *arg)
 
 }
 
-/* 
+/*
  *	compute alpha = delta_new / ( trans(d) q )
  *
  * 		descr[0] = d, descr[1] = q
@@ -217,7 +217,7 @@ void cpu_codelet_func_5(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecd, *vecq;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -237,7 +237,7 @@ void cublas_codelet_func_5(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecd, *vecq;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -268,7 +268,7 @@ void cpu_codelet_func_6(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecx, *vecd;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecx = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	vecd = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -284,7 +284,7 @@ void cublas_codelet_func_6(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecx, *vecd;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecx = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	vecd = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -309,7 +309,7 @@ void cpu_codelet_func_7(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecr, *vecq;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -325,7 +325,7 @@ void cublas_codelet_func_7(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecr, *vecq;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -355,7 +355,7 @@ void cpu_codelet_func_8(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecr;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	size = STARPU_VECTOR_GET_NX(descr[0]);
@@ -374,12 +374,13 @@ void cublas_codelet_func_8(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecr;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	size = STARPU_VECTOR_GET_NX(descr[0]);
 
-	cublasStatus_t status = cublasSdot (starpu_cublas_get_local_handle(), size, vecr, 1, vecr, 1, &dot);
+	cublasStatus_t status = cublasSdot(starpu_cublas_get_local_handle(), size, vecr, 1, vecr, 1, &dot);
+	if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status);
 	cudaStreamSynchronize(starpu_cuda_get_local_stream());
 
 	pb->delta_old = pb->delta_new;
@@ -401,7 +402,7 @@ void cpu_codelet_func_9(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecd, *vecr;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	vecr = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -421,7 +422,7 @@ void cublas_codelet_func_9(void *descr[], void *arg)
 	struct cg_problem *pb = arg;
 	float *vecd, *vecr;
 	uint32_t size;
-	
+
 	/* get the vector */
 	vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	vecr = (float *)STARPU_VECTOR_GET_PTR(descr[1]);