9 years ago · 9109d34b21
--- a/examples/cholesky/cholesky_implicit.c
+++ b/examples/cholesky/cholesky_implicit.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2016  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -188,7 +188,6 @@ static int cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
				 
			
 
				 static void execute_cholesky(unsigned size, unsigned nblocks)
			
 
				 {
			
 
				-	int ret;
			
 
				 	float *mat = NULL;
			
 
				 	unsigned i,j;
			
 
				 
			
@@ -225,7 +224,7 @@ static void execute_cholesky(unsigned size, unsigned nblocks)
 
				 	}
			
 
				 #endif
			
 
				 
			
 
				-	ret = cholesky(mat, size, size, nblocks);
			
 
				+	cholesky(mat, size, size, nblocks);
			
 
				 
			
 
				 #ifdef PRINT_OUTPUT
			
 
				 	FPRINTF(stdout, "Results :\n");
			
@@ -326,8 +325,7 @@ int main(int argc, char **argv)
 
				 	ret = starpu_init(NULL);
			
 
				 	starpu_fxt_stop_profiling();
			
 
				 
			
 
				-	if (ret == -ENODEV)
			
 
				-                return 77;
			
 
				+	if (ret == -ENODEV) return 77;
			
 
				         STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -359,5 +357,5 @@ int main(int argc, char **argv)
 
				 	starpu_cublas_shutdown();
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				-	return ret;
			
 
				+	return 0;
			
 
				 }
			
--- a/examples/heat/heat.c
+++ b/examples/heat/heat.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010, 2012, 2015  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2016  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -131,7 +131,7 @@ static void parse_args(int argc, char **argv)
 
				 
			
 
				 
			
 
				 /*
			
 
				- * The Finite element method code 
			
 
				+ * The Finite element method code
			
 
				  *
			
 
				  *   B              C
			
 
				  *	**********
			
@@ -365,38 +365,38 @@ static void solve_system(unsigned size, unsigned subsize, float *result, int *Re
 
				 		LUB = malloc(subsize*sizeof(float));
			
 
				 	}
			
 
				 
			
 
				-		/* L */
			
 
				-		STARPU_STRSV("L", "N", "N", subsize, A, subsize, B, 1);
			
 
				-	
			
 
				-		/* U */
			
 
				-	        STARPU_STRSV("U", "N", "U", subsize, A, subsize, B, 1);
			
 
				-	
			
 
				-		STARPU_ASSERT(DIM == size);
			
 
				-	
			
 
				+	/* L */
			
 
				+	STARPU_STRSV("L", "N", "N", subsize, A, subsize, B, 1);
			
 
				+
			
 
				+	/* U */
			
 
				+	STARPU_STRSV("U", "N", "U", subsize, A, subsize, B, 1);
			
 
				+
			
 
				+	STARPU_ASSERT(DIM == size);
			
 
				+
			
 
				 	if (check)
			
 
				 	{
			
 
				 		/* compute the error on (LUB - savedB) which should be 0 */
			
 
				-	
			
 
				+
			
 
				 		/* LUB = B */
			
 
				 		memcpy(LUB, B, subsize*sizeof(float));
			
 
				-	
			
 
				-	
			
 
				+
			
 
				+
			
 
				 		/* LUB = U * LUB */
			
 
				 		STARPU_STRMV("U", "N", "U", subsize, A, subsize, LUB, 1);
			
 
				-		
			
 
				+
			
 
				 		/* LUB = L * LUB */
			
 
				 		STARPU_STRMV("L", "N", "N", subsize, A, subsize, LUB, 1);
			
 
				-	
			
 
				+
			
 
				 		/* LUB -= B */
			
 
				 		STARPU_SAXPY(subsize, -1.0f, savedB, 1, LUB, 1);
			
 
				-	
			
 
				+
			
 
				 		/* check if LUB is close to the 0 vector */
			
 
				 		int maxind = STARPU_ISAMAX(subsize, LUB, 1);
			
 
				 		FPRINTF(stderr, "max error (LUX - B) = %e\n",LUB[maxind - 1]);
			
 
				 
			
 
				 		float sum = STARPU_SASUM(subsize, LUB, 1);
			
 
				 		FPRINTF(stderr,"avg. error %e\n", sum/subsize);
			
 
				-	
			
 
				+
			
 
				 		free(LUB);
			
 
				 		free(savedB);
			
 
				 	}
			
@@ -430,7 +430,7 @@ unsigned compute_pivot_array(int *RefArray, int *RefArrayBack, unsigned size)
 
				 	/* first inner nodes */
			
 
				 	for (theta = 1; theta < ntheta - 1 ; theta++)
			
 
				 	{
			
 
				-		for (thick = 1; thick < nthick - 1; thick++) 
			
 
				+		for (thick = 1; thick < nthick - 1; thick++)
			
 
				 		{
			
 
				 			/* inner nodes are unknown */
			
 
				 			RefArrayBack[NODE_NUMBER(theta, thick)] = index;
			
@@ -447,7 +447,7 @@ unsigned compute_pivot_array(int *RefArray, int *RefArrayBack, unsigned size)
 
				 		/* Lower boundary "South" */
			
 
				 		RefArrayBack[NODE_NUMBER(theta, 0)] = index;
			
 
				 		RefArray[index++] = NODE_NUMBER(theta, 0);
			
 
				-		
			
 
				+
			
 
				 		/* Upper boundary "North" */
			
 
				 		RefArrayBack[NODE_NUMBER(theta, nthick-1)] = index;
			
 
				 		RefArray[index++] = NODE_NUMBER(theta, nthick-1);
			
@@ -494,7 +494,7 @@ void build_mesh(point *mesh)
 
				 				case 1:
			
 
				 					mesh[NODE_NUMBER(theta,thick)].x =
			
 
				 							-100 + RMIN+((RMAX-RMIN)*theta)/(ntheta - 1);
			
 
				-					mesh[NODE_NUMBER(theta,thick)].y = 
			
 
				+					mesh[NODE_NUMBER(theta,thick)].y =
			
 
				 							RMIN+((RMAX-RMIN)*thick)/(nthick - 1);
			
 
				 					break;
			
 
				 				case 2:
			
@@ -527,7 +527,7 @@ static unsigned long build_neighbour_vector(unsigned long*neighbours, unsigned n
 
				 				if ((former_theta + dtheta) >= 0 && (former_theta + dtheta) <= (int)ntheta )
			
 
				 				{
			
 
				 					/* we got a possible neighbour */
			
 
				-					unsigned pnode = 
			
 
				+					unsigned pnode =
			
 
				 						NODE_NUMBER((former_theta + dtheta), (former_thick + dthick));
			
 
				 
			
 
				 					neighbours[nneighbours++] = TRANSLATEBACK(pnode);
			
@@ -602,7 +602,7 @@ static void build_sparse_stiffness_matrix_B(point *pmesh, float *B, float *Bform
 
				 
			
 
				 		for (neighbour = 0; neighbour < nneighbours; neighbour++)
			
 
				 		{
			
 
				-			unsigned n = neighbours[neighbour]; 
			
 
				+			unsigned n = neighbours[neighbour];
			
 
				 			if (n >= newsize)
			
 
				 			{
			
 
				 				B[j] -= compute_A_value(TRANSLATE(n), TRANSLATE(j), pmesh)*Bformer[TRANSLATE(n)];
			
@@ -611,7 +611,7 @@ static void build_sparse_stiffness_matrix_B(point *pmesh, float *B, float *Bform
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uint32_t **colind, 
			
 
				+static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uint32_t **colind,
			
 
				 						uint32_t *rowptr, unsigned newsize, int *RefArray, int *RefArrayBack)
			
 
				 {
			
 
				 	unsigned j;
			
@@ -641,12 +641,12 @@ static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uin
 
				 			{
			
 
				 
			
 
				 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
			
 
				-	
			
 
				+
			
 
				 				if (val != 0.0f)
			
 
				 				{
			
 
				 					*nzval = realloc(*nzval, (pos+1)*sizeof(float));
			
 
				 					*colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
			
 
				-	
			
 
				+
			
 
				 					(*nzval)[pos] = val;
			
 
				 					(*colind)[pos] = nodeneighbour;
			
 
				 
			
@@ -714,13 +714,13 @@ int main(int argc, char **argv)
 
				 
			
 
				 	build_mesh(pmesh);
			
 
				 
			
 
				-	/* now simplify that problem given the boundary conditions 
			
 
				+	/* now simplify that problem given the boundary conditions
			
 
				 	 * to do so, we remove the already known variables from the system
			
 
				 	 * by pivoting the various know variable, RefArray keep track of that
			
 
				-	 * pivoting */ 
			
 
				+	 * pivoting */
			
 
				 	newsize = compute_pivot_array(RefArray, RefArrayBack, DIM);
			
 
				 
			
 
				-	/* we can either use a direct method (LU decomposition here) or an 
			
 
				+	/* we can either use a direct method (LU decomposition here) or an
			
 
				 	 * iterative method (conjugate gradient here) */
			
 
				 	if (use_cg)
			
 
				 	{
			
@@ -748,17 +748,17 @@ int main(int argc, char **argv)
 
				 		{
			
 
				 			result[TRANSLATE(i)] = B[i];
			
 
				 		}
			
 
				-	
			
 
				+
			
 
				 		for (i = newsize ; i < DIM; i++)
			
 
				 		{
			
 
				 			result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
			
 
				 		}
			
 
				-	
			
 
				+
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				 
			
 
				-		/* unfortunately CUDA does not allow late memory registration, 
			
 
				+		/* unfortunately CUDA does not allow late memory registration,
			
 
				 		 * we need to do the malloc using CUDA itself ... */
			
 
				 		initialize_system(&A, &B, newsize, pinned);
			
 
				 
			
--- a/examples/worker_collections/worker_tree_example.c
+++ b/examples/worker_collections/worker_tree_example.c
@@ -71,7 +71,7 @@ int main()
 
				 	for(i = 0; i < ncpus; i++)
			
 
				 	{
			
 
				 		int added = co->add(co, procs[i]);
			
 
				-//		FPRINTF(stderr, "added proc %d to the tree \n", added);
			
 
				+		FPRINTF(stderr, "added proc %d to the tree \n", added);
			
 
				 	}
			
 
				 
			
 
				 	struct starpu_sched_ctx_iterator it;
			
@@ -81,7 +81,7 @@ int main()
 
				 	while(co->has_next(co, &it))
			
 
				 	{
			
 
				 		pu = co->get_next(co, &it);
			
 
				-//		FPRINTF(stderr, "pu = %d out of %d workers \n", pu, co->nworkers);
			
 
				+		FPRINTF(stderr, "pu = %d out of %d workers \n", pu, co->nworkers);
			
 
				 	}
			
 
				 
			
 
				 	unsigned six = 6;
			
@@ -90,13 +90,13 @@ int main()
 
				 	for(i = 0; i < six; i++)
			
 
				 	{
			
 
				 		co->remove(co, i);
			
 
				-//		FPRINTF(stderr, "remove %d out of %d workers\n", i, co->nworkers);
			
 
				+		FPRINTF(stderr, "remove %d out of %d workers\n", i, co->nworkers);
			
 
				 	}
			
 
				 
			
 
				 	while(co->has_next(co, &it))
			
 
				 	{
			
 
				 		pu = co->get_next(co, &it);
			
 
				-//		FPRINTF(stderr, "pu = %d out of %d workers \n", pu, co->nworkers);
			
 
				+		FPRINTF(stderr, "pu = %d out of %d workers \n", pu, co->nworkers);
			
 
				 	}
			
 
				 
			
 
				 	FPRINTF(stderr, "timing init = %lf \n", timing);
			
--- a/socl/examples/matmul/matmul.c
+++ b/socl/examples/matmul/matmul.c
@@ -303,16 +303,19 @@ int main(int argc, const char** argv) {
 
				 	A_data = (TYPE*)malloc(A_mem_size);

			
 
				 	if (A_data == NULL) {

			
 
				 		perror("malloc");

			
 
				+		exit(-1);

			
 
				 	}

			
 
				 

			
 
				 	B_data = (TYPE*)malloc(B_mem_size);

			
 
				 	if (B_data == NULL) {

			
 
				 		perror("malloc");

			
 
				+		exit(-1);

			
 
				 	}

			
 
				 

			
 
				 	C_data = (TYPE*) malloc(C_mem_size);

			
 
				 	if (C_data == NULL) {

			
 
				 		perror("malloc");

			
 
				+		exit(-1);

			
 
				 	}

			
 
				 

			
 
				 	cl_program program[platform_count];

			
--- a/src/sched_policies/parallel_heft.c
+++ b/src/sched_policies/parallel_heft.c
@@ -310,6 +310,8 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 
				 	int unknown = 0;
			
 
				 	struct starpu_sched_ctx_iterator it;
			
 
				 
			
 
				+	memset(skip_worker, 0, nworkers_ctx*STARPU_MAXIMPLEMENTATIONS*sizeof(int));
			
 
				+
			
 
				 	workers->init_iterator(workers, &it);
			
 
				 	while(workers->has_next(workers, &it))
			
 
				 	{
			
--- a/tests/datawizard/gpu_register.c
+++ b/tests/datawizard/gpu_register.c
@@ -138,7 +138,11 @@ test_cuda(void)
 
				 
			
 
				 	ret = submit_tasks(handle, pieces, n);
			
 
				 	if (ret == -ENODEV)
			
 
				+	{
			
 
				+		starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu));
			
 
				+		free(foo);
			
 
				 		return -ENODEV;
			
 
				+	}
			
 
				 
			
 
				 	starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen));
			
 
				 	starpu_data_unregister(handle);
			
@@ -146,7 +150,11 @@ test_cuda(void)
 
				 	starpu_cuda_set_device(devid);
			
 
				 	cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				+	{
			
 
				+		starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu));
			
 
				+		free(foo);
			
 
				 		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				+	}
			
 
				 
			
 
				 	ret = check_result(foo, size);
			
 
				 	starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu));
			
--- a/tests/errorcheck/workers_cpuid.c
+++ b/tests/errorcheck/workers_cpuid.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2012, 2015-2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -82,7 +82,7 @@ static char *array_to_str(long *array, int n)
 
				 		nchar = sprintf(ptr, "%ld ", array[i]);
			
 
				 		ptr += nchar;
			
 
				 	}
			
 
				-		
			
 
				+
			
 
				 	return str;
			
 
				 }
			
 
				 
			
@@ -102,7 +102,7 @@ static int test_combination(long *combination, unsigned n)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	device_workers = 0;
			
 
				-	
			
 
				+
			
 
				 	/* Check for all devices */
			
 
				 	for (type=STARPU_CUDA_WORKER; type<STARPU_NARCH; type++)
			
 
				 	{
			
@@ -120,7 +120,7 @@ static int test_combination(long *combination, unsigned n)
 
				 		if (!check_workers_mapping(workers_cpuid + device_workers, workers_id, nb_workers))
			
 
				 			return -1;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	starpu_shutdown();
			
 
				 	return 1;
			
 
				 }
			
@@ -132,7 +132,7 @@ static long * generate_arrangement(int arr_size, long *set, int set_size)
 
				 	long tmp;
			
 
				 
			
 
				 	STARPU_ASSERT(arr_size <= set_size);
			
 
				-	
			
 
				+
			
 
				 	srandom(time(0));
			
 
				 
			
 
				 	for (i=0; i<arr_size; i++)
			
@@ -145,7 +145,7 @@ static long * generate_arrangement(int arr_size, long *set, int set_size)
 
				 		set[i] = set[i+j];
			
 
				 		set[i+j] = tmp;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	return set;
			
 
				 }
			
 
				 
			
@@ -177,10 +177,10 @@ int main(int argc, char **argv)
 
				 
			
 
				 	for (i=0; i<STARPU_NMAXWORKERS; i++)
			
 
				 		workers_id[i] = -1;
			
 
				-		
			
 
				+
			
 
				 	cpuids = malloc(nhwpus * sizeof(long));
			
 
				 
			
 
				-	/* Evaluate several random values of STARPU_WORKERS_CPUID 
			
 
				+	/* Evaluate several random values of STARPU_WORKERS_CPUID
			
 
				 	 * and check mapping for each one
			
 
				 	 */
			
 
				 	for (i=1; i<=nhwpus; i++)
			
@@ -198,7 +198,7 @@ int main(int argc, char **argv)
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				-			
			
 
				+
			
 
				 	free(cpuids);
			
 
				 
			
 
				 	hwloc_topology_destroy(topology);
			
@@ -206,6 +206,3 @@ int main(int argc, char **argv)
 
				 	return EXIT_SUCCESS;
			
 
				 }
			
 
				 #endif
			
 
				-
			
 
				-
			
 
				-
			
--- a/tests/sched_ctx/sched_ctx_list.c
+++ b/tests/sched_ctx/sched_ctx_list.c
@@ -48,7 +48,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	/* Check addition to existing list */
			
 
				 	ret = 1;
			
 
				-	elt = _starpu_sched_ctx_elt_add(ctx_list->next, 3);
			
 
				+	_starpu_sched_ctx_elt_add(ctx_list->next, 3);
			
 
				 	ret &= (ctx_list->next->head->next->sched_ctx == 3);
			
 
				 	ret &= (ctx_list->next->head->prev->sched_ctx == 3);
			
 
				 	global &= ret;