Parcourir la source

Merge remote-tracking branch 'gitlab/master' into ft_checkpoint

# Conflicts:
#	mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c
Romain LION il y a 4 ans
Parent
commit
9394e6c713

+ 14 - 1
doc/doxygen/chapters/210_check_list_performance.doxy

@@ -352,7 +352,20 @@ use <c>export STARPU_CALIBRATE=1</c> (\ref STARPU_CALIBRATE). This may be necess
 has not-so-stable performance. StarPU will force calibration (and thus ignore
 the current result) until 10 (<c>_STARPU_CALIBRATION_MINIMUM</c>) measurements have been
 made on each architecture, to avoid bad scheduling decisions just because the
-first measurements were not so good. Details on the current performance model status
+first measurements were not so good.
+
+Note that StarPU will not record the very first measurement for a given codelet
+and a given size, because it would most often be hit by computation library
+loading or initialization. StarPU will also throw measurements away if,
+after computing an average execution time, it notices that most
+subsequent tasks have an execution time largely outside the computed average
+("Too big deviation for model..." warning messages). Looking at the details
+of these messages and their reported measurements can reveal that your
+computation library really has unstable measurements, which is probably an
+indication of an issue in the computation library, or the execution environment
+(e.g. rogue daemons).
+
+Details on the current performance model status
 can be obtained with the tool <c>starpu_perfmodel_display</c>: the
 option <c>-l</c> lists the available performance models, and the
 option <c>-s</c> allows to choose the performance model to be

+ 17 - 20
examples/cholesky/cholesky_implicit.c

@@ -92,29 +92,26 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 		}
 		starpu_data_wont_use(sdatakk);
 
-		for (m = k+1; m<nblocks; m++)
+		for (n = k+1; n<nblocks; n++)
 		{
-                        starpu_data_handle_t sdatamk = starpu_data_get_sub_data(dataA, 2, m, k);
-			for (n = k+1; n<nblocks; n++)
+                        starpu_data_handle_t sdatank = starpu_data_get_sub_data(dataA, 2, n, k);
+			for (m = n; m<nblocks; m++)
 			{
-				if (n <= m)
-                                {
-					starpu_data_handle_t sdatank = starpu_data_get_sub_data(dataA, 2, n, k);
-					starpu_data_handle_t sdatamn = starpu_data_get_sub_data(dataA, 2, m, n);
-
-					ret = starpu_task_insert(&cl22,
-								 STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
-								 STARPU_R, sdatamk,
-								 STARPU_R, sdatank,
-								 cl22.modes[2], sdatamn,
-								 STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
-								 STARPU_TAG_ONLY, TAG22(k,m,n),
-								 0);
-					if (ret == -ENODEV) return 77;
-					STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
-                                }
+				starpu_data_handle_t sdatamk = starpu_data_get_sub_data(dataA, 2, m, k);
+				starpu_data_handle_t sdatamn = starpu_data_get_sub_data(dataA, 2, m, n);
+
+				ret = starpu_task_insert(&cl22,
+							 STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
+							 STARPU_R, sdatamk,
+							 STARPU_R, sdatank,
+							 cl22.modes[2], sdatamn,
+							 STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
+							 STARPU_TAG_ONLY, TAG22(k,m,n),
+							 0);
+				if (ret == -ENODEV) return 77;
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 			}
-			starpu_data_wont_use(sdatamk);
+			starpu_data_wont_use(sdatank);
 		}
 		starpu_iteration_pop();
 	}

+ 1 - 1
mpi/examples/filters/filter.c

@@ -59,7 +59,7 @@ void vector_filter(void *father_interface, void *child_interface, struct starpu_
 
 	STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx);
 	STARPU_ASSERT(nchunks == 2);
-	STARPU_ASSERT_MSG((nx % nchunks) == 0, "nx=%d is not a multiple of nchunks %d\n", nx, nchunks);
+	STARPU_ASSERT_MSG((nx % nchunks) == 0, "nx=%u is not a multiple of nchunks %u\n", nx, nchunks);
 
 	vector_child->id = vector_father->id;
 	vector_child->nx = nx/2;

+ 97 - 17
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -20,6 +20,66 @@
 #include <limits.h>
 #include <math.h>
 
+
+/* This is from magma
+
+  -- Innovative Computing Laboratory
+  -- Electrical Engineering and Computer Science Department
+  -- University of Tennessee
+  -- (C) Copyright 2009
+
+  Redistribution  and  use  in  source and binary forms, with or without
+  modification,  are  permitted  provided  that the following conditions
+  are met:
+
+  * Redistributions  of  source  code  must  retain  the above copyright
+    notice,  this  list  of  conditions  and  the  following  disclaimer.
+  * Redistributions  in  binary  form must reproduce the above copyright
+    notice,  this list of conditions and the following disclaimer in the
+    documentation  and/or other materials provided with the distribution.
+  * Neither  the  name of the University of Tennessee, Knoxville nor the
+    names of its contributors may be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+  THIS  SOFTWARE  IS  PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  ``AS IS''  AND  ANY  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A  PARTICULAR  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL  DAMAGES  (INCLUDING,  BUT NOT
+  LIMITED  TO,  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA,  OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY  OF  LIABILITY,  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF  THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  */
+
+#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.)))
+#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)      ) * (double)(__n) - (1. / 6.)))
+
+#define FLOPS_SPOTRF(__n) (     FMULS_POTRF((__n)) +       FADDS_POTRF((__n)) )
+
+#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.))
+#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.))
+
+#define FMULS_TRMM(__m, __n) ( /*( (__side) == PlasmaLeft ) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m)) )
+#define FADDS_TRMM(__m, __n) ( /*( (__side) == PlasmaLeft ) ? FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m)) )
+
+#define FMULS_TRSM FMULS_TRMM
+#define FADDS_TRSM FADDS_TRMM /* TRSM addition count aliases the TRMM addition count, mirroring FMULS_TRSM */
+
+#define FLOPS_STRSM(__m, __n) (     FMULS_TRSM((__m), (__n)) +       FADDS_TRSM((__m), (__n)) )
+
+
+#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
+#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
+
+#define FLOPS_SGEMM(__m, __n, __k) (     FMULS_GEMM((__m), (__n), (__k)) +       FADDS_GEMM((__m), (__n), (__k)) )
+
+/* End of magma code */
+
+
 int _nodes;
 starpu_mpi_checkpoint_template_t* checkpoint_p;
 
@@ -32,6 +92,8 @@ int backup_function(int rank)
 //	return (rank+1)%_nodes;
 }
 
+
+
 /*
  *	Create the codelets
  */
@@ -75,7 +137,7 @@ static struct starpu_codelet cl22 =
 #endif
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 	.nbuffers = 3,
-    .modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE},
+	.modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE},
 	.model = &chol_model_22,
 	.color = 0x00ff00,
 };
@@ -84,6 +146,7 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
 {
 	unsigned k, m, n;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
+	unsigned nn = size/nblocks;
 
 	starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_VALUE, &k, sizeof(unsigned), nblocks*nblocks+10, backup_function);
 	starpu_mpi_checkpoint_template_freeze(checkpoint_p);
@@ -96,6 +159,7 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
 		starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
 				       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
 				       STARPU_RW, data_handles[k][k],
+				       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 				       0);
 
 		for (m = k+1; m<nblocks; m++)
@@ -104,28 +168,30 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
 					       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
 					       STARPU_R, data_handles[k][k],
 					       STARPU_RW, data_handles[m][k],
+					       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 					       0);
 
-//			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[k][k]);
-//			if (my_distrib(k, k, nodes) == rank)
-//				starpu_data_wont_use(data_handles[k][k]);
+			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[k][k]);
+			if (my_distrib(k, k, nodes) == rank)
+				starpu_data_wont_use(data_handles[k][k]);
+		}
 
-			for (n = k+1; n<nblocks; n++)
+		for (n = k+1; n<nblocks; n++)
+		{
+			for (m = n; m<nblocks; m++)
 			{
-				if (n <= m)
-				{
-					starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
-							       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
-							       STARPU_R, data_handles[n][k],
-							       STARPU_R, data_handles[m][k],
-					               STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
-							       0);
-				}
+				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
+						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
+						       STARPU_R, data_handles[n][k],
+						       STARPU_R, data_handles[m][k],
+						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
+						       0);
 			}
 
-//			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
-//			if (my_distrib(m, k, nodes) == rank)
-//				starpu_data_wont_use(data_handles[m][k]);
+			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]);
+			if (my_distrib(n, k, nodes) == rank)
+				starpu_data_wont_use(data_handles[n][k]);
 		}
 		starpu_mpi_submit_checkpoint_template(*checkpoint_p);
 		starpu_iteration_pop();
@@ -137,6 +203,7 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 {
 	unsigned k, m, n;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
+	unsigned nn = size/nblocks;
 
 	/* Column */
 	for (n = 0; n<nblocks; n++)
@@ -154,6 +221,7 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 						       STARPU_R, data_handles[n][k],
 						       STARPU_R, data_handles[m][k],
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       0);
 			}
 			k = n;
@@ -164,6 +232,7 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
 						       STARPU_R, data_handles[k][k],
 						       STARPU_RW, data_handles[m][k],
+						       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 						       0);
 			}
 			else
@@ -172,6 +241,7 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
 						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
 						       STARPU_RW, data_handles[k][k],
+						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       0);
 			}
 		}
@@ -192,6 +262,7 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 	unsigned a, c;
 	unsigned k, m, n;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
+	unsigned nn = size/nblocks;
 
 	/* double-antidiagonal number:
 	 * - a=0 contains (0,0) plus (1,0)
@@ -222,6 +293,7 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 						       STARPU_R, data_handles[n][k],
 						       STARPU_R, data_handles[m][k],
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       0);
 			}
 
@@ -233,6 +305,7 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
 						       STARPU_R, data_handles[k][k],
 						       STARPU_RW, data_handles[m][k],
+						       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 						       0);
 			}
 			else
@@ -241,6 +314,7 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
 						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
 						       STARPU_RW, data_handles[k][k],
+						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       0);
 			}
 		}
@@ -263,6 +337,7 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 						       STARPU_R, data_handles[n][k],
 						       STARPU_R, data_handles[m][k],
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       0);
 			}
 			/* non-diagonal block, solve */
@@ -271,6 +346,7 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 					       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
 					       STARPU_R, data_handles[k][k],
 					       STARPU_RW, data_handles[m][k],
+					       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 					       0);
 		}
 
@@ -290,6 +366,7 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 	unsigned a;
 	int k, m, n;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
+	unsigned nn = size/nblocks;
 
 	/*
 	 * This is basically similar to above, except that we shift k according to the priorities set in the algorithm, so that prio ~ 2*a or 2*a+1
@@ -318,6 +395,7 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
 						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
 						       STARPU_RW, data_handles[k][k],
+						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       0);
 			}
 			else
@@ -327,6 +405,7 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
 						       STARPU_R, data_handles[k][k],
 						       STARPU_RW, data_handles[m][k],
+						       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 						       0);
 			}
 
@@ -344,6 +423,7 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 							       STARPU_R, data_handles[n][k],
 							       STARPU_R, data_handles[m][k],
 							       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+							       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 							       0);
 				}
 			}

+ 1 - 1
mpi/tests/broadcast.c

@@ -40,7 +40,7 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 	starpu_data_handle_t handle;
-	int var;
+	int var=-1;
 	int mpi_init;
 	MPI_Status status;
 

+ 1 - 1
mpi/tests/insert_task_compute.c

@@ -47,7 +47,7 @@ int test(int rank, int node, int *before, int *after, int task_insert, int data_
 	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
-	if (starpu_cpu_worker_get_count() <= 0)
+	if (starpu_cpu_worker_get_count() == 0)
 	{
 		// If there is no cpu to execute the codelet, mpi will block trying to do the post-execution communication
 		ret = -ENODEV;

+ 4 - 4
src/debug/traces/starpu_fxt.c

@@ -2652,9 +2652,9 @@ static void handle_job_push(struct fxt_ev_64 *ev, struct starpu_fxt_options *opt
 		fprintf(sched_tasks_file, "Time: %.9f\n", current_timestamp);
 		fprintf(sched_tasks_file, "Priority: %d\n", priority);
 		if (options->file_rank < 0)
-			fprintf(sched_tasks_file, "JobId: %d\n", task);
+			fprintf(sched_tasks_file, "JobId: %u\n", task);
 		else
-			fprintf(sched_tasks_file, "JobId: %d_%d\n", options->file_rank, task);
+			fprintf(sched_tasks_file, "JobId: %d_%u\n", options->file_rank, task);
 		fprintf(sched_tasks_file, "\n");
 	}
 }
@@ -2701,9 +2701,9 @@ static void handle_job_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *opti
 		fprintf(sched_tasks_file, "Time: %.9f\n", current_timestamp);
 		fprintf(sched_tasks_file, "Priority: %d\n", priority);
 		if (options->file_rank < 0)
-			fprintf(sched_tasks_file, "JobId: %d\n", task);
+			fprintf(sched_tasks_file, "JobId: %u\n", task);
 		else
-			fprintf(sched_tasks_file, "JobId: %d_%d\n", options->file_rank, task);
+			fprintf(sched_tasks_file, "JobId: %d_%u\n", options->file_rank, task);
 		fprintf(sched_tasks_file, "\n");
 	}
 }

+ 2 - 2
src/debug/traces/starpu_fxt_mpi.c

@@ -357,7 +357,7 @@ static void display_all_transfers_from_trace(FILE *out_paje_file, FILE *out_comm
 					fprintf(out_comms_file, "SendJobId: %d_%lu\n", src, cur->jobid);
 				if (match->jobid != -1)
 					fprintf(out_comms_file, "RecvJobId: %d_%lu\n", dst, match->jobid);
-				fprintf(out_comms_file, "Size: %ld\n", size);
+				fprintf(out_comms_file, "Size: %lu\n", size);
 				fprintf(out_comms_file, "\n");
 			}
 		}
@@ -372,7 +372,7 @@ static void display_all_transfers_from_trace(FILE *out_paje_file, FILE *out_comm
 	if (nb_wrong_comm_timing == 1)
 		_STARPU_MSG("Warning: a communication finished before it started !\n");
 	else if (nb_wrong_comm_timing > 1)
-		_STARPU_MSG("Warning: %d communications finished before they started !\n", nb_wrong_comm_timing);
+		_STARPU_MSG("Warning: %u communications finished before they started !\n", nb_wrong_comm_timing);
 }
 
 void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks STARPU_ATTRIBUTE_UNUSED, FILE *out_paje_file, FILE* out_comms_file)

+ 3 - 3
tests/datawizard/bcsr.c

@@ -39,20 +39,20 @@ void cpu_show_bcsr(void *descr[], void *arg)
 
 	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 
-	printf("\nnnz %d elemsize %d\n", nnz, elemsize);
+	printf("\nnnz %u elemsize %u\n", nnz, elemsize);
 
 	for (i = 0; i < nrow; i++)
 	{
 		uint32_t row_start = rowptr[i] - firstentry;
 		uint32_t row_end = rowptr[i+1] - firstentry;
 
-		printf("row %d\n", i);
+		printf("row %u\n", i);
 
 		for (j = row_start; j < row_end; j++)
 		{
 			int *block = nzval + j * r*c;
 
-			printf( " column %d\n", colind[j]);
+			printf( " column %u\n", colind[j]);
 
 			for (y = 0; y < r; y++)
 			{

+ 4 - 4
tests/helper/starpu_data_dup_ro.c

@@ -84,7 +84,7 @@ int main(int argc, char **argv)
 	ret = EXIT_SUCCESS;
 	if (*var != 42)
 	{
-	     FPRINTF(stderr, "var2 is %d but it should be %d\n", *var, 42);
+	     FPRINTF(stderr, "var2 is %u but it should be %d\n", *var, 42);
 	     ret = EXIT_FAILURE;
 	}
 	starpu_data_release(var2_handle);
@@ -93,7 +93,7 @@ int main(int argc, char **argv)
 	var = starpu_data_get_local_ptr(var3_handle);
 	if (*var != 42)
 	{
-	     FPRINTF(stderr, "var3 is %d but it should be %d\n", *var, 42);
+	     FPRINTF(stderr, "var3 is %u but it should be %d\n", *var, 42);
 	     ret = EXIT_FAILURE;
 	}
 	starpu_data_release(var3_handle);
@@ -102,7 +102,7 @@ int main(int argc, char **argv)
 	var = starpu_data_get_local_ptr(var4_handle);
 	if (*var != 42)
 	{
-	     FPRINTF(stderr, "var4 is %d but it should be %d\n", *var, 42);
+	     FPRINTF(stderr, "var4 is %u but it should be %d\n", *var, 42);
 	     ret = EXIT_FAILURE;
 	}
 	starpu_data_release(var4_handle);
@@ -111,7 +111,7 @@ int main(int argc, char **argv)
 	var = starpu_data_get_local_ptr(var5_handle);
 	if (*var != 43)
 	{
-	     FPRINTF(stderr, "var5 is %d but it should be %d\n", *var, 43);
+	     FPRINTF(stderr, "var5 is %u but it should be %d\n", *var, 43);
 	     ret = EXIT_FAILURE;
 	}
 	starpu_data_release(var5_handle);

+ 1 - 1
tests/parallel_tasks/explicit_combined_worker.c

@@ -114,7 +114,7 @@ int main(void)
 enodev:
 	starpu_data_unregister(v_handle);
 	starpu_free(v);
-	fprintf(stderr, "WARNING: No one can execute the task on workerid %d\n", worker);
+	fprintf(stderr, "WARNING: No one can execute the task on workerid %u\n", worker);
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */
 	starpu_shutdown();

+ 2 - 2
tools/starpu_fxt_tool.c

@@ -81,7 +81,7 @@ static int parse_args(int argc, char **argv)
 		{
 			if (options.ninputfiles >= STARPU_FXT_MAX_FILES)
 			{
-				fprintf(stderr, "Error: The number of trace files is superior to STARPU_FXT_MAX_FILES (%u)\nPlease recompile StarPU with a bigger --enable-fxt-max-files\n", STARPU_FXT_MAX_FILES);
+				fprintf(stderr, "Error: The number of trace files is superior to STARPU_FXT_MAX_FILES (%d)\nPlease recompile StarPU with a bigger --enable-fxt-max-files\n", STARPU_FXT_MAX_FILES);
 				return 7;
 			}
 			options.filenames[options.ninputfiles++] = argv[++i];
@@ -179,7 +179,7 @@ static int parse_args(int argc, char **argv)
 		{
 			if (options.ninputfiles >= STARPU_FXT_MAX_FILES)
 			{
-				fprintf(stderr, "Error: The number of trace files is superior to STARPU_FXT_MAX_FILES (%u)\nPlease recompile StarPU with a bigger --enable-fxt-max-files\n", STARPU_FXT_MAX_FILES);
+				fprintf(stderr, "Error: The number of trace files is superior to STARPU_FXT_MAX_FILES (%d)\nPlease recompile StarPU with a bigger --enable-fxt-max-files\n", STARPU_FXT_MAX_FILES);
 				return 7;
 			}
 			options.filenames[options.ninputfiles++] = argv[i];

+ 4 - 4
tools/starpu_perfmodel_recdump.c

@@ -122,7 +122,7 @@ void print_archs(FILE* output)
 	{
 		unsigned printed = 0;
 		char name[32];
-		fprintf(output, "MemoryNode: %d\n", node);
+		fprintf(output, "MemoryNode: %u\n", node);
 		starpu_memory_node_get_name(node, name, sizeof(name));
 		fprintf(output, "Name: %s\n", name);
 		fprintf(output, "Size: %ld\n", (long) starpu_memory_get_total(node));
@@ -135,7 +135,7 @@ void print_archs(FILE* output)
 					fprintf(output, "Workers:");
 					printed = 1;
 				}
-				fprintf(output, " %d", workerid);
+				fprintf(output, " %u", workerid);
 			}
 		}
 		if (printed)
@@ -149,8 +149,8 @@ void print_archs(FILE* output)
 		{
 			if (src != dst)
 			{
-				fprintf(output, "MemoryNodeSrc: %d\n", src);
-				fprintf(output, "MemoryNodeDst: %d\n", dst);
+				fprintf(output, "MemoryNodeSrc: %u\n", src);
+				fprintf(output, "MemoryNodeDst: %u\n", dst);
 				fprintf(output, "Bandwidth: %f\n", starpu_transfer_bandwidth(src, dst));
 				fprintf(output, "Latency: %f\n", starpu_transfer_latency(src, dst));
 				fprintf(output, "\n");