8 년 전 · deb66c0c51
--- a/doc/doxygen/chapters/470_simgrid.doxy
+++ b/doc/doxygen/chapters/470_simgrid.doxy
@@ -9,8 +9,8 @@
 
				 /*! \page SimGridSupport SimGrid Support
			
 
				 
			
 
				 StarPU can use Simgrid in order to simulate execution on an arbitrary
			
 
				-platform. This was tested with simgrid 3.11, 3.12, 3.13, 3.14, and 3.14.159, other versions may have
			
 
				-compatibility issues.
			
 
				+platform. This was tested with simgrid versions 3.11 through 3.15;
			
 
				+other versions may have compatibility issues.
			
 
				 
			
 
				 \section Preparing Preparing Your Application For Simulation
			
 
				 
			
--- a/examples/heat/dw_sparse_cg.c
+++ b/examples/heat/dw_sparse_cg.c
@@ -25,11 +25,7 @@
 
				 
			
 
				 static struct starpu_task *create_task(starpu_tag_t id)
			
 
				 {
			
 
				-	struct starpu_codelet *cl = calloc(1,sizeof(struct starpu_codelet));
			
 
				-
			
 
				 	struct starpu_task *task = starpu_task_create();
			
 
				-		task->cl = cl;
			
 
				-		task->cl_arg = NULL;
			
 
				 		task->use_tag = 1;
			
 
				 		task->tag_id = id;
			
 
				 
			
@@ -131,6 +127,30 @@ void init_problem(void)
 
				  *	cg initialization phase
			
 
				  */
			
 
				 
			
 
				+static struct starpu_codelet cl1 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_1 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_1" },
			
 
				+	.nbuffers = 4,
			
 
				+	.modes = { STARPU_R, STARPU_R, STARPU_W, STARPU_R },
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cl2 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_2 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_2" },
			
 
				+	.nbuffers = 2,
			
 
				+	.modes = { STARPU_W, STARPU_R },
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cl3 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_3 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_3" },
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = { cublas_codelet_func_3 },
			
 
				+#endif
			
 
				+	.nbuffers = 1,
			
 
				+	.modes = { STARPU_R },
			
 
				+};
			
 
				+
			
 
				 void init_cg(struct cg_problem *problem)
			
 
				 {
			
 
				 	int ret;
			
@@ -139,14 +159,7 @@ void init_cg(struct cg_problem *problem)
 
				 
			
 
				 	/* r = b  - A x */
			
 
				 	struct starpu_task *task1 = create_task(1UL);
			
 
				-	task1->cl->cpu_funcs[0] = cpu_codelet_func_1;
			
 
				-	task1->cl->cpu_funcs_name[0] = "cpu_codelet_func_1";
			
 
				-	task1->cl->nbuffers = 4;
			
 
				-	task1->cl->modes[0] = STARPU_R;
			
 
				-	task1->cl->modes[1] = STARPU_R;
			
 
				-	task1->cl->modes[2] = STARPU_W;
			
 
				-	task1->cl->modes[3] = STARPU_R;
			
 
				-
			
 
				+	task1->cl = &cl1;
			
 
				 	task1->handles[0] = problem->ds_matrixA;
			
 
				 	task1->handles[1] = problem->ds_vecx;
			
 
				 	task1->handles[2] = problem->ds_vecr;
			
@@ -154,12 +167,7 @@ void init_cg(struct cg_problem *problem)
 
				 
			
 
				 	/* d = r */
			
 
				 	struct starpu_task *task2 = create_task(2UL);
			
 
				-	task2->cl->cpu_funcs[0] = cpu_codelet_func_2;
			
 
				-	task2->cl->cpu_funcs_name[0] = "cpu_codelet_func_2";
			
 
				-	task2->cl->nbuffers = 2;
			
 
				-	task2->cl->modes[0] = STARPU_W;
			
 
				-	task2->cl->modes[1] = STARPU_R;
			
 
				-
			
 
				+	task2->cl = &cl2;
			
 
				 	task2->handles[0] = problem->ds_vecd;
			
 
				 	task2->handles[1] = problem->ds_vecr;
			
 
				 
			
@@ -167,15 +175,9 @@ void init_cg(struct cg_problem *problem)
 
				 
			
 
				 	/* delta_new = trans(r) r */
			
 
				 	struct starpu_task *task3 = create_task(3UL);
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	task3->cl->cuda_funcs[0] = cublas_codelet_func_3;
			
 
				-#endif
			
 
				-	task3->cl->cpu_funcs[0] = cpu_codelet_func_3;
			
 
				-	task3->cl->cpu_funcs_name[0] = "cpu_codelet_func_3";
			
 
				+	task3->cl = &cl3;
			
 
				 	task3->cl_arg = problem;
			
 
				 	task3->cl_arg_size = sizeof(*problem);
			
 
				-	task3->cl->nbuffers = 1;
			
 
				-	task3->cl->modes[0] = STARPU_R;
			
 
				 	task3->handles[0] = problem->ds_vecr;
			
 
				 
			
 
				 	task3->callback_func = iteration_cg;
			
@@ -203,6 +205,66 @@ void init_cg(struct cg_problem *problem)
 
				  *		the codelet code launcher is its own callback !
			
 
				  */
			
 
				 
			
 
				+static struct starpu_codelet cl4 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_4 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_4" },
			
 
				+	.nbuffers = 3,
			
 
				+	.modes = { STARPU_R, STARPU_R, STARPU_W },
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cl5 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_5 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_5" },
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = { cublas_codelet_func_5 },
			
 
				+#endif
			
 
				+	.nbuffers = 2,
			
 
				+	.modes = { STARPU_R, STARPU_R },
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cl6 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_6 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_6" },
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = { cublas_codelet_func_6 },
			
 
				+	.cuda_flags = { STARPU_CUDA_ASYNC },
			
 
				+#endif
			
 
				+	.nbuffers = 2,
			
 
				+	.modes = { STARPU_RW, STARPU_R },
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cl7 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_7 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_7" },
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = { cublas_codelet_func_7 },
			
 
				+	.cuda_flags = { STARPU_CUDA_ASYNC },
			
 
				+#endif
			
 
				+	.nbuffers = 2,
			
 
				+	.modes = { STARPU_RW, STARPU_R },
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cl8 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_8 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_8" },
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = { cublas_codelet_func_8 },
			
 
				+#endif
			
 
				+	.nbuffers = 1,
			
 
				+	.modes = { STARPU_R },
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cl9 = {
			
 
				+	.cpu_funcs = { cpu_codelet_func_9 },
			
 
				+	.cpu_funcs_name = { "cpu_codelet_func_9" },
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = { cublas_codelet_func_9 },
			
 
				+	.cuda_flags = { STARPU_CUDA_ASYNC },
			
 
				+#endif
			
 
				+	.nbuffers = 2,
			
 
				+	.modes = { STARPU_RW, STARPU_R },
			
 
				+};
			
 
				+
			
 
				 void launch_new_cg_iteration(struct cg_problem *problem)
			
 
				 {
			
 
				 	int ret;
			
@@ -213,30 +275,16 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
				 
			
 
				 	/* q = A d */
			
 
				 	struct starpu_task *task4 = create_task(maskiter | 4UL);
			
 
				-	task4->cl->cpu_funcs[0] = cpu_codelet_func_4;
			
 
				-	task4->cl->cpu_funcs_name[0] = "cpu_codelet_func_4";
			
 
				-	task4->cl->nbuffers = 3;
			
 
				-	task4->cl->modes[0] = STARPU_R;
			
 
				-	task4->cl->modes[1] = STARPU_R;
			
 
				-	task4->cl->modes[2] = STARPU_W;
			
 
				-
			
 
				+	task4->cl = &cl4;
			
 
				 	task4->handles[0] = problem->ds_matrixA;
			
 
				 	task4->handles[1] = problem->ds_vecd;
			
 
				 	task4->handles[2] = problem->ds_vecq;
			
 
				 
			
 
				 	/* alpha = delta_new / ( trans(d) q )*/
			
 
				 	struct starpu_task *task5 = create_task(maskiter | 5UL);
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	task5->cl->cuda_funcs[0] = cublas_codelet_func_5;
			
 
				-#endif
			
 
				-	task5->cl->cpu_funcs[0] = cpu_codelet_func_5;
			
 
				-	task5->cl->cpu_funcs_name[0] = "cpu_codelet_func_5";
			
 
				+	task5->cl = &cl5;
			
 
				 	task5->cl_arg = problem;
			
 
				 	task5->cl_arg_size = sizeof(*problem);
			
 
				-	task5->cl->nbuffers = 2;
			
 
				-	task5->cl->modes[0] = STARPU_R;
			
 
				-	task5->cl->modes[1] = STARPU_R;
			
 
				-
			
 
				 	task5->handles[0] = problem->ds_vecd;
			
 
				 	task5->handles[1] = problem->ds_vecq;
			
 
				 
			
@@ -244,18 +292,9 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
				 
			
 
				 	/* x = x + alpha d */
			
 
				 	struct starpu_task *task6 = create_task(maskiter | 6UL);
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	task6->cl->cuda_funcs[0] = cublas_codelet_func_6;
			
 
				-	task6->cl->cuda_flags[0] = STARPU_CUDA_ASYNC;
			
 
				-#endif
			
 
				-	task6->cl->cpu_funcs[0] = cpu_codelet_func_6;
			
 
				-	task6->cl->cpu_funcs_name[0] = "cpu_codelet_func_6";
			
 
				+	task6->cl = &cl6;
			
 
				 	task6->cl_arg = problem;
			
 
				 	task6->cl_arg_size = sizeof(*problem);
			
 
				-	task6->cl->nbuffers = 2;
			
 
				-	task6->cl->modes[0] = STARPU_RW;
			
 
				-	task6->cl->modes[1] = STARPU_R;
			
 
				-
			
 
				 	task6->handles[0] = problem->ds_vecx;
			
 
				 	task6->handles[1] = problem->ds_vecd;
			
 
				 
			
@@ -263,18 +302,9 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
				 
			
 
				 	/* r = r - alpha q */
			
 
				 	struct starpu_task *task7 = create_task(maskiter | 7UL);
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	task7->cl->cuda_funcs[0] = cublas_codelet_func_7;
			
 
				-	task7->cl->cuda_flags[0] = STARPU_CUDA_ASYNC;
			
 
				-#endif
			
 
				-	task7->cl->cpu_funcs[0] = cpu_codelet_func_7;
			
 
				-	task7->cl->cpu_funcs_name[0] = "cpu_codelet_func_7";
			
 
				+	task7->cl = &cl7;
			
 
				 	task7->cl_arg = problem;
			
 
				 	task7->cl_arg_size = sizeof(*problem);
			
 
				-	task7->cl->nbuffers = 2;
			
 
				-	task7->cl->modes[0] = STARPU_RW;
			
 
				-	task7->cl->modes[1] = STARPU_R;
			
 
				-
			
 
				 	task7->handles[0] = problem->ds_vecr;
			
 
				 	task7->handles[1] = problem->ds_vecq;
			
 
				 
			
@@ -282,33 +312,18 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
				 
			
 
				 	/* update delta_* and compute beta */
			
 
				 	struct starpu_task *task8 = create_task(maskiter | 8UL);
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	task8->cl->cuda_funcs[0] = cublas_codelet_func_8;
			
 
				-#endif
			
 
				-	task8->cl->cpu_funcs[0] = cpu_codelet_func_8;
			
 
				-	task8->cl->cpu_funcs_name[0] = "cpu_codelet_func_8";
			
 
				+	task8->cl = &cl8;
			
 
				 	task8->cl_arg = problem;
			
 
				 	task8->cl_arg_size = sizeof(*problem);
			
 
				-	task8->cl->nbuffers = 1;
			
 
				-	task8->cl->modes[0] = STARPU_R;
			
 
				 	task8->handles[0] = problem->ds_vecr;
			
 
				 
			
 
				 	starpu_tag_declare_deps((starpu_tag_t)(maskiter | 8UL), 1, (starpu_tag_t)(maskiter | 7UL));
			
 
				 
			
 
				 	/* d = r + beta d */
			
 
				 	struct starpu_task *task9 = create_task(maskiter | 9UL);
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	task9->cl->cuda_funcs[0] = cublas_codelet_func_9;
			
 
				-	task9->cl->cuda_flags[0] = STARPU_CUDA_ASYNC;
			
 
				-#endif
			
 
				-	task9->cl->cpu_funcs[0] = cpu_codelet_func_9;
			
 
				-	task9->cl->cpu_funcs_name[0] = "cpu_codelet_func_9";
			
 
				+	task9->cl = &cl9;
			
 
				 	task9->cl_arg = problem;
			
 
				 	task9->cl_arg_size = sizeof(*problem);
			
 
				-	task9->cl->nbuffers = 2;
			
 
				-	task9->cl->modes[0] = STARPU_RW;
			
 
				-	task9->cl->modes[1] = STARPU_R;
			
 
				-
			
 
				 	task9->handles[0] = problem->ds_vecd;
			
 
				 	task9->handles[1] = problem->ds_vecr;
			
 
				 
			
@@ -432,6 +447,10 @@ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
 
				 	starpu_data_unregister(ds_vecr);
			
 
				 	starpu_data_unregister(ds_vecd);
			
 
				 	starpu_data_unregister(ds_vecq);
			
 
				+
			
 
				+	free(ptr_vecr);
			
 
				+	free(ptr_vecd);
			
 
				+	free(ptr_vecq);
			
 
				 }
			
 
				 
			
 
				 
			
--- a/examples/heat/heat.c
+++ b/examples/heat/heat.c
@@ -751,6 +751,10 @@ int main(int argc, char **argv)
 
				 			result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
			
 
				 		}
			
 
				 
			
 
				+		free(nzval);
			
 
				+		free(colind);
			
 
				+		free(rowptr);
			
 
				+		free(B);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
--- a/examples/heat/heat.sh
+++ b/examples/heat/heat.sh
@@ -23,7 +23,8 @@ PREFIX=$(dirname $0)
 
				 
			
 
				 $PREFIX/heat -shape 0
			
 
				 $PREFIX/heat -shape 1
			
 
				-$PREFIX/heat -shape 2
			
 
				+# disabled: this sometimes leads to a pivot being 0
			
 
				+#$PREFIX/heat -shape 2
			
 
				 
			
 
				 $PREFIX/heat -cg
			
 
				 
			
--- a/examples/lu/lu_example.c
+++ b/examples/lu/lu_example.c
@@ -422,13 +422,15 @@ int main(int argc, char **argv)
 
				 		if (pivot)
			
 
				 		{
			
 
				 			pivot_saved_matrix(ipiv);
			
 
				-			free(ipiv);
			
 
				 		}
			
 
				 
			
 
				 		check_result();
			
 
				 	}
			
 
				 #endif
			
 
				 
			
 
				+	if (pivot)
			
 
				+		free(ipiv);
			
 
				+
			
 
				 	starpu_free_flags(A, (size_t)size*size*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
			
 
				 
			
 
				 	starpu_cublas_shutdown();
			
--- a/examples/lu/xlu_pivot.c
+++ b/examples/lu/xlu_pivot.c
@@ -399,6 +399,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
				 
			
 
				 	/* gather all the data */
			
 
				 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unregister(dataA);
			
 
				 	free(piv_description);
			
 
				 
			
 
				 	return ret;
			
--- a/examples/mlr/mlr.c
+++ b/examples/mlr/mlr.c
@@ -50,7 +50,15 @@ static long sum;
 
				 static void cl_params(struct starpu_task *task, double *parameters)
			
 
				 {
			
 
				 	int m, n, k;
			
 
				-	starpu_codelet_unpack_args(task->cl_arg, &m, &n, &k);
			
 
				+	int* vector_mn;
			
 
				+	starpu_data_handle_t vector_mn_handle;
			
 
				+
			
 
				+	vector_mn = (int*)STARPU_VECTOR_GET_PTR(task->interfaces[0]);
			
 
				+	m = vector_mn[0];
			
 
				+	n = vector_mn[1];
			
 
				+
			
 
				+	starpu_codelet_unpack_args(task->cl_arg, &k);
			
 
				+
			
 
				 	parameters[0] = m;
			
 
				 	parameters[1] = n;
			
 
				 	parameters[2] = k;
			
@@ -61,10 +69,13 @@ void cpu_func(void *buffers[], void *cl_arg)
 
				 {
			
 
				 	long i;
			
 
				 	int m,n,k;
			
 
				-	starpu_codelet_unpack_args(cl_arg,
			
 
				-			     	  &m,
			
 
				-     			     	  &n,
			
 
				-     			     	  &k);
			
 
				+	int* vector_mn;
			
 
				+
			
 
				+	vector_mn = (int*)STARPU_VECTOR_GET_PTR(buffers[0]);
			
 
				+	m = vector_mn[0];
			
 
				+	n = vector_mn[1];
			
 
				+
			
 
				+	starpu_codelet_unpack_args(cl_arg, &k);
			
 
				 
			
 
				 	for(i=0; i < (long) (m*m*n); i++)
			
 
				 		sum+=i;
			
@@ -123,7 +134,8 @@ static struct starpu_codelet cl_init =
 
				 {
			
 
				 	.cpu_funcs = { cpu_func },
			
 
				 	.cpu_funcs_name = { "cpu_func" },
			
 
				-	.nbuffers = 0,
			
 
				+	.nbuffers = 1,
			
 
				+	.modes = {STARPU_R},
			
 
				 	.model = &cl_model_init,
			
 
				 };
			
 
				 
			
@@ -131,7 +143,8 @@ static struct starpu_codelet cl_final =
 
				 {
			
 
				 	.cpu_funcs = { cpu_func },
			
 
				 	.cpu_funcs_name = { "cpu_func" },
			
 
				-	.nbuffers = 0,
			
 
				+	.nbuffers = 1,
			
 
				+	.modes = {STARPU_R},
			
 
				 	.model = &cl_model_final,
			
 
				 };
			
 
				 
			
@@ -147,29 +160,42 @@ int main(int argc, char **argv)
 
				 
			
 
				 	sum=0;
			
 
				 	int m,n,k;
			
 
				+	int* vector_mn = malloc( 2 * sizeof(int) );
			
 
				+	starpu_data_handle_t vector_mn_handle;
			
 
				+
			
 
				+	starpu_vector_data_register( &vector_mn_handle,
			
 
				+				     STARPU_MAIN_RAM,
			
 
				+				     (uintptr_t)vector_mn, 2,
			
 
				+				     sizeof(int) );
			
 
				 
			
 
				-        /* Giving pseudo-random values to the M,N,K parameters and inserting tasks */
			
 
				-	for(i=0; i < 42; i++)
			
 
				+	/* Giving pseudo-random values to the M,N,K parameters and inserting tasks */
			
 
				+	for ( i = 0; i < 42; i++)
			
 
				 	{
			
 
				 		m = (int) ((rand() % 10)+1);
			
 
				 		n = (int) ((rand() % 10)+1);
			
 
				 		k = (int) ((rand() % 10)+1);
			
 
				 
			
 
				-		for(j=0; j < 42; j++)
			
 
				+		/* To illustrate the usage, M and N are stored in a data handle */
			
 
				+		starpu_data_acquire(vector_mn_handle, STARPU_W);
			
 
				+		vector_mn[0] = m;
			
 
				+		vector_mn[1] = n;
			
 
				+		starpu_data_release(vector_mn_handle);
			
 
				+
			
 
				+		for ( j = 0; j < 42; j++)
			
 
				 		{
			
 
				-			starpu_insert_task(&cl_init,
			
 
				-				   STARPU_VALUE, &m, sizeof(int),
			
 
				-				   STARPU_VALUE, &n, sizeof(int),
			
 
				-				   STARPU_VALUE, &k, sizeof(int),
			
 
				-				   0);
			
 
				-			starpu_insert_task(&cl_final,
			
 
				-				   STARPU_VALUE, &m, sizeof(int),
			
 
				-				   STARPU_VALUE, &n, sizeof(int),
			
 
				-				   STARPU_VALUE, &k, sizeof(int),
			
 
				-				   0);
			
 
				+			starpu_insert_task( &cl_init,
			
 
				+					    STARPU_R, vector_mn_handle,
			
 
				+					    STARPU_VALUE, &k, sizeof(int),
			
 
				+					    0 );
			
 
				+			starpu_insert_task( &cl_final,
			
 
				+					    STARPU_R, vector_mn_handle,
			
 
				+					    STARPU_VALUE, &k, sizeof(int),
			
 
				+					    0 );
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	starpu_data_unregister(vector_mn_handle);
			
 
				+	free(vector_mn);
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	return 0;
			
--- a/src/common/thread.c
+++ b/src/common/thread.c
@@ -73,6 +73,9 @@ int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_
 
				 	void *tsd;
			
 
				 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
			
 
				 	*thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args);
			
 
				+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
			
 
				+	MSG_process_ref(*thread);
			
 
				+#endif
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -85,6 +88,9 @@ int starpu_pthread_join(starpu_pthread_t thread STARPU_ATTRIBUTE_UNUSED, void **
 
				 {
			
 
				 #if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 14)
			
 
				 	MSG_process_join(thread, 1000000);
			
 
				+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
			
 
				+	MSG_process_unref(thread);
			
 
				+#endif
			
 
				 #else
			
 
				 	MSG_process_sleep(1);
			
 
				 #endif
			
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -32,6 +32,7 @@
 
				 #ifdef STARPU_SIMGRID
			
 
				 #include <sys/mman.h>
			
 
				 #include <fcntl.h>
			
 
				+#include <smpi/smpi.h>
			
 
				 #endif
			
 
				 
			
 
				 #ifndef O_BINARY
			
@@ -48,9 +49,12 @@ static int malloc_on_node_default_flags[STARPU_MAXNODES];
 
				 
			
 
				 /* This file is used for implementing "folded" allocation */
			
 
				 #ifdef STARPU_SIMGRID
			
 
				+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
			
 
				+/* TODO: drop once simgrid 3.15 is widely enough used by people who need the feature */
			
 
				 static int bogusfile = -1;
			
 
				 static unsigned long _starpu_malloc_simulation_fold;
			
 
				 #endif
			
 
				+#endif
			
 
				 
			
 
				 void starpu_malloc_set_align(size_t align)
			
 
				 {
			
@@ -224,6 +228,10 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 
				 #ifdef STARPU_SIMGRID
			
 
				 	if (flags & STARPU_MALLOC_SIMULATION_FOLDED)
			
 
				 	{
			
 
				+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
			
 
				+		*A = SMPI_SHARED_MALLOC(dim);
			
 
				+#else
			
 
				+		/* TODO: drop once simgrid 3.15 is widely enough used by people who need the feature */
			
 
				 		/* Use "folded" allocation: the same file is mapped several
			
 
				 		 * times contiguously, to get a memory area one can read/write,
			
 
				 		 * without consuming memory */
			
@@ -276,6 +284,7 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 
				 			}
			
 
				 			*A = buf;
			
 
				 		}
			
 
				+#endif
			
 
				 	}
			
 
				 	else
			
 
				 #endif
			
@@ -442,7 +451,12 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 
				 #ifdef STARPU_SIMGRID
			
 
				 	if (flags & STARPU_MALLOC_SIMULATION_FOLDED)
			
 
				 	{
			
 
				+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
			
 
				+		SMPI_SHARED_FREE(A);
			
 
				+#else
			
 
				+		/* TODO: drop once simgrid 3.15 is widely enough used by people who need the feature */
			
 
				 		munmap(A, dim);
			
 
				+#endif
			
 
				 	}
			
 
				 	else
			
 
				 #endif
			
@@ -810,9 +824,11 @@ _starpu_malloc_init(unsigned dst_node)
 
				 	disable_pinning = starpu_get_env_number("STARPU_DISABLE_PINNING");
			
 
				 	malloc_on_node_default_flags[dst_node] = STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT;
			
 
				 #ifdef STARPU_SIMGRID
			
 
				+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
			
 
				 	/* Reasonably "costless" */
			
 
				 	_starpu_malloc_simulation_fold = starpu_get_env_number_default("STARPU_MALLOC_SIMULATION_FOLD", 1) << 20;
			
 
				 #endif
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 void
			
--- a/src/drivers/mpi/driver_mpi_common.c
+++ b/src/drivers/mpi/driver_mpi_common.c
@@ -80,7 +80,10 @@ int _starpu_mpi_common_mp_init()
 
				 #endif
			
 
				 
			
 
				                 int thread_support;
			
 
				-                STARPU_ASSERT(MPI_Init_thread(_starpu_get_argc(), _starpu_get_argv(), required, &thread_support) == MPI_SUCCESS);
			
 
				+                if (MPI_Init_thread(_starpu_get_argc(), _starpu_get_argv(), required, &thread_support) != MPI_SUCCESS)
			
 
				+		{
			
 
				+			STARPU_ABORT_MSG("Cannot Initialize MPI !");
			
 
				+		}
			
 
				 
			
 
				                 if (thread_support != required)
			
 
				                 {