소스 검색

merge from trunk

Olivier Aumage 8 년 전
부모
커밋
deb66c0c51

+ 2 - 2
doc/doxygen/chapters/470_simgrid.doxy

@@ -9,8 +9,8 @@
 /*! \page SimGridSupport SimGrid Support
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
-platform. This was tested with simgrid 3.11, 3.12, 3.13, 3.14, and 3.14.159, other versions may have
-compatibility issues.
+platform. This was tested with simgrid from 3.11 to 3.15,
+other versions may have compatibility issues.
 
 \section Preparing Preparing Your Application For Simulation
 

+ 97 - 78
examples/heat/dw_sparse_cg.c

@@ -25,11 +25,7 @@
 
 static struct starpu_task *create_task(starpu_tag_t id)
 {
-	struct starpu_codelet *cl = calloc(1,sizeof(struct starpu_codelet));
-
 	struct starpu_task *task = starpu_task_create();
-		task->cl = cl;
-		task->cl_arg = NULL;
 		task->use_tag = 1;
 		task->tag_id = id;
 
@@ -131,6 +127,30 @@ void init_problem(void)
  *	cg initialization phase
  */
 
+static struct starpu_codelet cl1 = {
+	.cpu_funcs = { cpu_codelet_func_1 },
+	.cpu_funcs_name = { "cpu_codelet_func_1" },
+	.nbuffers = 4,
+	.modes = { STARPU_R, STARPU_R, STARPU_W, STARPU_R },
+};
+
+static struct starpu_codelet cl2 = {
+	.cpu_funcs = { cpu_codelet_func_2 },
+	.cpu_funcs_name = { "cpu_codelet_func_2" },
+	.nbuffers = 2,
+	.modes = { STARPU_W, STARPU_R },
+};
+
+static struct starpu_codelet cl3 = {
+	.cpu_funcs = { cpu_codelet_func_3 },
+	.cpu_funcs_name = { "cpu_codelet_func_3" },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = { cublas_codelet_func_3 },
+#endif
+	.nbuffers = 1,
+	.modes = { STARPU_R },
+};
+
 void init_cg(struct cg_problem *problem)
 {
 	int ret;
@@ -139,14 +159,7 @@ void init_cg(struct cg_problem *problem)
 
 	/* r = b  - A x */
 	struct starpu_task *task1 = create_task(1UL);
-	task1->cl->cpu_funcs[0] = cpu_codelet_func_1;
-	task1->cl->cpu_funcs_name[0] = "cpu_codelet_func_1";
-	task1->cl->nbuffers = 4;
-	task1->cl->modes[0] = STARPU_R;
-	task1->cl->modes[1] = STARPU_R;
-	task1->cl->modes[2] = STARPU_W;
-	task1->cl->modes[3] = STARPU_R;
-
+	task1->cl = &cl1;
 	task1->handles[0] = problem->ds_matrixA;
 	task1->handles[1] = problem->ds_vecx;
 	task1->handles[2] = problem->ds_vecr;
@@ -154,12 +167,7 @@ void init_cg(struct cg_problem *problem)
 
 	/* d = r */
 	struct starpu_task *task2 = create_task(2UL);
-	task2->cl->cpu_funcs[0] = cpu_codelet_func_2;
-	task2->cl->cpu_funcs_name[0] = "cpu_codelet_func_2";
-	task2->cl->nbuffers = 2;
-	task2->cl->modes[0] = STARPU_W;
-	task2->cl->modes[1] = STARPU_R;
-
+	task2->cl = &cl2;
 	task2->handles[0] = problem->ds_vecd;
 	task2->handles[1] = problem->ds_vecr;
 
@@ -167,15 +175,9 @@ void init_cg(struct cg_problem *problem)
 
 	/* delta_new = trans(r) r */
 	struct starpu_task *task3 = create_task(3UL);
-#ifdef STARPU_USE_CUDA
-	task3->cl->cuda_funcs[0] = cublas_codelet_func_3;
-#endif
-	task3->cl->cpu_funcs[0] = cpu_codelet_func_3;
-	task3->cl->cpu_funcs_name[0] = "cpu_codelet_func_3";
+	task3->cl = &cl3;
 	task3->cl_arg = problem;
 	task3->cl_arg_size = sizeof(*problem);
-	task3->cl->nbuffers = 1;
-	task3->cl->modes[0] = STARPU_R;
 	task3->handles[0] = problem->ds_vecr;
 
 	task3->callback_func = iteration_cg;
@@ -203,6 +205,66 @@ void init_cg(struct cg_problem *problem)
  *		the codelet code launcher is its own callback !
  */
 
+static struct starpu_codelet cl4 = {
+	.cpu_funcs = { cpu_codelet_func_4 },
+	.cpu_funcs_name = { "cpu_codelet_func_4" },
+	.nbuffers = 3,
+	.modes = { STARPU_R, STARPU_R, STARPU_W },
+};
+
+static struct starpu_codelet cl5 = {
+	.cpu_funcs = { cpu_codelet_func_5 },
+	.cpu_funcs_name = { "cpu_codelet_func_5" },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = { cublas_codelet_func_5 },
+#endif
+	.nbuffers = 2,
+	.modes = { STARPU_R, STARPU_R },
+};
+
+static struct starpu_codelet cl6 = {
+	.cpu_funcs = { cpu_codelet_func_6 },
+	.cpu_funcs_name = { "cpu_codelet_func_6" },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = { cublas_codelet_func_6 },
+	.cuda_flags = { STARPU_CUDA_ASYNC },
+#endif
+	.nbuffers = 2,
+	.modes = { STARPU_RW, STARPU_R },
+};
+
+static struct starpu_codelet cl7 = {
+	.cpu_funcs = { cpu_codelet_func_7 },
+	.cpu_funcs_name = { "cpu_codelet_func_7" },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = { cublas_codelet_func_7 },
+	.cuda_flags = { STARPU_CUDA_ASYNC },
+#endif
+	.nbuffers = 2,
+	.modes = { STARPU_RW, STARPU_R },
+};
+
+static struct starpu_codelet cl8 = {
+	.cpu_funcs = { cpu_codelet_func_8 },
+	.cpu_funcs_name = { "cpu_codelet_func_8" },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = { cublas_codelet_func_8 },
+#endif
+	.nbuffers = 1,
+	.modes = { STARPU_R },
+};
+
+static struct starpu_codelet cl9 = {
+	.cpu_funcs = { cpu_codelet_func_9 },
+	.cpu_funcs_name = { "cpu_codelet_func_9" },
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = { cublas_codelet_func_9 },
+	.cuda_flags = { STARPU_CUDA_ASYNC },
+#endif
+	.nbuffers = 2,
+	.modes = { STARPU_RW, STARPU_R },
+};
+
 void launch_new_cg_iteration(struct cg_problem *problem)
 {
 	int ret;
@@ -213,30 +275,16 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* q = A d */
 	struct starpu_task *task4 = create_task(maskiter | 4UL);
-	task4->cl->cpu_funcs[0] = cpu_codelet_func_4;
-	task4->cl->cpu_funcs_name[0] = "cpu_codelet_func_4";
-	task4->cl->nbuffers = 3;
-	task4->cl->modes[0] = STARPU_R;
-	task4->cl->modes[1] = STARPU_R;
-	task4->cl->modes[2] = STARPU_W;
-
+	task4->cl = &cl4;
 	task4->handles[0] = problem->ds_matrixA;
 	task4->handles[1] = problem->ds_vecd;
 	task4->handles[2] = problem->ds_vecq;
 
 	/* alpha = delta_new / ( trans(d) q )*/
 	struct starpu_task *task5 = create_task(maskiter | 5UL);
-#ifdef STARPU_USE_CUDA
-	task5->cl->cuda_funcs[0] = cublas_codelet_func_5;
-#endif
-	task5->cl->cpu_funcs[0] = cpu_codelet_func_5;
-	task5->cl->cpu_funcs_name[0] = "cpu_codelet_func_5";
+	task5->cl = &cl5;
 	task5->cl_arg = problem;
 	task5->cl_arg_size = sizeof(*problem);
-	task5->cl->nbuffers = 2;
-	task5->cl->modes[0] = STARPU_R;
-	task5->cl->modes[1] = STARPU_R;
-
 	task5->handles[0] = problem->ds_vecd;
 	task5->handles[1] = problem->ds_vecq;
 
@@ -244,18 +292,9 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* x = x + alpha d */
 	struct starpu_task *task6 = create_task(maskiter | 6UL);
-#ifdef STARPU_USE_CUDA
-	task6->cl->cuda_funcs[0] = cublas_codelet_func_6;
-	task6->cl->cuda_flags[0] = STARPU_CUDA_ASYNC;
-#endif
-	task6->cl->cpu_funcs[0] = cpu_codelet_func_6;
-	task6->cl->cpu_funcs_name[0] = "cpu_codelet_func_6";
+	task6->cl = &cl6;
 	task6->cl_arg = problem;
 	task6->cl_arg_size = sizeof(*problem);
-	task6->cl->nbuffers = 2;
-	task6->cl->modes[0] = STARPU_RW;
-	task6->cl->modes[1] = STARPU_R;
-
 	task6->handles[0] = problem->ds_vecx;
 	task6->handles[1] = problem->ds_vecd;
 
@@ -263,18 +302,9 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* r = r - alpha q */
 	struct starpu_task *task7 = create_task(maskiter | 7UL);
-#ifdef STARPU_USE_CUDA
-	task7->cl->cuda_funcs[0] = cublas_codelet_func_7;
-	task7->cl->cuda_flags[0] = STARPU_CUDA_ASYNC;
-#endif
-	task7->cl->cpu_funcs[0] = cpu_codelet_func_7;
-	task7->cl->cpu_funcs_name[0] = "cpu_codelet_func_7";
+	task7->cl = &cl7;
 	task7->cl_arg = problem;
 	task7->cl_arg_size = sizeof(*problem);
-	task7->cl->nbuffers = 2;
-	task7->cl->modes[0] = STARPU_RW;
-	task7->cl->modes[1] = STARPU_R;
-
 	task7->handles[0] = problem->ds_vecr;
 	task7->handles[1] = problem->ds_vecq;
 
@@ -282,33 +312,18 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* update delta_* and compute beta */
 	struct starpu_task *task8 = create_task(maskiter | 8UL);
-#ifdef STARPU_USE_CUDA
-	task8->cl->cuda_funcs[0] = cublas_codelet_func_8;
-#endif
-	task8->cl->cpu_funcs[0] = cpu_codelet_func_8;
-	task8->cl->cpu_funcs_name[0] = "cpu_codelet_func_8";
+	task8->cl = &cl8;
 	task8->cl_arg = problem;
 	task8->cl_arg_size = sizeof(*problem);
-	task8->cl->nbuffers = 1;
-	task8->cl->modes[0] = STARPU_R;
 	task8->handles[0] = problem->ds_vecr;
 
 	starpu_tag_declare_deps((starpu_tag_t)(maskiter | 8UL), 1, (starpu_tag_t)(maskiter | 7UL));
 
 	/* d = r + beta d */
 	struct starpu_task *task9 = create_task(maskiter | 9UL);
-#ifdef STARPU_USE_CUDA
-	task9->cl->cuda_funcs[0] = cublas_codelet_func_9;
-	task9->cl->cuda_flags[0] = STARPU_CUDA_ASYNC;
-#endif
-	task9->cl->cpu_funcs[0] = cpu_codelet_func_9;
-	task9->cl->cpu_funcs_name[0] = "cpu_codelet_func_9";
+	task9->cl = &cl9;
 	task9->cl_arg = problem;
 	task9->cl_arg_size = sizeof(*problem);
-	task9->cl->nbuffers = 2;
-	task9->cl->modes[0] = STARPU_RW;
-	task9->cl->modes[1] = STARPU_R;
-
 	task9->handles[0] = problem->ds_vecd;
 	task9->handles[1] = problem->ds_vecr;
 
@@ -432,6 +447,10 @@ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
 	starpu_data_unregister(ds_vecr);
 	starpu_data_unregister(ds_vecd);
 	starpu_data_unregister(ds_vecq);
+
+	free(ptr_vecr);
+	free(ptr_vecd);
+	free(ptr_vecq);
 }
 
 

+ 4 - 0
examples/heat/heat.c

@@ -751,6 +751,10 @@ int main(int argc, char **argv)
 			result[TRANSLATE(i)] = Bformer[TRANSLATE(i)];
 		}
 
+		free(nzval);
+		free(colind);
+		free(rowptr);
+		free(B);
 	}
 	else
 	{

+ 2 - 1
examples/heat/heat.sh

@@ -23,7 +23,8 @@ PREFIX=$(dirname $0)
 
 $PREFIX/heat -shape 0
 $PREFIX/heat -shape 1
-$PREFIX/heat -shape 2
+# sometimes lead to pivot being 0
+#$PREFIX/heat -shape 2
 
 $PREFIX/heat -cg
 

+ 3 - 1
examples/lu/lu_example.c

@@ -422,13 +422,15 @@ int main(int argc, char **argv)
 		if (pivot)
 		{
 			pivot_saved_matrix(ipiv);
-			free(ipiv);
 		}
 
 		check_result();
 	}
 #endif
 
+	if (pivot)
+		free(ipiv);
+
 	starpu_free_flags(A, (size_t)size*size*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
 
 	starpu_cublas_shutdown();

+ 1 - 0
examples/lu/xlu_pivot.c

@@ -399,6 +399,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
 	/* gather all the data */
 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
+	starpu_data_unregister(dataA);
 	free(piv_description);
 
 	return ret;

+ 46 - 20
examples/mlr/mlr.c

@@ -50,7 +50,15 @@ static long sum;
 static void cl_params(struct starpu_task *task, double *parameters)
 {
 	int m, n, k;
-	starpu_codelet_unpack_args(task->cl_arg, &m, &n, &k);
+	int* vector_mn;
+	starpu_data_handle_t vector_mn_handle;
+
+	vector_mn = (int*)STARPU_VECTOR_GET_PTR(task->interfaces[0]);
+	m = vector_mn[0];
+	n = vector_mn[1];
+
+	starpu_codelet_unpack_args(task->cl_arg, &k);
+
 	parameters[0] = m;
 	parameters[1] = n;
 	parameters[2] = k;
@@ -61,10 +69,13 @@ void cpu_func(void *buffers[], void *cl_arg)
 {
 	long i;
 	int m,n,k;
-	starpu_codelet_unpack_args(cl_arg,
-			     	  &m,
-     			     	  &n,
-     			     	  &k);
+	int* vector_mn;
+
+	vector_mn = (int*)STARPU_VECTOR_GET_PTR(buffers[0]);
+	m = vector_mn[0];
+	n = vector_mn[1];
+
+	starpu_codelet_unpack_args(cl_arg, &k);
 
 	for(i=0; i < (long) (m*m*n); i++)
 		sum+=i;
@@ -123,7 +134,8 @@ static struct starpu_codelet cl_init =
 {
 	.cpu_funcs = { cpu_func },
 	.cpu_funcs_name = { "cpu_func" },
-	.nbuffers = 0,
+	.nbuffers = 1,
+	.modes = {STARPU_R},
 	.model = &cl_model_init,
 };
 
@@ -131,7 +143,8 @@ static struct starpu_codelet cl_final =
 {
 	.cpu_funcs = { cpu_func },
 	.cpu_funcs_name = { "cpu_func" },
-	.nbuffers = 0,
+	.nbuffers = 1,
+	.modes = {STARPU_R},
 	.model = &cl_model_final,
 };
 
@@ -147,29 +160,42 @@ int main(int argc, char **argv)
 
 	sum=0;
 	int m,n,k;
+	int* vector_mn = malloc( 2 * sizeof(int) );
+	starpu_data_handle_t vector_mn_handle;
+
+	starpu_vector_data_register( &vector_mn_handle,
+				     STARPU_MAIN_RAM,
+				     (uintptr_t)vector_mn, 2,
+				     sizeof(int) );
 
-        /* Giving pseudo-random values to the M,N,K parameters and inserting tasks */
-	for(i=0; i < 42; i++)
+	/* Giving pseudo-random values to the M,N,K parameters and inserting tasks */
+	for ( i = 0; i < 42; i++)
 	{
 		m = (int) ((rand() % 10)+1);
 		n = (int) ((rand() % 10)+1);
 		k = (int) ((rand() % 10)+1);
 
-		for(j=0; j < 42; j++)
+		/* To illustrate the usage, M and N are stored in a data handle */
+		starpu_data_acquire(vector_mn_handle, STARPU_W);
+		vector_mn[0] = m;
+		vector_mn[1] = n;
+		starpu_data_release(vector_mn_handle);
+
+		for ( j = 0; j < 42; j++)
 		{
-			starpu_insert_task(&cl_init,
-				   STARPU_VALUE, &m, sizeof(int),
-				   STARPU_VALUE, &n, sizeof(int),
-				   STARPU_VALUE, &k, sizeof(int),
-				   0);
-			starpu_insert_task(&cl_final,
-				   STARPU_VALUE, &m, sizeof(int),
-				   STARPU_VALUE, &n, sizeof(int),
-				   STARPU_VALUE, &k, sizeof(int),
-				   0);
+			starpu_insert_task( &cl_init,
+					    STARPU_R, vector_mn_handle,
+					    STARPU_VALUE, &k, sizeof(int),
+					    0 );
+			starpu_insert_task( &cl_final,
+					    STARPU_R, vector_mn_handle,
+					    STARPU_VALUE, &k, sizeof(int),
+					    0 );
 		}
 	}
 
+	starpu_data_unregister(vector_mn_handle);
+	free(vector_mn);
 	starpu_shutdown();
 
 	return 0;

+ 6 - 0
src/common/thread.c

@@ -73,6 +73,9 @@ int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_
 	void *tsd;
 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 	*thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args);
+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
+	MSG_process_ref(*thread);
+#endif
 	return 0;
 }
 
@@ -85,6 +88,9 @@ int starpu_pthread_join(starpu_pthread_t thread STARPU_ATTRIBUTE_UNUSED, void **
 {
 #if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 14)
 	MSG_process_join(thread, 1000000);
+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
+	MSG_process_unref(thread);
+#endif
 #else
 	MSG_process_sleep(1);
 #endif

+ 16 - 0
src/datawizard/malloc.c

@@ -32,6 +32,7 @@
 #ifdef STARPU_SIMGRID
 #include <sys/mman.h>
 #include <fcntl.h>
+#include <smpi/smpi.h>
 #endif
 
 #ifndef O_BINARY
@@ -48,9 +49,12 @@ static int malloc_on_node_default_flags[STARPU_MAXNODES];
 
 /* This file is used for implementing "folded" allocation */
 #ifdef STARPU_SIMGRID
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
+/* TODO: drop when simgrid 3.15 is reasonably largely used by people who need the feature */
 static int bogusfile = -1;
 static unsigned long _starpu_malloc_simulation_fold;
 #endif
+#endif
 
 void starpu_malloc_set_align(size_t align)
 {
@@ -224,6 +228,10 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 #ifdef STARPU_SIMGRID
 	if (flags & STARPU_MALLOC_SIMULATION_FOLDED)
 	{
+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
+		*A = SMPI_SHARED_MALLOC(dim);
+#else
+		/* TODO: drop when simgrid 3.15 is reasonably largely used by people who need the feature */
 		/* Use "folded" allocation: the same file is mapped several
 		 * times contiguously, to get a memory area one can read/write,
 		 * without consuming memory */
@@ -276,6 +284,7 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 			}
 			*A = buf;
 		}
+#endif
 	}
 	else
 #endif
@@ -442,7 +451,12 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 #ifdef STARPU_SIMGRID
 	if (flags & STARPU_MALLOC_SIMULATION_FOLDED)
 	{
+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
+		SMPI_SHARED_FREE(A);
+#else
+		/* TODO: drop when simgrid 3.15 is reasonably largely used by people who need the feature */
 		munmap(A, dim);
+#endif
 	}
 	else
 #endif
@@ -810,9 +824,11 @@ _starpu_malloc_init(unsigned dst_node)
 	disable_pinning = starpu_get_env_number("STARPU_DISABLE_PINNING");
 	malloc_on_node_default_flags[dst_node] = STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT;
 #ifdef STARPU_SIMGRID
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
 	/* Reasonably "costless" */
 	_starpu_malloc_simulation_fold = starpu_get_env_number_default("STARPU_MALLOC_SIMULATION_FOLD", 1) << 20;
 #endif
+#endif
 }
 
 void

+ 4 - 1
src/drivers/mpi/driver_mpi_common.c

@@ -80,7 +80,10 @@ int _starpu_mpi_common_mp_init()
 #endif
 
                 int thread_support;
-                STARPU_ASSERT(MPI_Init_thread(_starpu_get_argc(), _starpu_get_argv(), required, &thread_support) == MPI_SUCCESS);
+                if (MPI_Init_thread(_starpu_get_argc(), _starpu_get_argv(), required, &thread_support) != MPI_SUCCESS)
+		{
+			STARPU_ABORT_MSG("Cannot Initialize MPI !");
+		}
 
                 if (thread_support != required)
                 {