Browse Source

merge trunk@6869:6879

Nathalie Furmento 12 years ago
parent
commit
a0caabe241

+ 12 - 0
configure.ac

@@ -1111,6 +1111,18 @@ if test x$use_mpi = xyes; then
 	AC_DEFINE(STARPU_USE_MPI,[],[whether the StarPU MPI library is available])
 fi
 
+AC_MSG_CHECKING(whether communication statistics should be generated)
+AC_ARG_ENABLE(comm-stats, [AS_HELP_STRING([--enable-comm-stats],
+			[enable communication statistics (only valid with the StarPU MPI library])],
+			enable_comm_stats=$enableval, enable_comm_stats=no)
+AC_MSG_RESULT($enable_comm_stats)
+AC_SUBST(STATS, $enable_comm_stats)
+AC_SUBST(STARPU_COMM_STATS, $enable_comm_stats)
+
+if test x$enable_comm_stats = xyes; then
+        AC_DEFINE(STARPU_COMM_STATS, [1], [enable communication statistics])
+fi
+
 ###############################################################################
 #                                                                             #
 #                               StarPU-Top                                    #

+ 4 - 2
mpi/Makefile.am

@@ -76,7 +76,8 @@ libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined
 noinst_HEADERS =					\
 	starpu_mpi_private.h				\
 	starpu_mpi_fxt.h				\
-	starpu_mpi_insert_task_cache.h
+	starpu_mpi_insert_task_cache.h			\
+	starpu_mpi_stats.h
 
 versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
 versinclude_HEADERS = 				\
@@ -89,7 +90,8 @@ libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi_datatype.c				\
 	starpu_mpi_insert_task.c			\
 	starpu_mpi_insert_task_cache.c			\
-	starpu_mpi_collective.c
+	starpu_mpi_collective.c				\
+	starpu_mpi_stats.c
 
 ###################
 # Stencil example #

+ 2 - 1
mpi/examples/cholesky/mpi_cholesky.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -213,6 +213,7 @@ int main(int argc, char **argv)
 		  {
 		       dblockx = nodes/factor;
 		       dblocky = factor;
+		       break;
 		  }
 	     }
 	}

+ 12 - 2
mpi/starpu_mpi.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,6 +21,7 @@
 //#define STARPU_MPI_VERBOSE	1
 #include <starpu_mpi_private.h>
 #include <starpu_profiling.h>
+#include <starpu_mpi_stats.h>
 
 /* TODO find a better way to select the polling method (perhaps during the
  * configuration) */
@@ -63,6 +64,8 @@ static void starpu_mpi_isend_func(struct _starpu_mpi_req *req)
 
 	starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype);
 
+	_starpu_mpi_comm_amounts_inc(req->comm, req->srcdst, req->datatype);
+
         req->ret = MPI_Isend(ptr, 1, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
         STARPU_ASSERT(req->ret == MPI_SUCCESS);
 
@@ -765,6 +768,10 @@ static void _starpu_mpi_add_sync_point_in_fxt(void)
 static
 int _starpu_mpi_initialize(int initialize_mpi, int *rank, int *world_size)
 {
+#ifdef STARPU_COMM_STATS
+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-comm-stats, which slows down a bit\n");
+#endif
+
 	_STARPU_PTHREAD_MUTEX_INIT(&mutex, NULL);
 	_STARPU_PTHREAD_COND_INIT(&cond_progression, NULL);
 	_STARPU_PTHREAD_COND_INIT(&cond_finished, NULL);
@@ -800,7 +807,7 @@ int _starpu_mpi_initialize(int initialize_mpi, int *rank, int *world_size)
 #endif
 
 	_starpu_mpi_add_sync_point_in_fxt();
-
+	_starpu_mpi_comm_amounts_init();
 	return 0;
 }
 
@@ -834,6 +841,9 @@ int starpu_mpi_shutdown(void)
 	_starpu_mpi_req_list_delete(detached_requests);
 	_starpu_mpi_req_list_delete(new_requests);
 
+	_starpu_mpi_comm_amounts_display();
+	_starpu_mpi_comm_amounts_free();
+
 	return 0;
 }
 

+ 105 - 0
mpi/starpu_mpi_stats.c

@@ -0,0 +1,105 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi_stats.h>
+#include <common/config.h>
+#include <stdio.h>
+//#define STARPU_MPI_VERBOSE	1
+#include <starpu_mpi_private.h>
+
+/* measure the amount of data transfers between each pair of MPI nodes */
+#ifdef STARPU_COMM_STATS
+static size_t **comm_amount;
+static int world_size;
+#endif /* STARPU_COMM_STATS */
+
+void _starpu_mpi_comm_amounts_init()
+{
+#ifdef STARPU_COMM_STATS
+	int i;
+
+	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+	_STARPU_MPI_DEBUG("allocating for %d nodes\n", world_size);
+
+	comm_amount = (size_t **) calloc(1, world_size * sizeof(size_t *));
+	for(i=0 ; i<world_size ; i++)
+	{
+		comm_amount[i] = (size_t *) calloc(1, world_size * sizeof(size_t));
+	}
+#endif /* STARPU_COMM_STATS */
+}
+
+void _starpu_mpi_comm_amounts_free()
+{
+#ifdef STARPU_COMM_STATS
+	int i;
+	for(i=0 ; i<world_size ; i++)
+	{
+		free(comm_amount[i]);
+	}
+	free(comm_amount);
+#endif /* STARPU_COMM_STATS */
+}
+
+void _starpu_mpi_comm_amounts_inc(MPI_Comm comm  __attribute__ ((unused)), 
+				  unsigned dst  __attribute__ ((unused)), MPI_Datatype datatype  __attribute__ ((unused)))
+{
+#ifdef STARPU_COMM_STATS
+	int src, size;
+
+	MPI_Comm_rank(comm, &src);
+	MPI_Type_size(datatype, &size);
+
+	_STARPU_MPI_DEBUG("adding %d from %d to %d\n", size, src, dst);
+
+	comm_amount[src][dst] += size;
+#endif /* STARPU_COMM_STATS */
+}
+
+void _starpu_mpi_comm_amounts_display()
+{
+#ifdef STARPU_COMM_STATS
+	unsigned src, dst;
+
+	size_t sum = 0;
+
+	for (dst = 0; dst < world_size; dst++)
+		for (src = 0; src < world_size; src++)
+		{
+			sum += comm_amount[src][dst];
+		}
+
+	fprintf(stderr, "\nCommunication transfers stats:\nTOTAL transfers %f B\t%f MB\n", (float)sum, (float)sum/1024/1024);
+
+	for (dst = 0; dst < world_size; dst++)
+		for (src = 0; src < world_size; src++)
+		{
+			if (comm_amount[src][dst])
+			{
+				fprintf(stderr, "\t%d <-> %d\t%f B\t%f MB\n",
+					src, dst, (float)comm_amount[src][dst] + (float)comm_amount[dst][src],
+					((float)comm_amount[src][dst] + (float)comm_amount[dst][src])/(1024*1024));
+				fprintf(stderr, "\t\t%d -> %d\t%f B\t%f MB\n",
+					src, dst, (float)comm_amount[src][dst],
+					((float)comm_amount[src][dst])/(1024*1024));
+				fprintf(stderr, "\t\t%d -> %d\t%f B\t%f MB\n",
+					dst, src, (float)comm_amount[dst][src],
+					((float)comm_amount[dst][src])/(1024*1024));
+			}
+		}
+#endif /* STARPU_COMM_STATS */
+}
+

+ 24 - 0
mpi/starpu_mpi_stats.h

@@ -0,0 +1,24 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include <mpi.h>
+
+void _starpu_mpi_comm_amounts_init();
+void _starpu_mpi_comm_amounts_free();
+void _starpu_mpi_comm_amounts_inc(MPI_Comm comm, unsigned dst, MPI_Datatype datatype);
+void _starpu_mpi_comm_amounts_display();
+

+ 14 - 24
src/core/task.c

@@ -50,38 +50,20 @@ void starpu_task_init(struct starpu_task *task)
 {
 	STARPU_ASSERT(task);
 
-	task->cl = NULL;
-	task->cl_arg = NULL;
-	task->cl_arg_size = 0;
-
-	task->callback_func = NULL;
-	task->callback_arg = NULL;
+	/* As most of the fields must be initialised at NULL, let's put 0
+	 * everywhere */
+	memset(task, 0, sizeof(struct starpu_task));
 
+	/* Now we can initialise fields which recquire custom value */
 	task->priority = STARPU_DEFAULT_PRIO;
-	task->use_tag = 0;
-	task->synchronous = 0;
-
-	task->execute_on_a_specific_worker = 0;
-
-	task->bundle = NULL;
 
 	task->detach = 1;
 
-	/* by default, we do not let StarPU free the task structure since
-	 * starpu_task_init is likely to be used only for statically allocated
-	 * tasks */
-	task->destroy = 0;
-
-	task->regenerate = 0;
-
 	task->status = STARPU_TASK_INVALID;
 
-	task->profiling_info = NULL;
-
 	task->predicted = NAN;
 	task->predicted_transfer = NAN;
 
-	task->starpu_private = NULL;
 	task->magic = 42;
 	task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
 	
@@ -124,7 +106,7 @@ struct starpu_task * __attribute__((malloc)) starpu_task_create(void)
 {
 	struct starpu_task *task;
 
-	task = (struct starpu_task *) calloc(1, sizeof(struct starpu_task));
+	task = (struct starpu_task *) malloc(sizeof(struct starpu_task));
 	STARPU_ASSERT(task);
 
 	starpu_task_init(task);
@@ -544,6 +526,15 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	if (task->cl->power_model)
 		_starpu_load_perfmodel(task->cl->power_model);
 
+	/* We retain handle reference count */
+	unsigned i;
+	for (i=0; i<task->cl->nbuffers; i++) {
+		starpu_data_handle_t handle = task->handles[i];
+		_starpu_spin_lock(&handle->header_lock);
+		handle->busy_count++;
+		_starpu_spin_unlock(&handle->header_lock);
+	}
+
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 	_starpu_increment_nsubmitted_tasks();
 	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
@@ -551,7 +542,6 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	j->submitted = 1;
 	_starpu_increment_nready_tasks();
 
-	unsigned i;
 	for (i=0 ; i<task->cl->nbuffers ; i++)
 	{
 		j->ordered_buffers[i].handle = j->task->handles[i];

+ 13 - 2
src/debug/traces/starpu_fxt.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -686,6 +686,17 @@ void handle_update_task_cnt(struct fxt_ev_64 *ev, struct starpu_fxt_options *opt
 	fprintf(activity_file, "cnt_submitted\t%f\t%lu\n", current_timestamp, nsubmitted);
 }
 
+static void handle_codelet_tag(struct fxt_ev_64 *ev)
+{
+	uint64_t tag;
+	unsigned long job;
+
+	tag = ev->param[0];
+	job = ev->param[1];
+
+	_starpu_fxt_dag_add_tag(tag, job);
+}
+
 static void handle_codelet_tag_deps(struct fxt_ev_64 *ev)
 {
 	uint64_t child;
@@ -960,7 +971,7 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 				break;
 
 			case _STARPU_FUT_TAG:
-				/* XXX */
+				handle_codelet_tag(&ev);
 				break;
 
 			case _STARPU_FUT_TAG_DEPS:

+ 2 - 1
src/debug/traces/starpu_fxt.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -40,6 +40,7 @@
 
 void _starpu_fxt_dag_init(char *dag_filename);
 void _starpu_fxt_dag_terminate(void);
+void _starpu_fxt_dag_add_tag(uint64_t tag, unsigned long job_id);
 void _starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father);
 void _starpu_fxt_dag_set_tag_done(uint64_t tag, const char *color);
 void _starpu_fxt_dag_add_task_deps(unsigned long dep_prev, unsigned long dep_succ);

+ 8 - 1
src/debug/traces/starpu_fxt_dag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -64,6 +64,13 @@ void _starpu_fxt_dag_terminate(void)
 	fclose(out_file);
 }
 
+void _starpu_fxt_dag_add_tag(uint64_t tag, unsigned long job_id)
+{
+	if (out_file)
+		fprintf(out_file, "\t \"tag_%llx\"->\"task_%llx\"->\"tag_%llx\"\n",
+			(unsigned long long)tag, (unsigned long long)job_id, (unsigned long long) tag);
+}
+
 void _starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father)
 {
 	if (out_file)