Browse Source

MPI: New flag STARPU_NODE_SELECTION_POLICY to specify a policy for
selecting a node to execute the codelet when several nodes own data in
W mode.

Nathalie Furmento 10 years ago
parent
commit
1b33209807

+ 4 - 0
ChangeLog

@@ -40,6 +40,10 @@ New features:
 	  defined for the given data)
 	  defined for the given data)
         - New functions starpu_mpi_task_build() and
         - New functions starpu_mpi_task_build() and
   	  starpu_mpi_task_post_build()
   	  starpu_mpi_task_post_build()
+        - New flag STARPU_NODE_SELECTION_POLICY to specify a policy for
+          selecting a node to execute the codelet when several nodes
+	  own data in W mode.
+
   * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to
   * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to
     let starpu commute write accesses.
     let starpu commute write accesses.
   * Out-of-core support, through registration of disk areas as additional memory
   * Out-of-core support, through registration of disk areas as additional memory

+ 12 - 1
doc/doxygen/chapters/api/mpi.doxy

@@ -250,7 +250,7 @@ The internal algorithm is as follows:
         Find out which MPI node is going to execute the codelet.
         Find out which MPI node is going to execute the codelet.
         <ul>
         <ul>
             <li>If there is only one node owning data in ::STARPU_W mode, it will be selected;
             <li>If there is only one node owning data in ::STARPU_W mode, it will be selected;
-            <li>If there is several nodes owning data in ::STARPU_W node, the one selected will be the one having the least data in R mode so as to minimize the amount of data to be transfered;
+            <li>If there are several nodes owning data in ::STARPU_W mode, a node will be selected according to a given node selection policy (see ::STARPU_NODE_SELECTION_POLICY or starpu_mpi_node_selection_set_default_policy());
             <li>The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node;
             <li>The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node;
             <li>The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owning the given data will execute the codelet.
             <li>The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owning the given data will execute the codelet.
         </ul>
         </ul>
@@ -299,6 +299,17 @@ owner if needed. At least the target node and the owner have to call
 the function. On reception, the \p callback function is called with
 the function. On reception, the \p callback function is called with
 the argument \p arg.
 the argument \p arg.
 
 
+\fn char *starpu_mpi_node_selection_get_default_policy()
+\ingroup API_MPI_Support
+Return the current default policy used to select the node which will execute the codelet.
+
+\fn int starpu_mpi_node_selection_set_default_policy(char *policy)
+\ingroup API_MPI_Support
+Set the current default policy used to select the node which will
+execute the codelet. The policy "node_with_most_R_data" selects the
+node having the most data in R mode so as to minimize the amount of
+data to be transferred.
+
 @name Collective Operations
 @name Collective Operations
 \anchor MPICollectiveOperations
 \anchor MPICollectiveOperations
 \ingroup API_MPI_Support
 \ingroup API_MPI_Support

+ 1 - 0
include/starpu_task_util.h

@@ -53,6 +53,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_TAG_ONLY          (19<<16)
 #define STARPU_TAG_ONLY          (19<<16)
 #define STARPU_POSSIBLY_PARALLEL    (20<<16)
 #define STARPU_POSSIBLY_PARALLEL    (20<<16)
 #define STARPU_WORKER_ORDER      (21<<16)
 #define STARPU_WORKER_ORDER      (21<<16)
+#define STARPU_NODE_SELECTION_POLICY (22<<16)
 
 
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);

+ 3 - 0
mpi/include/starpu_mpi.h

@@ -78,6 +78,9 @@ void starpu_mpi_set_communication_tag(int tag);
 
 
 void starpu_mpi_data_register(starpu_data_handle_t data_handle, int tag, int rank);
 void starpu_mpi_data_register(starpu_data_handle_t data_handle, int tag, int rank);
 
 
+char *starpu_mpi_node_selection_get_default_policy();
+int starpu_mpi_node_selection_set_default_policy(char *policy);
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 26 - 2
mpi/src/starpu_mpi_select_node.c

@@ -24,13 +24,27 @@
 #include <starpu_mpi_task_insert.h>
 #include <starpu_mpi_task_insert.h>
 #include <datawizard/coherency.h>
 #include <datawizard/coherency.h>
 
 
-int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
+/* Upper bound, terminator included, on the length of a policy name. */
+#define _STARPU_MPI_NODE_SELECTION_POLICY_MAX_LEN 64
+
+/* Name of the policy applied when a task does not specify one with
+ * STARPU_NODE_SELECTION_POLICY. It lives in a writable buffer because the
+ * setter copies into it; copying into a string literal would be undefined
+ * behaviour. */
+static char _default_policy[_STARPU_MPI_NODE_SELECTION_POLICY_MAX_LEN] = "node_with_most_R_data";
+
+/* Return the name of the current default node selection policy. The
+ * returned pointer refers to internal storage and must not be freed. */
+char *starpu_mpi_node_selection_get_default_policy()
+{
+	return _default_policy;
+}
+
+/* Replace the default node selection policy with a copy of policy.
+ * Return 0 on success, or -1 (leaving the current default untouched) when
+ * policy is NULL or does not fit in the internal buffer. */
+int starpu_mpi_node_selection_set_default_policy(char *policy)
+{
+	size_t len;
+
+	if (policy == NULL)
+		return -1;
+
+	len = strlen(policy);
+	if (len >= sizeof(_default_policy))
+		return -1;
+
+	/* memmove rather than memcpy: the caller may hand back the pointer
+	 * obtained from starpu_mpi_node_selection_get_default_policy(). */
+	memmove(_default_policy, policy, len + 1);
+	return 0;
+}
+
+int _starpu_mpi_select_node_with_most_R_data(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
 {
 {
 	size_t *size_on_nodes;
 	size_t *size_on_nodes;
 	size_t max_size;
 	size_t max_size;
 	int i;
 	int i;
 	int xrank;
 	int xrank;
 
 
+	(void)me;
 	size_on_nodes = (size_t *)calloc(1, nb_nodes * sizeof(size_t));
 	size_on_nodes = (size_t *)calloc(1, nb_nodes * sizeof(size_t));
 
 
 	for(i= 0 ; i<nb_data ; i++)
 	for(i= 0 ; i<nb_data ; i++)
@@ -44,7 +58,6 @@ int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *desc
 		}
 		}
 	}
 	}
 
 
-	// We select the node which has the most data in R mode
 	max_size = 0;
 	max_size = 0;
 	for(i=0 ; i<nb_nodes ; i++)
 	for(i=0 ; i<nb_nodes ; i++)
 	{
 	{
@@ -57,3 +70,14 @@ int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *desc
 
 
 	return xrank;
 	return xrank;
 }
 }
+
+/* Choose the MPI node that will execute the codelet. The policy given for
+ * the task is used when non-NULL, otherwise the default policy name is
+ * used. The name is then matched against the known policies; a missing or
+ * unrecognised name is reported through STARPU_ABORT_MSG. */
+int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data, char *policy)
+{
+	char *policy_name = policy;
+
+	if (policy_name == NULL)
+		policy_name = _default_policy;
+	if (policy_name == NULL)
+		STARPU_ABORT_MSG("Node selection policy MUST be defined\n");
+
+	if (strcmp(policy_name, "node_with_most_R_data") == 0)
+		return _starpu_mpi_select_node_with_most_R_data(me, nb_nodes, descr, nb_data);
+
+	STARPU_ABORT_MSG("Node selection policy <%s> unknown\n", policy_name);
+}

+ 1 - 1
mpi/src/starpu_mpi_select_node.h

@@ -23,7 +23,7 @@
 extern "C" {
 extern "C" {
 #endif
 #endif
 
 
-int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data);
+int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data, char *policy);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }

+ 7 - 1
mpi/src/starpu_mpi_task_insert.c

@@ -206,11 +206,13 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 	int nb_allocated_data = 16;
 	int nb_allocated_data = 16;
 	struct starpu_data_descr *descrs;
 	struct starpu_data_descr *descrs;
 	int nb_data;
 	int nb_data;
+	char *select_node_policy = NULL;
 
 
 	descrs = (struct starpu_data_descr *)malloc(nb_allocated_data * sizeof(struct starpu_data_descr));
 	descrs = (struct starpu_data_descr *)malloc(nb_allocated_data * sizeof(struct starpu_data_descr));
 	nb_data = 0;
 	nb_data = 0;
 	*do_execute = -1;
 	*do_execute = -1;
 	*xrank = -1;
 	*xrank = -1;
+
 	va_copy(varg_list_copy, varg_list);
 	va_copy(varg_list_copy, varg_list);
 	while ((arg_type = va_arg(varg_list_copy, int)) != 0)
 	while ((arg_type = va_arg(varg_list_copy, int)) != 0)
 	{
 	{
@@ -381,6 +383,10 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 		{
 		{
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");
 		}
 		}
+		else if (arg_type==STARPU_NODE_SELECTION_POLICY)
+		{
+			select_node_policy = va_arg(varg_list, char *);
+		}
 		else
 		else
 		{
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
@@ -393,7 +399,7 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 	{
 	{
 		// We need to find out which node is going to execute the codelet.
 		// We need to find out which node is going to execute the codelet.
 		_STARPU_MPI_DISP("Different nodes are owning W data. Need to specify which node is going to execute the codelet, using STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA\n");
 		_STARPU_MPI_DISP("Different nodes are owning W data. Need to specify which node is going to execute the codelet, using STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA\n");
-		*xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data);
+		*xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy);
 		*do_execute = (me == *xrank);
 		*do_execute = (me == *xrank);
 	}
 	}
 	else
 	else

+ 8 - 0
src/util/starpu_task_insert_utils.c

@@ -154,6 +154,10 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_lis
 		{
 		{
 			(void)va_arg(varg_list, starpu_tag_t);
 			(void)va_arg(varg_list, starpu_tag_t);
 		}
 		}
+		else if (arg_type==STARPU_NODE_SELECTION_POLICY)
+		{
+			(void)va_arg(varg_list, char *);
+		}
 		else
 		else
 		{
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
@@ -441,6 +445,10 @@ void _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **
 			starpu_tag_t tag = va_arg(varg_list, starpu_tag_t);
 			starpu_tag_t tag = va_arg(varg_list, starpu_tag_t);
 			(*task)->tag_id = tag;
 			(*task)->tag_id = tag;
 		}
 		}
+		else if (arg_type==STARPU_NODE_SELECTION_POLICY)
+		{
+			(void)va_arg(varg_list, char *);
+		}
 		else
 		else
 		{
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);