瀏覽代碼

MPI: New flag STARPU_NODE_SELECTION_POLICY to specify a policy for
selecting a node to execute the codelet when several nodes own data in
W mode.

Nathalie Furmento 10 年之前
父節點
當前提交
1b33209807

+ 4 - 0
ChangeLog

@@ -40,6 +40,10 @@ New features:
 	  defined for the given data)
         - New functions starpu_mpi_task_build() and
   	  starpu_mpi_task_post_build()
+        - New flag STARPU_NODE_SELECTION_POLICY to specify a policy for
+          selecting a node to execute the codelet when several nodes
+	  own data in W mode.
+
   * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to
     let starpu commute write accesses.
   * Out-of-core support, through registration of disk areas as additional memory

+ 12 - 1
doc/doxygen/chapters/api/mpi.doxy

@@ -250,7 +250,7 @@ The internal algorithm is as follows:
         Find out which MPI node is going to execute the codelet.
         <ul>
             <li>If there is only one node owning data in ::STARPU_W mode, it will be selected;
-            <li>If there is several nodes owning data in ::STARPU_W node, the one selected will be the one having the least data in R mode so as to minimize the amount of data to be transfered;
+            <li>If there are several nodes owning data in ::STARPU_W mode, a node will be selected according to a given node selection policy (see ::STARPU_NODE_SELECTION_POLICY or starpu_mpi_node_selection_set_default_policy());
             <li>The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node;
             <li>The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owing the given data will execute the codelet.
         </ul>
@@ -299,6 +299,17 @@ owner if needed. At least the target node and the owner have to call
 the function. On reception, the \p callback function is called with
 the argument \p arg.
 
+\fn char *starpu_mpi_node_selection_get_default_policy()
+\ingroup API_MPI_Support
+Return the current default policy used to select the node which will execute the codelet.
+
+\fn int starpu_mpi_node_selection_set_default_policy(char *policy)
+\ingroup API_MPI_Support
+Set the current default policy used to select the node which will
+execute the codelet. The policy "node_with_most_R_data" selects the
+node having the most data in R mode so as to minimize the amount of
+data to be transferred.
+
 @name Collective Operations
 \anchor MPICollectiveOperations
 \ingroup API_MPI_Support

+ 1 - 0
include/starpu_task_util.h

@@ -53,6 +53,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_TAG_ONLY          (19<<16)
 #define STARPU_POSSIBLY_PARALLEL    (20<<16)
 #define STARPU_WORKER_ORDER      (21<<16)
+#define STARPU_NODE_SELECTION_POLICY (22<<16)
 
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);

+ 3 - 0
mpi/include/starpu_mpi.h

@@ -78,6 +78,9 @@ void starpu_mpi_set_communication_tag(int tag);
 
 void starpu_mpi_data_register(starpu_data_handle_t data_handle, int tag, int rank);
 
+/* Return the name of the default policy used to select the node which will
+ * execute the codelet when several nodes own data in W mode. */
+char *starpu_mpi_node_selection_get_default_policy(void);
+/* Set the name of the default node selection policy (e.g.
+ * "node_with_most_R_data"). Return 0 on success. */
+int starpu_mpi_node_selection_set_default_policy(char *policy);
+
 #ifdef __cplusplus
 }
 #endif

+ 26 - 2
mpi/src/starpu_mpi_select_node.c

@@ -24,13 +24,27 @@
 #include <starpu_mpi_task_insert.h>
 #include <datawizard/coherency.h>
 
-int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
+static char *_default_policy = "node_with_most_R_data";
+
+/*
+ * Return the name of the policy currently used by default to select the
+ * node which will execute the codelet.
+ *
+ * The returned string is owned by this file (it initially designates a
+ * string literal): the caller must neither modify nor free it.
+ */
+char *starpu_mpi_node_selection_get_default_policy(void)
+{
+	return _default_policy;
+}
+
+/*
+ * Set the default policy used to select the node which will execute the
+ * codelet.
+ *
+ * policy: NUL-terminated name of the policy; its content is copied, so the
+ *         caller keeps ownership of the string.
+ *
+ * Return 0 on success, -1 when policy is NULL or its name is too long to
+ * be stored (the previous default policy is then left unchanged).
+ */
+int starpu_mpi_node_selection_set_default_policy(char *policy)
+{
+	/* _default_policy initially points to a string literal, which must
+	 * never be written to: keep the name in private writable storage
+	 * and make _default_policy designate that storage instead. */
+	static char policy_storage[128];
+	size_t len;
+
+	if (policy == NULL)
+		return -1;
+
+	len = strlen(policy);
+	if (len >= sizeof(policy_storage))
+		return -1;
+
+	/* memmove rather than memcpy: policy may be the very pointer which
+	 * was previously handed out by the getter, i.e. policy_storage. */
+	memmove(policy_storage, policy, len + 1);
+	_default_policy = policy_storage;
+	return 0;
+}
+
+int _starpu_mpi_select_node_with_most_R_data(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
 {
 	size_t *size_on_nodes;
 	size_t max_size;
 	int i;
 	int xrank;
 
+	(void)me;
 	size_on_nodes = (size_t *)calloc(1, nb_nodes * sizeof(size_t));
 
 	for(i= 0 ; i<nb_data ; i++)
@@ -44,7 +58,6 @@ int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *desc
 		}
 	}
 
-	// We select the node which has the most data in R mode
 	max_size = 0;
 	for(i=0 ; i<nb_nodes ; i++)
 	{
@@ -57,3 +70,14 @@ int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *desc
 
 	return xrank;
 }
+
+/*
+ * Select the node which will execute the codelet, according to the given
+ * policy name, or to the default policy when policy is NULL.
+ *
+ * The only policy name handled here is "node_with_most_R_data"; any other
+ * name, or the absence of any policy, is reported through STARPU_ABORT_MSG.
+ */
+int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data, char *policy)
+{
+	char *selected_policy = _default_policy;
+
+	/* An explicitly given policy takes precedence over the default one */
+	if (policy)
+	{
+		selected_policy = policy;
+	}
+
+	if (!selected_policy)
+	{
+		STARPU_ABORT_MSG("Node selection policy MUST be defined\n");
+	}
+
+	if (strcmp(selected_policy, "node_with_most_R_data") != 0)
+	{
+		STARPU_ABORT_MSG("Node selection policy <%s> unknown\n", selected_policy);
+	}
+	else
+	{
+		return _starpu_mpi_select_node_with_most_R_data(me, nb_nodes, descr, nb_data);
+	}
+}

+ 1 - 1
mpi/src/starpu_mpi_select_node.h

@@ -23,7 +23,7 @@
 extern "C" {
 #endif
 
-int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data);
+int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data, char *policy);
 
 #ifdef __cplusplus
 }

+ 7 - 1
mpi/src/starpu_mpi_task_insert.c

@@ -206,11 +206,13 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 	int nb_allocated_data = 16;
 	struct starpu_data_descr *descrs;
 	int nb_data;
+	char *select_node_policy = NULL;
 
 	descrs = (struct starpu_data_descr *)malloc(nb_allocated_data * sizeof(struct starpu_data_descr));
 	nb_data = 0;
 	*do_execute = -1;
 	*xrank = -1;
+
 	va_copy(varg_list_copy, varg_list);
 	while ((arg_type = va_arg(varg_list_copy, int)) != 0)
 	{
@@ -381,6 +383,10 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 		{
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");
 		}
+		else if (arg_type==STARPU_NODE_SELECTION_POLICY)
+		{
+			/* This loop walks the arguments through varg_list_copy: the
+			 * policy name has to be consumed from that same list,
+			 * otherwise the next argument type would be read from the
+			 * wrong position. */
+			select_node_policy = va_arg(varg_list_copy, char *);
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
@@ -393,7 +399,7 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 	{
 		// We need to find out which node is going to execute the codelet.
 		_STARPU_MPI_DISP("Different nodes are owning W data. Need to specify which node is going to execute the codelet, using STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA\n");
-		*xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data);
+		*xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy);
 		*do_execute = (me == *xrank);
 	}
 	else

+ 8 - 0
src/util/starpu_task_insert_utils.c

@@ -154,6 +154,10 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_lis
 		{
 			(void)va_arg(varg_list, starpu_tag_t);
 		}
+		else if (arg_type==STARPU_NODE_SELECTION_POLICY)
+		{
+			(void)va_arg(varg_list, char *);
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
@@ -441,6 +445,10 @@ void _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **
 			starpu_tag_t tag = va_arg(varg_list, starpu_tag_t);
 			(*task)->tag_id = tag;
 		}
+		else if (arg_type==STARPU_NODE_SELECTION_POLICY)
+		{
+			(void)va_arg(varg_list, char *);
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);