Forráskód Böngészése

Add STARPU_SPECIFIC_NODE_LOCAL_OR_CPU node specification

Samuel Thibault 5 éve
szülő
commit
27908c8e12

+ 17 - 0
doc/doxygen/chapters/310_data_management.doxy

@@ -974,4 +974,21 @@ when the kernel does not make so many accesses to the second data, and thus data
 being remote e.g. over a PCI bus is not a performance problem, and avoids
 filling the fast local memory with data which does not need the performance.
 
+In cases where the kernel is fine with some data being either local or in the
+main memory, ::STARPU_SPECIFIC_NODE_LOCAL_OR_CPU can be used. StarPU will then
+be free to leave the data in the main memory and let the kernel access it from
+accelerators, or to move it to the accelerator before starting the kernel. For
+instance:
+
+\code{.c}
+struct starpu_codelet cl = /* example codelet mixing a local buffer with a local-or-CPU buffer */
+{
+	.cuda_funcs = { kernel },
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_R}, /* first buffer is read-write, second is read-only */
+	.specific_nodes = 1, /* NOTE(review): presumably makes StarPU honour .nodes below -- confirm */
+	.nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU}, /* first buffer: worker-local memory; second: local or main memory, StarPU's choice */
+};
+\endcode
+
 */

+ 25 - 2
include/starpu_task.h

@@ -229,15 +229,38 @@ typedef starpu_mpi_ms_kernel_t (*starpu_mpi_ms_func_t)(void);
 #define STARPU_VARIABLE_NBUFFERS (-1)
 
 /**
-   Value to be set in the field starpu_codelet::nodes to request
+   Value to be set in the starpu_codelet::nodes field to request
+   StarPU to put the data in the local memory of the worker running the task
+   (this is the default behavior).
+*/
+#define STARPU_SPECIFIC_NODE_LOCAL (-1)
+
+/**
+   Value to be set in the starpu_codelet::nodes field to request
    StarPU to put the data in CPU-accessible memory (and let StarPU
    choose the NUMA node).
 */
-#define STARPU_SPECIFIC_NODE_LOCAL (-1)
 #define STARPU_SPECIFIC_NODE_CPU (-2)
+
+/**
+   Value to be set in the starpu_codelet::nodes field to request
+   StarPU to put the data in some slow memory.
+*/
 #define STARPU_SPECIFIC_NODE_SLOW (-3)
+
+/**
+   Value to be set in the starpu_codelet::nodes field to request
+   StarPU to put the data in some fast memory.
+*/
 #define STARPU_SPECIFIC_NODE_FAST (-4)
 
+/**
+   Value to be set in the starpu_codelet::nodes field to let StarPU decide
+   whether to put the data in the local memory of the worker running the task,
+   or in CPU-accessible memory (and let StarPU choose the NUMA node).
+*/
+#define STARPU_SPECIFIC_NODE_LOCAL_OR_CPU (-5)
+
 struct starpu_task;
 
 /**

+ 1 - 1
src/core/task.c

@@ -787,7 +787,7 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 			/* Make sure handles are not partitioned */
 			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data (or the pieces of a partitioned data) can be used in a task");
 			/* Make sure the specified node exists */
-			STARPU_ASSERT_MSG(node == STARPU_SPECIFIC_NODE_LOCAL || node == STARPU_SPECIFIC_NODE_CPU || node == STARPU_SPECIFIC_NODE_SLOW || (node >= 0 && node < (int) starpu_memory_nodes_get_count()), "The codelet-specified memory node does not exist");
+			STARPU_ASSERT_MSG(node == STARPU_SPECIFIC_NODE_LOCAL || node == STARPU_SPECIFIC_NODE_CPU || node == STARPU_SPECIFIC_NODE_SLOW || node == STARPU_SPECIFIC_NODE_LOCAL_OR_CPU || (node >= 0 && node < (int) starpu_memory_nodes_get_count()), "The codelet-specified memory node does not exist");
 			/* Provide the home interface for now if any,
 			 * for can_execute hooks */
 			if (handle->home_node != -1)

+ 18 - 0
src/core/topology.c

@@ -318,6 +318,15 @@ int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index,
 		// TODO: rather leave in DDR
 		node = local_node;
 		break;
+	case STARPU_SPECIFIC_NODE_LOCAL_OR_CPU:
+		if (task->handles[index]->per_node[local_node].state != STARPU_INVALID) {
+			/* It is here already, rather access it from here */
+			node = local_node;
+		} else {
+			/* It is not here already, do not bother moving it */
+			node = STARPU_MAIN_RAM;
+		}
+		break;
 	}
 	return node;
 }
@@ -342,6 +351,15 @@ int _starpu_task_data_get_node_on_worker(struct starpu_task *task, unsigned inde
 		// TODO: rather leave in DDR
 		node = local_node;
 		break;
+	case STARPU_SPECIFIC_NODE_LOCAL_OR_CPU:
+		if (task->handles[index]->per_node[local_node].state != STARPU_INVALID) {
+			/* It is here already, rather access it from here */
+			node = local_node;
+		} else {
+			/* It is not here already, do not bother moving it */
+			node = STARPU_MAIN_RAM;
+		}
+		break;
 	}
 	return node;
 }

+ 37 - 1
tests/datawizard/specific_node.c

@@ -32,6 +32,40 @@
 
 unsigned data, data2;
 
+void specific2_kernel(void *descr[], void *arg) /* test kernel: checks on which memory node each of its two buffers was provided */
+{
+	(void)arg; /* arg is unused */
+	int node = starpu_task_get_current_data_node(0); /* memory node of buffer 0 */
+	STARPU_ASSERT(node >= 0);
+	STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); /* buffer 0 must be on a CPU RAM node */
+	unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	if (node == STARPU_MAIN_RAM) /* on main RAM the pointer is expected to be the global `data` itself */
+		STARPU_ASSERT(dataptr == &data);
+
+	(*dataptr)++; /* buffer 0 is dereferenced only after the CPU RAM check above */
+
+	node = starpu_task_get_current_data_node(1); /* memory node of buffer 1 */
+	STARPU_ASSERT(node >= 0);
+	STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM
+			|| node == starpu_worker_get_local_memory_node()); /* buffer 1 may be on a CPU RAM node or on the worker's local node */
+	dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[1]);
+
+	if (node == STARPU_MAIN_RAM) /* buffer 1 is only compared by address, never dereferenced */
+		STARPU_ASSERT(dataptr == &data2);
+}
+
+static struct starpu_codelet specific2_cl = /* codelet running specific2_kernel with a per-buffer node request */
+{
+	.cpu_funcs = {specific2_kernel}, /* the same kernel function is used for all three worker kinds */
+	.cuda_funcs = {specific2_kernel},
+	.opencl_funcs = {specific2_kernel},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_RW},
+	.specific_nodes = 1, /* NOTE(review): presumably makes StarPU honour .nodes below -- confirm */
+	.nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU}, /* buffer 0: CPU-accessible memory; buffer 1: worker-local or CPU-accessible memory */
+};
+
 void specific_kernel(void *descr[], void *arg)
 {
 	(void)arg;
@@ -128,8 +162,10 @@ int main(void)
 	for (i = 0; i < ntasks; i++)
 	{
 		struct starpu_task *task = starpu_task_create();
-		if (i%2)
+		if (i%3 == 0)
 			task->cl = &specific_cl;
+		else if (i%3 == 1)
+			task->cl = &specific2_cl;
 		else
 			task->cl = &cl;
 		task->handles[0] = data_handle;