Browse Source

port r11334 from 1.1: Prefetching is now done for all schedulers whenever it can be done regardless of the scheduling decision.

Samuel Thibault 11 years ago
parent
commit
d9bfb2bacc
4 changed files with 74 additions and 0 deletions
  1. 2 0
      ChangeLog
  2. 18 0
      src/core/sched_policy.c
  3. 43 0
      src/core/topology.c
  4. 11 0
      src/core/workers.h

+ 2 - 0
ChangeLog

@@ -52,6 +52,8 @@ New features:
     scheduled.
   * Small CUDA allocations (<= 4MiB) are now batched to avoid the huge
     cudaMalloc overhead.
+  * Prefetching is now done for all schedulers whenever it can be done
+    regardless of the scheduling decision.
 
 Small features:
   * New functions starpu_data_acquire_cb_sequential_consistency() and

+ 18 - 0
src/core/sched_policy.c

@@ -421,6 +421,24 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 	}
 	else
 	{
+		struct _starpu_machine_config *config = _starpu_get_machine_config();
+
+		/* When a task can only be executed on a given arch and we have
+		 * only one memory node for that arch, we can systematically
+		 * prefetch before the scheduling decision. */
+		if (starpu_get_prefetch_flag()) {
+			if (task->cl->where == STARPU_CPU && config->cpus_nodeid >= 0)
+				starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);
+			else if (task->cl->where == STARPU_CUDA && config->cuda_nodeid >= 0)
+				starpu_prefetch_task_input_on_node(task, config->cuda_nodeid);
+			else if (task->cl->where == STARPU_OPENCL && config->opencl_nodeid >= 0)
+				starpu_prefetch_task_input_on_node(task, config->opencl_nodeid);
+			else if (task->cl->where == STARPU_MIC && config->mic_nodeid >= 0)
+				starpu_prefetch_task_input_on_node(task, config->mic_nodeid);
+			else if (task->cl->where == STARPU_SCC && config->scc_nodeid >= 0)
+				starpu_prefetch_task_input_on_node(task, config->scc_nodeid);
+		}
+
 		STARPU_ASSERT(sched_ctx->sched_policy->push_task);
 		/* check out if there are any workers in the context */
 		starpu_pthread_mutex_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);

+ 43 - 0
src/core/topology.c

@@ -1313,6 +1313,7 @@ int
 _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
 {
 	int ret;
+	int i;
 
 	ret = _starpu_init_machine_config(config, no_mp_config);
 	if (ret)
@@ -1323,6 +1324,48 @@ _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
 
 	_starpu_init_workers_binding(config, no_mp_config);
 
+	/* -1: no worker of this kind seen yet; -2: its workers use several nodes */
+	config->cpus_nodeid = config->cuda_nodeid = config->opencl_nodeid = -1;
+	config->mic_nodeid = config->scc_nodeid = -1;
+	for (i = 0; i < starpu_worker_get_count(); i++)
+	{
+		switch (starpu_worker_get_type(i))
+		{
+			case STARPU_CPU_WORKER:
+				if (config->cpus_nodeid == -1)
+					config->cpus_nodeid = starpu_worker_get_memory_node(i);
+				else if (config->cpus_nodeid != starpu_worker_get_memory_node(i))
+					config->cpus_nodeid = -2;
+				break;
+			case STARPU_CUDA_WORKER:
+				if (config->cuda_nodeid == -1)
+					config->cuda_nodeid = starpu_worker_get_memory_node(i);
+				else if (config->cuda_nodeid != starpu_worker_get_memory_node(i))
+					config->cuda_nodeid = -2;
+				break;
+			case STARPU_OPENCL_WORKER:
+				if (config->opencl_nodeid == -1)
+					config->opencl_nodeid = starpu_worker_get_memory_node(i);
+				else if (config->opencl_nodeid != starpu_worker_get_memory_node(i))
+					config->opencl_nodeid = -2;
+				break;
+			case STARPU_MIC_WORKER:
+				if (config->mic_nodeid == -1)
+					config->mic_nodeid = starpu_worker_get_memory_node(i);
+				else if (config->mic_nodeid != starpu_worker_get_memory_node(i))
+					config->mic_nodeid = -2;
+				break;
+			case STARPU_SCC_WORKER:
+				if (config->scc_nodeid == -1)
+					config->scc_nodeid = starpu_worker_get_memory_node(i);
+				else if (config->scc_nodeid != starpu_worker_get_memory_node(i))
+					config->scc_nodeid = -2;
+				break;
+			case STARPU_ANY_WORKER:
+				STARPU_ASSERT(0);
+		}
+	}
+
 	return 0;
 }
 

+ 11 - 0
src/core/workers.h

@@ -271,6 +271,17 @@ struct _starpu_machine_config
 	/* Which SCC do we use? */
 	int current_scc_deviceid;
 
+	/* Memory node for cpus, if only one */
+	int cpus_nodeid;
+	/* Memory node for CUDA, if only one */
+	int cuda_nodeid;
+	/* Memory node for OpenCL, if only one */
+	int opencl_nodeid;
+	/* Memory node for MIC, if only one */
+	int mic_nodeid;
+	/* Memory node for SCC, if only one */
+	int scc_nodeid;
+
 	/* Basic workers : each of this worker is running its own driver and
 	 * can be combined with other basic workers. */
 	struct _starpu_worker workers[STARPU_NMAXWORKERS];