瀏覽代碼

to avoid calling starpu_worker_get_memory_node, simply add variants of _starpu_handle_needs_conversion_task and _starpu_create_conversion_task which take the arch itself

Samuel Thibault 12 年之前
父節點
當前提交
f5363c873c
共有 5 個文件被更改,包括 25 次插入50 次删除
  1. 6 44
      src/core/perfmodel/perfmodel.c
  2. 6 2
      src/core/sched_policy.c
  3. 3 0
      src/core/sched_policy.h
  4. 6 3
      src/core/task.c
  5. 4 1
      src/core/task.h

+ 6 - 44
src/core/perfmodel/perfmodel.c

@@ -225,42 +225,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 {
 	unsigned i;
 	double sum = 0.0;
-	int node;
-
-	/* Quickly check for needed conversion before looking extensively.  */
-	for (i = 0; i < task->cl->nbuffers; i++)
-		if (!_starpu_data_is_multiformat_handle(task->handles[i]))
-			break;
-	if (i == task->cl->nbuffers)
-		return 0.0;
-
-	/* We need to get one node per archtype. This is kinda ugly,
-	 * but it does the job.
-	 * XXX : Should we return 0 if there are no devices ?
-	 * (err != 1 && err != -ERANGE)
-	 */
-#ifdef STARPU_USE_CPU
-	int cpu_worker, cpu_node;
-	cpu_worker = starpu_worker_get_by_type(STARPU_CPU_WORKER, 0);
-	if (cpu_worker == -1)
-		return 0.0;
-	cpu_node = starpu_worker_get_memory_node(cpu_worker);
-#endif
-#ifdef STARPU_USE_CUDA
-	int cuda_worker, cuda_node;
-	cuda_worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0);
-	if (cuda_worker == -1)
-		return 0.0;
-	cuda_node = starpu_worker_get_memory_node(cuda_worker);
-#endif
-#ifdef STARPU_USE_OPENCL
-	int opencl_worker, opencl_node;
-	opencl_worker = starpu_worker_get_by_type(STARPU_OPENCL_WORKER, 0);
-	if (opencl_worker == -1)
-		return 0.0;
-
-	opencl_node = starpu_worker_get_memory_node(opencl_worker);
-#endif
+	enum starpu_node_kind node_kind;
 
 	for (i = 0; i < task->cl->nbuffers; i++)
 	{
@@ -271,26 +236,23 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 		if (!_starpu_data_is_multiformat_handle(handle))
 			continue;
 
-		node = -EINVAL;
 #ifdef STARPU_USE_CPU
 		if (arch < STARPU_CUDA_DEFAULT)
-			node = cpu_node;
+			node_kind = STARPU_CPU_RAM;
 #endif
 #ifdef STARPU_USE_CUDA
 		if (arch >= STARPU_CUDA_DEFAULT && arch < STARPU_OPENCL_DEFAULT)
-			node = cuda_node;
+			node_kind = STARPU_CUDA_RAM;
 #endif
 #ifdef STARPU_USE_OPENCL
 		if (arch >= STARPU_OPENCL_DEFAULT && arch < STARPU_GORDON_DEFAULT)
-			node = opencl_node;
+			node_kind = STARPU_OPENCL_RAM;
 #endif
-		if (node == -EINVAL)
-			STARPU_ABORT();
 
-		if (!_starpu_handle_needs_conversion_task(handle, node))
+		if (!_starpu_handle_needs_conversion_task_for_arch(handle, node_kind))
 			continue;
 
-		conversion_task = _starpu_create_conversion_task(handle, node);
+		conversion_task = _starpu_create_conversion_task_for_arch(handle, node_kind);
 		sum += starpu_task_expected_length(conversion_task, arch, nimpl);
 		_starpu_spin_lock(&handle->header_lock);
 		handle->refcnt--;

+ 6 - 2
src/core/sched_policy.c

@@ -430,9 +430,14 @@ int _starpu_push_task_end(struct starpu_task *task)
 struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 						   unsigned int node)
 {
+	/* Thin wrapper: translate the memory node into its node kind and
+	 * delegate to the _for_arch variant. The result must be returned:
+	 * flowing off the end of a non-void function and then using its
+	 * value is undefined behavior, so callers would otherwise get an
+	 * indeterminate task pointer. */
+	return _starpu_create_conversion_task_for_arch(handle, starpu_node_get_kind(node));
+}
+
+struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t handle,
+						   enum starpu_node_kind node_kind)
+{
 	struct starpu_task *conversion_task;
 	struct starpu_multiformat_interface *format_interface;
-	enum starpu_node_kind node_kind;
 
 	conversion_task = starpu_task_create();
 	conversion_task->synchronous = 0;
@@ -440,7 +445,6 @@ struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 
 	/* The node does not really matter here */
 	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
-	node_kind = starpu_node_get_kind(node);
 
 	_starpu_spin_lock(&handle->header_lock);
 	handle->refcnt++;

+ 3 - 0
src/core/sched_policy.h

@@ -50,6 +50,9 @@ void _starpu_wait_on_sched_event(void);
 struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 						   unsigned int node);
 
+struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t handle,
+						   enum starpu_node_kind node_kind);
+
 void _starpu_sched_pre_exec_hook(struct starpu_task *task);
 
 /*

+ 6 - 3
src/core/task.c

@@ -817,10 +817,13 @@ int
 _starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
 				     unsigned int node)
 {
-	enum starpu_node_kind node_kind;
-
-	node_kind = starpu_node_get_kind(node);
+	return _starpu_handle_needs_conversion_task_for_arch(handle, starpu_node_get_kind(node));
+}
 
+int
+_starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
+				     enum starpu_node_kind node_kind)
+{
 	/*
 	 * Here, we assume that CUDA devices and OpenCL devices use the
 	 * same data structure. A conversion is only needed when moving

+ 4 - 1
src/core/task.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2011 INRIA
  *
@@ -59,6 +59,9 @@ int _starpu_task_submit_internally(struct starpu_task *task);
 
 int _starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
 					 unsigned int node);
+int
+_starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
+				     enum starpu_node_kind node_kind);
 
 int _starpu_task_uses_multiformat_handles(struct starpu_task *task);