浏览代码

Correctly compute the expected conversion time.

Cyril Roelandt 13 年之前
父节点
当前提交
04a049211d
共有 1 个文件被更改,包括 71 次插入9 次删除
  1. 71 9
      src/core/perfmodel/perfmodel.c

+ 71 - 9
src/core/perfmodel/perfmodel.c

@@ -205,16 +205,78 @@ double starpu_task_expected_power(struct starpu_task *task, enum starpu_perf_arc
 	return starpu_model_expected_perf(task, task->cl->power_model, arch, nimpl);
 }
 
-double starpu_task_expected_conversion_time(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
+double starpu_task_expected_conversion_time(struct starpu_task *task,
+					    enum starpu_perf_archtype arch,
+					    unsigned nimpl)
 {
-	/* XXX : 
-	   sum = 0.0;
-	   For each multiformat handle that needs a conversion task :
-		sum += length of the conversion task
-	   return sum;
-	*/
-	return 0;
-	//return starpu_model_expected_perf(task, task->cl->conversion_model, arch, nimpl);
+	int i, err;
+	double sum = 0.0;
+	int cpu_worker, cuda_worker, opencl_worker;
+	unsigned int node, cpu_node, cuda_node, opencl_node;
+
+	/* We need one memory node per archtype, so we look up a single worker
+	 * of each type and use its node. This is kind of ugly, but it does the job.
+	 * XXX: Should we return 0 if there are no devices, i.e. when
+	 * (err != 1 && err != -ERANGE)?
+	 */
+#ifdef STARPU_USE_CPU
+	err = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER,
+					    &cpu_worker, 1);
+	if (err != 1 && err != -ERANGE)
+		return 0.0;
+	cpu_node = starpu_worker_get_memory_node(cpu_worker);
+#endif
+#ifdef STARPU_USE_CUDA
+	err = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER,
+					    &cuda_worker, 1);
+	if (err != 1 && err != -ERANGE)
+		return 0.0;
+	cuda_node = starpu_worker_get_memory_node(cuda_worker);
+#endif
+#ifdef STARPU_USE_OPENCL
+	err = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER,
+					    &opencl_worker, 1);
+	if (err != 1 && err != -ERANGE)
+		return 0.0;
+
+	opencl_node = starpu_worker_get_memory_node(opencl_worker);
+#endif
+
+	for (i = 0; i < task->cl->nbuffers; i++)
+	{
+		unsigned int id;
+		starpu_data_handle_t handle;
+		struct starpu_task *conversion_task;
+		
+		handle = task->buffers[i].handle;
+		id = starpu_get_handle_interface_id(handle);
+		if (id != STARPU_MULTIFORMAT_INTERFACE_ID)
+			continue;
+
+		if (arch >= STARPU_CPU_DEFAULT && 
+		    arch < STARPU_CUDA_DEFAULT)
+			node = cpu_node;
+		else if (arch >= STARPU_CUDA_DEFAULT &&
+			 arch < STARPU_OPENCL_DEFAULT)
+			node = cuda_node;
+		else if (arch >= STARPU_OPENCL_DEFAULT &&
+			 arch < STARPU_GORDON_DEFAULT)
+			node = opencl_node;
+		else
+			STARPU_ASSERT(0);
+
+		if (!_starpu_handle_needs_conversion_task(handle, node))
+			continue;
+
+		conversion_task = _starpu_create_conversion_task(handle, node);
+		sum += starpu_task_expected_length(conversion_task, arch, nimpl);
+		handle->refcnt--;
+		handle->busy_count--;
+		starpu_task_deinit(conversion_task);
+		free(conversion_task);
+	}
+
+	return sum;
 }
 
 /* Predict the transfer time (in µs) to move a handle to a memory node */