|
@@ -96,6 +96,41 @@ static struct starpu_perfmodel model_gpu_task =
|
|
|
};
|
|
|
|
|
|
static void
|
|
|
+init_perfmodels_gpu(int gpu_type)
|
|
|
+{
|
|
|
+ int nb_worker_gpu = starpu_worker_get_count_by_type(gpu_type);
|
|
|
+ int *worker_gpu_ids = malloc(nb_worker_gpu * sizeof(int));
|
|
|
+ int worker_gpu;
|
|
|
+
|
|
|
+ starpu_worker_get_ids_by_type(gpu_type, worker_gpu_ids, nb_worker_gpu);
|
|
|
+ for(worker_gpu = 0 ; worker_gpu < nb_worker_gpu ; worker_gpu ++)
|
|
|
+ {
|
|
|
+ struct starpu_perfmodel_arch arch_gpu;
|
|
|
+ arch_gpu.ndevices = 1;
|
|
|
+ arch_gpu.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
|
|
|
+ arch_gpu.devices[0].type = gpu_type;
|
|
|
+ arch_gpu.devices[0].devid = starpu_worker_get_devid(worker_gpu_ids[worker_gpu]);
|
|
|
+ arch_gpu.devices[0].ncores = 1;
|
|
|
+
|
|
|
+ int comb_gpu = starpu_get_arch_comb(arch_gpu.ndevices, arch_gpu.devices);
|
|
|
+ if(comb_gpu == -1)
|
|
|
+ {
|
|
|
+ comb_gpu = starpu_add_arch_comb(arch_gpu.ndevices, arch_gpu.devices);
|
|
|
+
|
|
|
+ model_cpu_task.per_arch[comb_gpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
|
|
|
+ memset(&model_cpu_task.per_arch[comb_gpu][0], 0, sizeof(struct starpu_perfmodel_per_arch));
|
|
|
+ model_cpu_task.nimpls[comb_gpu] = 1;
|
|
|
+ model_cpu_task.per_arch[comb_gpu][0].cost_function = cpu_task_gpu;
|
|
|
+
|
|
|
+ model_gpu_task.per_arch[comb_gpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
|
|
|
+ memset(&model_gpu_task.per_arch[comb_gpu][0], 0, sizeof(struct starpu_perfmodel_per_arch));
|
|
|
+ model_gpu_task.nimpls[comb_gpu] = 1;
|
|
|
+ model_gpu_task.per_arch[comb_gpu][0].cost_function = gpu_task_gpu;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static void
|
|
|
init_perfmodels(void)
|
|
|
{
|
|
|
unsigned devid, ncore;
|
|
@@ -124,39 +159,9 @@ init_perfmodels(void)
|
|
|
model_gpu_task.nimpls[comb_cpu] = 1;
|
|
|
model_gpu_task.per_arch[comb_cpu][0].cost_function = gpu_task_cpu;
|
|
|
|
|
|
- {
|
|
|
- // We need to set the cost function for each combination with a CUDA worker
|
|
|
- int nb_worker_cuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
|
|
|
- int *worker_cuda_ids = malloc(nb_worker_cuda * sizeof(int));
|
|
|
- int worker_cuda;
|
|
|
-
|
|
|
- starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, worker_cuda_ids, nb_worker_cuda);
|
|
|
- for(worker_cuda = 0 ; worker_cuda < nb_worker_cuda ; worker_cuda ++)
|
|
|
- {
|
|
|
- struct starpu_perfmodel_arch arch_cuda;
|
|
|
- arch_cuda.ndevices = 1;
|
|
|
- arch_cuda.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
|
|
|
- arch_cuda.devices[0].type = STARPU_CUDA_WORKER;
|
|
|
- arch_cuda.devices[0].devid = starpu_worker_get_devid(worker_cuda_ids[worker_cuda]);
|
|
|
- arch_cuda.devices[0].ncores = 1;
|
|
|
-
|
|
|
- int comb_cuda = starpu_get_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
|
|
|
- if(comb_cuda == -1)
|
|
|
- {
|
|
|
- comb_cuda = starpu_add_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
|
|
|
-
|
|
|
- model_cpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
|
|
|
- memset(&model_cpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
|
|
|
- model_cpu_task.nimpls[comb_cuda] = 1;
|
|
|
- model_cpu_task.per_arch[comb_cuda][0].cost_function = cpu_task_gpu;
|
|
|
-
|
|
|
- model_gpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
|
|
|
- memset(&model_gpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
|
|
|
- model_gpu_task.nimpls[comb_cuda] = 1;
|
|
|
- model_gpu_task.per_arch[comb_cuda][0].cost_function = gpu_task_gpu;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+	// We need to set the cost function for each combination with a CUDA or an OpenCL worker
|
|
|
+ init_perfmodels_gpu(STARPU_CUDA_WORKER);
|
|
|
+ init_perfmodels_gpu(STARPU_OPENCL_WORKER);
|
|
|
|
|
|
/* if(model_cpu_task.per_arch[STARPU_CPU_WORKER] != NULL) */
|
|
|
/* { */
|