Browse Source

tests/sched_policies/simple_cpu_gpu_sched.c: fix code to set the cost function for all combinations with a CUDA device; this needs to be improved to be more user-friendly

Nathalie Furmento 11 years ago
parent
commit
09d64514fa
1 changed files with 34 additions and 22 deletions
  1. 34 22
      tests/sched_policies/simple_cpu_gpu_sched.c

+ 34 - 22
tests/sched_policies/simple_cpu_gpu_sched.c

@@ -111,7 +111,7 @@ init_perfmodels(void)
 	arch_cpu.devices[0].ncores = 1;
 
 	int comb_cpu = starpu_get_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
-	if(comb_cpu == -1)
+	if (comb_cpu == -1)
 		comb_cpu = starpu_add_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
 
 	model_cpu_task.per_arch[comb_cpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
@@ -124,27 +124,39 @@ init_perfmodels(void)
 	model_gpu_task.nimpls[comb_cpu] = 1;
 	model_gpu_task.per_arch[comb_cpu][0].cost_function = gpu_task_cpu;
 
-	struct starpu_perfmodel_arch arch_cuda;
-	arch_cuda.ndevices = 1;
-	arch_cuda.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
-	arch_cuda.devices[0].type = STARPU_CUDA_WORKER;
-	arch_cuda.devices[0].devid = 0;
-	arch_cuda.devices[0].ncores = 1;
-
-	int comb_cuda = starpu_get_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
-	if(comb_cuda == -1)
-		comb_cuda = starpu_add_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
-
-	model_cpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
-	memset(&model_cpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
-	model_cpu_task.nimpls[comb_cuda] = 1;
-	model_cpu_task.per_arch[comb_cuda][0].cost_function = cpu_task_gpu;
-
-	model_gpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
-	memset(&model_gpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
-	model_gpu_task.nimpls[comb_cuda] = 1;
-	model_gpu_task.per_arch[comb_cuda][0].cost_function = gpu_task_gpu;
-
+	{
+		// We need to set the cost function for each combination with a CUDA worker
+		int nb_worker_cuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
+		int *worker_cuda_ids = malloc(nb_worker_cuda * sizeof(int));
+		int worker_cuda;
+
+		starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, worker_cuda_ids, nb_worker_cuda);
+		for(worker_cuda = 0 ; worker_cuda < nb_worker_cuda ; worker_cuda ++)
+		{
+			struct starpu_perfmodel_arch arch_cuda;
+			arch_cuda.ndevices = 1;
+			arch_cuda.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
+			arch_cuda.devices[0].type = STARPU_CUDA_WORKER;
+			arch_cuda.devices[0].devid = starpu_worker_get_devid(worker_cuda_ids[worker_cuda]);
+			arch_cuda.devices[0].ncores = 1;
+
+			int comb_cuda = starpu_get_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
+			if(comb_cuda == -1)
+			{
+				comb_cuda = starpu_add_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
+
+				model_cpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
+				memset(&model_cpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
+				model_cpu_task.nimpls[comb_cuda] = 1;
+				model_cpu_task.per_arch[comb_cuda][0].cost_function = cpu_task_gpu;
+
+				model_gpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
+				memset(&model_gpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
+				model_gpu_task.nimpls[comb_cuda] = 1;
+				model_gpu_task.per_arch[comb_cuda][0].cost_function = gpu_task_gpu;
+			}
+		}
+	}
 
 /* 	if(model_cpu_task.per_arch[STARPU_CPU_WORKER] != NULL) */
 /* 	{ */