@@ -111,7 +111,7 @@ init_perfmodels(void)
 	arch_cpu.devices[0].ncores = 1;
 
 	int comb_cpu = starpu_get_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
-	if(comb_cpu == -1)
+	if (comb_cpu == -1)
 		comb_cpu = starpu_add_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
 
 	model_cpu_task.per_arch[comb_cpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
@@ -124,27 +124,39 @@ init_perfmodels(void)
 	model_gpu_task.nimpls[comb_cpu] = 1;
 	model_gpu_task.per_arch[comb_cpu][0].cost_function = gpu_task_cpu;
 
-	struct starpu_perfmodel_arch arch_cuda;
-	arch_cuda.ndevices = 1;
-	arch_cuda.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
-	arch_cuda.devices[0].type = STARPU_CUDA_WORKER;
-	arch_cuda.devices[0].devid = 0;
-	arch_cuda.devices[0].ncores = 1;
-
-	int comb_cuda = starpu_get_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
-	if(comb_cuda == -1)
-		comb_cuda = starpu_add_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
-
-	model_cpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
-	memset(&model_cpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
-	model_cpu_task.nimpls[comb_cuda] = 1;
-	model_cpu_task.per_arch[comb_cuda][0].cost_function = cpu_task_gpu;
-
-	model_gpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
-	memset(&model_gpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
-	model_gpu_task.nimpls[comb_cuda] = 1;
-	model_gpu_task.per_arch[comb_cuda][0].cost_function = gpu_task_gpu;
-
+	{
+		// We need to set the cost function for each combination with a CUDA worker
+		int nb_worker_cuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
+		int *worker_cuda_ids = malloc(nb_worker_cuda * sizeof(int));
+		int worker_cuda;
+
+		starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, worker_cuda_ids, nb_worker_cuda);
+		for(worker_cuda = 0 ; worker_cuda < nb_worker_cuda ; worker_cuda ++)
+		{
+			struct starpu_perfmodel_arch arch_cuda;
+			arch_cuda.ndevices = 1;
+			arch_cuda.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
+			arch_cuda.devices[0].type = STARPU_CUDA_WORKER;
+			arch_cuda.devices[0].devid = starpu_worker_get_devid(worker_cuda_ids[worker_cuda]);
+			arch_cuda.devices[0].ncores = 1;
+
+			int comb_cuda = starpu_get_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
+			if(comb_cuda == -1)
+			{
+				comb_cuda = starpu_add_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
+
+				model_cpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
+				memset(&model_cpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
+				model_cpu_task.nimpls[comb_cuda] = 1;
+				model_cpu_task.per_arch[comb_cuda][0].cost_function = cpu_task_gpu;
+
+				model_gpu_task.per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
+				memset(&model_gpu_task.per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
+				model_gpu_task.nimpls[comb_cuda] = 1;
+				model_gpu_task.per_arch[comb_cuda][0].cost_function = gpu_task_gpu;
+			}
+		}
+	}
 
 /* 	if(model_cpu_task.per_arch[STARPU_CPU_WORKER] != NULL) */
 /* 	{ */