8 years ago · dc923e3168
--- a/examples/sched_ctx/gpu_partition.c
+++ b/examples/sched_ctx/gpu_partition.c
@@ -109,8 +109,8 @@ int main(int argc, char **argv)
 
				 	int ncuda = 0;
			
 
				 	int gpu_devid = -1;
			
 
				 
			
 
				-#warning temporary fix: skip test as cuda computation fails
			
 
				-	return 77;
			
 
				+/* #warning temporary fix: skip test as cuda computation fails */
			
 
				+/* 	return 77; */
			
 
				 
			
 
				 #ifndef STARPU_HAVE_SETENV
			
 
				 	return 77;
			
@@ -118,6 +118,7 @@ int main(int argc, char **argv)
 
				 	/* Have separate threads for streams */
			
 
				 	setenv("STARPU_CUDA_THREAD_PER_WORKER", "1", 1);
			
 
				 	setenv("STARPU_NWORKER_PER_CUDA", "2", 1);
			
 
				+	setenv("STARPU_NCUDA", "1", 1);
			
 
				 #endif
			
 
				 
			
 
				 	/* Initialize StarPU */
			
@@ -175,7 +176,7 @@ int main(int argc, char **argv)
 
				 	int ncpus = starpu_cpu_worker_get_count();
			
 
				 	int workers[ncpus+nstreams];
			
 
				 	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, ncpus);
			
 
				-
			
 
				+	
			
 
				 	int sched_ctxs[nstreams];
			
 
				 	int nsms[nstreams];
			
 
				 	nsms[0] = 6;
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -1443,8 +1443,13 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 
				             mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
			
 
				 #endif
			
 
				 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
			
 
				-
			
 
				-			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus + topology->ncudagpus
			
 
				+	    unsigned cuda_busy_cpus = 0;
			
 
				+#if defined(STARPU_USE_CUDA)
			
 
				+	    cuda_busy_cpus = th_per_stream ? (nworker_per_cuda * topology->ncudagpus) : 
			
 
				+		    topology->ncudagpus;
			
 
				+#endif
			
 
				+			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus 
			
 
				+				+ cuda_busy_cpus
			
 
				 				+ topology->nopenclgpus + topology->nsccdevices;
			
 
				 
			
 
				 			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;