10 years ago · 259bdb17dc
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -66,6 +66,58 @@ static unsigned may_bind_automatically[STARPU_NARCH] = { 0 };
 
				 
			
 
				 #endif // defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
			
 
				 
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				+static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS];
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
			
 
				+#endif
			
 
				+
			
 
				+void *
			
 
				+_starpu_get_worker_from_driver(struct starpu_driver *d)
			
 
				+{
			
 
				+	unsigned nworkers = starpu_worker_get_count();
			
 
				+	unsigned workerid;
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	if (d->type == STARPU_CUDA_WORKER)
			
 
				+		return &cuda_worker_set[d->id.cuda_id];
			
 
				+#endif
			
 
				+
			
 
				+	for (workerid = 0; workerid < nworkers; workerid++)
			
 
				+	{
			
 
				+		if (starpu_worker_get_type(workerid) == d->type)
			
 
				+		{
			
 
				+			struct _starpu_worker *worker;
			
 
				+			worker = _starpu_get_worker_struct(workerid);
			
 
				+			switch (d->type)
			
 
				+			{
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+			case STARPU_CPU_WORKER:
			
 
				+				if (worker->devid == d->id.cpu_id)
			
 
				+					return worker;
			
 
				+				break;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+			case STARPU_OPENCL_WORKER:
			
 
				+			{
			
 
				+				cl_device_id device;
			
 
				+				starpu_opencl_get_device(worker->devid, &device);
			
 
				+				if (device == d->id.opencl_id)
			
 
				+					return worker;
			
 
				+				break;
			
 
				+			}
			
 
				+#endif
			
 
				+			default:
			
 
				+				_STARPU_DEBUG("Invalid device type\n");
			
 
				+				return NULL;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				 
			
 
				 /*
			
 
				  * Discover the topology of the machine
			
@@ -727,10 +779,12 @@ _starpu_init_mic_config (struct _starpu_machine_config *config,
 
				 
			
 
				 	/* _starpu_initialize_workers_mic_deviceid (config); */
			
 
				 
			
 
				+	mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
			
 
				 	unsigned miccore_id;
			
 
				 	for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + miccore_id;
			
 
				+		config->workers[worker_idx].set = &mic_worker_set[mic_idx];
			
 
				 		config->workers[worker_idx].arch = STARPU_MIC_WORKER;
			
 
				 		config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device *) malloc(sizeof(struct starpu_perfmodel_device));
			
 
				 		config->workers[worker_idx].perf_arch.ndevices = 1;
			
@@ -833,6 +887,15 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 
				 	_starpu_initialize_workers_bindid(config);
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				+	for (i = 0; i < (int) (sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0])); i++)
			
 
				+		cuda_worker_set[i].workers = NULL;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	for (i = 0; i < (int) (sizeof(mic_worker_set)/sizeof(mic_worker_set[0])); i++)
			
 
				+		mic_worker_set[i].workers = NULL;
			
 
				+#endif
			
 
				+
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				 	int ncuda = config->conf.ncuda;
			
 
				 	int nworker_per_cuda = starpu_get_env_number_default("STARPU_NWORKER_PER_CUDA", 1);
			
 
				 
			
@@ -880,10 +943,13 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 
				 	for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
			
 
				 	{
			
 
				 		int devid = _starpu_get_next_cuda_gpuid(config);
			
 
				+		int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
			
 
				+		cuda_worker_set[devid].workers = &config->workers[worker_idx0];
			
 
				 		for (i = 0; i < nworker_per_cuda; i++)
			
 
				 		{
			
 
				-			int worker_idx = topology->nworkers + cudagpu * nworker_per_cuda + i;
			
 
				+			int worker_idx = worker_idx0 + i;
			
 
				 
			
 
				+			config->workers[worker_idx].set = &cuda_worker_set[devid];
			
 
				 			config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
			
 
				 			config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
			
 
				 			config->workers[worker_idx].perf_arch.ndevices = 1;
			
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2010, 2012, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2010, 2012, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2015  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -50,4 +50,6 @@ struct _starpu_combined_worker;
 
				 /* Bind the current thread on the set of CPUs for the given combined worker. */
			
 
				 void _starpu_bind_thread_on_cpus(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, struct _starpu_combined_worker *combined_worker);
			
 
				 
			
 
				+void *_starpu_get_worker_from_driver(struct starpu_driver *d);
			
 
				+
			
 
				 #endif // __TOPOLOGY_H__
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -441,13 +441,6 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 
				  * Runtime initialization methods
			
 
				  */
			
 
				 
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				-static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS];
			
 
				-#endif
			
 
				-#ifdef STARPU_USE_MIC
			
 
				-static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
			
 
				-#endif
			
 
				-
			
 
				 static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
			
 
				 {
			
 
				 	starpu_pthread_cond_t *cond = &workerarg->sched_cond;
			
@@ -635,23 +628,12 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 	if (AYU_event) AYU_event(AYU_INIT, 0, NULL);
			
 
				 #endif
			
 
				 
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				-	for (i = 0; i < sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0]); i++)
			
 
				-		cuda_worker_set[i].workers = NULL;
			
 
				-#endif
			
 
				-#ifdef STARPU_USE_MIC
			
 
				-	for (i = 0; i < sizeof(mic_worker_set)/sizeof(mic_worker_set[0]); i++)
			
 
				-		mic_worker_set[i].workers = NULL;
			
 
				-#endif
			
 
				-
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
 
				 		struct _starpu_worker *workerarg = &pconfig->workers[worker];
			
 
				 #if defined(STARPU_USE_MIC) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				 		unsigned devid = workerarg->devid;
			
 
				-#endif
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				-		struct _starpu_worker_set *worker_set;
			
 
				+		struct _starpu_worker_set *worker_set = workerarg->set;
			
 
				 #endif
			
 
				 
			
 
				 		_STARPU_DEBUG("initialising worker %u/%u\n", worker, nworkers);
			
@@ -695,8 +677,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				 			case STARPU_CUDA_WORKER:
			
 
				 				driver.id.cuda_id = workerarg->devid;
			
 
				-				worker_set = &cuda_worker_set[devid];
			
 
				-				workerarg->set = worker_set;
			
 
				 
			
 
				 				/* We spawn only one thread per CUDA driver,
			
 
				 				 * which will control all CUDA workers of this
			
@@ -714,7 +694,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 				}
			
 
				 #endif
			
 
				 
			
 
				-				worker_set->workers = workerarg;
			
 
				 				worker_set->set_is_initialized = 0;
			
 
				 
			
 
				 				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
			
@@ -772,28 +751,22 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 #endif
			
 
				 #ifdef STARPU_USE_MIC
			
 
				 			case STARPU_MIC_WORKER:
			
 
				-				workerarg->set = &mic_worker_set[devid];
			
 
				-
			
 
				 				/* We spawn only one thread
			
 
				 				 * per MIC device, which will control all MIC
			
 
				 				 * workers of this device. (by using a worker set). */
			
 
				-				if (mic_worker_set[devid].workers)
			
 
				+				if (worker_set->workers)
			
 
				 					break;
			
 
				 
			
 
				-				mic_worker_set[devid].nworkers = pconfig->topology.nmiccores[devid];
			
 
				+				worker_set->nworkers = pconfig->topology.nmiccores[devid];
			
 
				 
			
 
				-				/* We assume all MIC workers of a given MIC
			
 
				-				 * device are contiguous so that we can
			
 
				-				 * address them with the first one only. */
			
 
				-				mic_worker_set[devid].workers = workerarg;
			
 
				-				mic_worker_set[devid].set_is_initialized = 0;
			
 
				+				worker_set->set_is_initialized = 0;
			
 
				 
			
 
				 				STARPU_PTHREAD_CREATE_ON(
			
 
				 						workerarg->name,
			
 
				-						&mic_worker_set[devid].worker_thread,
			
 
				+						&worker_set->worker_thread,
			
 
				 						NULL,
			
 
				 						_starpu_mic_src_worker,
			
 
				-						&mic_worker_set[devid],
			
 
				+						worker_set,
			
 
				 						_starpu_simgrid_get_host_by_worker(workerarg));
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
@@ -803,13 +776,13 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			
 
				 #endif
			
 
				 
			
 
				-				STARPU_PTHREAD_MUTEX_LOCK(&mic_worker_set[devid].mutex);
			
 
				-				while (!mic_worker_set[devid].set_is_initialized)
			
 
				-					STARPU_PTHREAD_COND_WAIT(&mic_worker_set[devid].ready_cond,
			
 
				-								  &mic_worker_set[devid].mutex);
			
 
				-				STARPU_PTHREAD_MUTEX_UNLOCK(&mic_worker_set[devid].mutex);
			
 
				+				STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex);
			
 
				+				while (!worker_set->set_is_initialized)
			
 
				+					STARPU_PTHREAD_COND_WAIT(&worker_set->ready_cond,
			
 
				+								  &worker_set->mutex);
			
 
				+				STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex);
			
 
				 
			
 
				-				mic_worker_set[devid].started = 1;
			
 
				+				worker_set->started = 1;
			
 
				 
			
 
				 				break;
			
 
				 #endif /* STARPU_USE_MIC */
			
@@ -2020,51 +1993,6 @@ int _starpu_worker_get_nsched_ctxs(int workerid)
 
				 	return config.workers[workerid].nsched_ctxs;
			
 
				 }
			
 
				 
			
 
				-static void *
			
 
				-_starpu_get_worker_from_driver(struct starpu_driver *d)
			
 
				-{
			
 
				-	unsigned nworkers = starpu_worker_get_count();
			
 
				-	unsigned workerid;
			
 
				-
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	if (d->type == STARPU_CUDA_WORKER)
			
 
				-		return &cuda_worker_set[d->id.cuda_id];
			
 
				-#endif
			
 
				-
			
 
				-	for (workerid = 0; workerid < nworkers; workerid++)
			
 
				-	{
			
 
				-		if (starpu_worker_get_type(workerid) == d->type)
			
 
				-		{
			
 
				-			struct _starpu_worker *worker;
			
 
				-			worker = _starpu_get_worker_struct(workerid);
			
 
				-			switch (d->type)
			
 
				-			{
			
 
				-#ifdef STARPU_USE_CPU
			
 
				-			case STARPU_CPU_WORKER:
			
 
				-				if (worker->devid == d->id.cpu_id)
			
 
				-					return worker;
			
 
				-				break;
			
 
				-#endif
			
 
				-#ifdef STARPU_USE_OPENCL
			
 
				-			case STARPU_OPENCL_WORKER:
			
 
				-			{
			
 
				-				cl_device_id device;
			
 
				-				starpu_opencl_get_device(worker->devid, &device);
			
 
				-				if (device == d->id.opencl_id)
			
 
				-					return worker;
			
 
				-				break;
			
 
				-			}
			
 
				-#endif
			
 
				-			default:
			
 
				-				_STARPU_DEBUG("Invalid device type\n");
			
 
				-				return NULL;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				 int
			
 
				 starpu_driver_run(struct starpu_driver *d)
			
 
				 {