13 years ago · a499bf4c71
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -51,6 +51,33 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+enum starpu_archtype /* Architecture kinds; used as a worker's arch and as starpu_driver.type. */
			
 
				+{
			
 
				+	STARPU_CPU_WORKER,    /* CPU core (implicit value 0) */
			
 
				+	STARPU_CUDA_WORKER,   /* NVIDIA CUDA device; the only type starpu_run_driver() accepts */
			
 
				+	STARPU_OPENCL_WORKER, /* OpenCL device */
			
 
				+	STARPU_GORDON_WORKER  /* Cell SPU */
			
 
				+};
			
 
				+
			
 
				+struct starpu_driver /* Designates one driver: an architecture type plus a per-architecture id. */
			
 
				+{
			
 
				+	enum starpu_archtype type; /* starpu_run_driver() rejects anything but STARPU_CUDA_WORKER */
			
 
				+	union
			
 
				+	{
			
 
				+		unsigned cuda_id; /* matched against the 0-based CUDA worker counter in _starpu_launch_drivers() */
			
 
				+		/*
			
 
				+		 * TODO: handle CPUs and OpenCL devices:
			
 
				+		 * 1) Add a member to this union.
			
 
				+		 * 2) Edit _starpu_launch_drivers() to make sure the driver is
			
 
				+		 *    not always launched.
			
 
				+		 * 3) Edit starpu_run_driver() so that it can handle another
			
 
				+		 *    kind of architecture.
			
 
				+		 * 4) Write _starpu_run_foobar() in the corresponding driver.
			
 
				+		 * 5) Test the whole thing :)
			
 
				+		 */
			
 
				+	} id; /* only cuda_id exists so far; see the TODO above for other architectures */
			
 
				+};
			
 
				+
			
 
				 struct starpu_conf
			
 
				 {
			
 
				 	/* which scheduling policy should be used ? (NULL for default) */
			
@@ -83,6 +110,9 @@ struct starpu_conf
 
				 
			
 
				         /* indicate if the asynchronous copies should be disabled */
			
 
				 	int disable_asynchronous_copy;
			
 
				+
			
 
				+	/* A driver that the application will run in one of its own threads. */
			
 
				+	struct starpu_driver *not_launched_driver;
			
 
				 };
			
 
				 
			
 
				 /* Initialize a starpu_conf structure with default values. */
			
@@ -118,13 +148,6 @@ int starpu_combined_worker_get_id(void);
 
				 int starpu_combined_worker_get_size(void);
			
 
				 int starpu_combined_worker_get_rank(void);
			
 
				 
			
 
				-enum starpu_archtype
			
 
				-{
			
 
				-	STARPU_CPU_WORKER, /* CPU core */
			
 
				-	STARPU_CUDA_WORKER, /* NVIDIA CUDA device */
			
 
				-	STARPU_OPENCL_WORKER, /* OpenCL CUDA device */
			
 
				-	STARPU_GORDON_WORKER /* Cell SPU */
			
 
				-};
			
 
				 
			
 
				 /* This function returns the type of worker associated to an identifier (as
			
 
				  * returned by the starpu_worker_get_id function). The returned value indicates
			
@@ -163,6 +186,8 @@ void starpu_worker_get_name(int id, char *dst, size_t maxlen);
 
				  */
			
 
				 int starpu_worker_get_devid(int id);
			
 
				 
			
 
				+int starpu_run_driver(struct starpu_driver *);
			
 
				+void starpu_set_end_of_submissions(void);
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -217,6 +217,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 	unsigned nworkers = config->topology.nworkers;
			
 
				 
			
 
				 	/* Launch workers asynchronously (except for SPUs) */
			
 
				+	unsigned cuda = 0;
			
 
				 	unsigned worker;
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
@@ -260,9 +261,14 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 			case STARPU_CUDA_WORKER:
			
 
				 				workerarg->set = NULL;
			
 
				 				workerarg->worker_is_initialized = 0;
			
 
				-				pthread_create(&workerarg->worker_thread,
			
 
				-						NULL, _starpu_cuda_worker, workerarg);
			
 
				-
			
 
				+				if (config->conf->not_launched_driver &&
			
 
				+				    !(config->conf->not_launched_driver->type == STARPU_CUDA_WORKER &&
			
 
				+				      config->conf->not_launched_driver->id.cuda_id == cuda))
			
 
				+				{
			
 
				+					pthread_create(&workerarg->worker_thread,
			
 
				+						       NULL, _starpu_cuda_worker, workerarg);
			
 
				+				}
			
 
				+				cuda++;
			
 
				 				break;
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
@@ -308,6 +314,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	cuda = 0;
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
 
				 		struct _starpu_worker *workerarg = &config->workers[worker];
			
@@ -315,7 +322,25 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 		switch (workerarg->arch)
			
 
				 		{
			
 
				 			case STARPU_CPU_WORKER:
			
 
				+				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			
 
				+				while (!workerarg->worker_is_initialized)
			
 
				+					_STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
			
 
				+				_STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			
 
				+				break;
			
 
				 			case STARPU_CUDA_WORKER:
			
 
				+				if (config->conf->not_launched_driver &&
			
 
				+				    config->conf->not_launched_driver->type == STARPU_CUDA_WORKER &&
			
 
				+				    config->conf->not_launched_driver->id.cuda_id == cuda)
			
 
				+				{
			
 
				+					cuda++;
			
 
				+					break;
			
 
				+				}
			
 
				+				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			
 
				+				while (!workerarg->worker_is_initialized)
			
 
				+					_STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
			
 
				+				_STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			
 
				+				cuda++;
			
 
				+				break;
			
 
				 			case STARPU_OPENCL_WORKER:
			
 
				 				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			
 
				 				while (!workerarg->worker_is_initialized)
			
@@ -889,3 +914,23 @@ void starpu_worker_set_sched_condition(int workerid, pthread_cond_t *sched_cond,
 
				 	config.workers[workerid].sched_cond = sched_cond;
			
 
				 	config.workers[workerid].sched_mutex = sched_mutex;
			
 
				 }
			
 
				+
			
 
				+void
			
 
				+starpu_set_end_of_submissions(void)
			
 
				+{
			
 
				+	struct _starpu_machine_config *machine = _starpu_get_machine_config();
			
 
				+	/* Block until every submitted task has completed, then clear the
			
 
				+	 * machine's `running` flag (presumably polled by the drivers - confirm). */
			
 
				+	starpu_task_wait_for_all();
			
 
				+	machine->running = 0; }
			
 
				+
			
 
				+extern int _starpu_run_cuda(struct starpu_driver *);
			
 
				+
			
 
				+int
			
 
				+starpu_run_driver(struct starpu_driver *d)
			
 
				+{
			
 
				+	/* Run driver `d` in the caller's thread; only CUDA is supported so far. */
			
 
				+	if (d && d->type == STARPU_CUDA_WORKER)
			
 
				+		return _starpu_run_cuda(d);
			
 
				+	return -EINVAL; /* NULL descriptor or unsupported architecture */
			
 
				+}
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -444,3 +444,30 @@ int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node STARPU_ATTRIBUT
 
				 
			
 
				 	return -EAGAIN;
			
 
				 }
			
 
				+
			
 
				+int _starpu_run_cuda(struct starpu_driver *d)
			
 
				+{
			
 
				+	/* Run CUDA worker number d->id.cuda_id in the calling thread: 0 once it returns, -ENODEV if absent. */
			
 
				+	STARPU_ASSERT(d && d->type == STARPU_CUDA_WORKER);
			
 
				+
			
 
				+	int workers[d->id.cuda_id + 1];
			
 
				+	int nworkers;
			
 
				+	nworkers = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, workers, d->id.cuda_id+1);
			
 
				+	/* workers[cuda_id] is only written when more than cuda_id ids came back; a negative
			
 
				+	 * count is let through as before (presumably "array filled, more exist" - TODO confirm). */
			
 
				+	if (nworkers >= 0 && (unsigned) nworkers <= d->id.cuda_id)
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+	_STARPU_DEBUG("Running cuda %u from the application\n", d->id.cuda_id);
			
 
				+	struct _starpu_worker *workerarg = _starpu_get_worker_struct(workers[d->id.cuda_id]);
			
 
				+
			
 
				+	/* Same reset _starpu_launch_drivers() performs before starting a CUDA worker. */
			
 
				+	workerarg->set = NULL;
			
 
				+	workerarg->worker_is_initialized = 0;
			
 
				+
			
 
				+	/* Let's go ! */
			
 
				+	_starpu_cuda_worker(workerarg);
			
 
				+	/* XXX: Should we wait for the driver to be ready, as it is done when
			
 
				+	 * launching it the usual way ? Cf. the end of _starpu_launch_drivers() */
			
 
				+	return 0;
			
 
				+}