Переглянути джерело

Move device id lookup into run-driver functions, to avoid paying the cost at each task execution

Samuel Thibault 11 років тому
батько
коміт
6325a34ab3

+ 61 - 14
src/core/workers.c

@@ -1714,6 +1714,51 @@ int _starpu_worker_get_nsched_ctxs(int workerid)
 	return config.workers[workerid].nsched_ctxs;
 }
 
+static struct _starpu_worker *
+_starpu_get_worker_from_driver(struct starpu_driver *d)
+{	/* Find the worker that the driver descriptor d designates.
+	 *
+	 * Scans worker ids 0 .. starpu_worker_get_count()-1 and returns the
+	 * struct of the first worker whose type equals d->type and whose
+	 * device matches the id stored in d->id:
+	 *   - CPU:    worker->devid is compared with d->id.cpu_id
+	 *   - CUDA:   worker->devid is compared with d->id.cuda_id
+	 *   - OpenCL: d->id.opencl_id is compared with the cl_device_id that
+	 *             starpu_opencl_get_device() stores for worker->devid
+	 *             (presumably the handle of that device -- confirm
+	 *             against the OpenCL driver).
+	 *
+	 * Returns NULL when no worker matches, or when d->type has no case
+	 * compiled in (the matching STARPU_USE_* macro is undefined).
+	 *
+	 * NOTE(review): the starpu_driver_*() entry points hand the result
+	 * to the per-driver functions without testing it, and those
+	 * functions dereference the worker right away; it looks like a NULL
+	 * check is needed on the caller side -- confirm.
+	 */
+	unsigned nworkers = starpu_worker_get_count();
+	unsigned workerid;
+	for (workerid = 0; workerid < nworkers; workerid++)
+	{
+		if (starpu_worker_get_type(workerid) == d->type) /* ignore workers of any other kind */
+		{
+			struct _starpu_worker *worker;
+			worker = _starpu_get_worker_struct(workerid);
+			switch (d->type)
+			{
+#ifdef STARPU_USE_CPU
+			case STARPU_CPU_WORKER:
+				if (worker->devid == d->id.cpu_id)
+					return worker;
+				break;
+#endif
+#ifdef STARPU_USE_CUDA
+			case STARPU_CUDA_WORKER:
+				if (worker->devid == d->id.cuda_id)
+					return worker;
+				break;
+#endif
+#ifdef STARPU_USE_OPENCL
+			case STARPU_OPENCL_WORKER:
+			{
+				cl_device_id device; /* written by the call below, then compared with the requested id */
+				starpu_opencl_get_device(worker->devid, &device);
+				if (device == d->id.opencl_id)
+					return worker;
+				break;
+			}
+#endif
+			default: /* d->type is the same on every iteration, so scanning further cannot help */
+				_STARPU_DEBUG("Invalid device type\n");
+				return NULL;
+			}
+		}
+	}
+
+	return NULL; /* no worker of the requested type carries the requested id */
+}
+
 int
 starpu_driver_run(struct starpu_driver *d)
 {
@@ -1723,47 +1768,47 @@ starpu_driver_run(struct starpu_driver *d)
 		return -EINVAL;
 	}
 
+	struct _starpu_worker *worker = _starpu_get_worker_from_driver(d);
 
 	switch (d->type)
 	{
 #ifdef STARPU_USE_CPU
 	case STARPU_CPU_WORKER:
-		return _starpu_run_cpu(d);
+		return _starpu_run_cpu(worker);
 #endif
 #ifdef STARPU_USE_CUDA
 	case STARPU_CUDA_WORKER:
-		return _starpu_run_cuda(d);
+		return _starpu_run_cuda(worker);
 #endif
 #ifdef STARPU_USE_OPENCL
 	case STARPU_OPENCL_WORKER:
-		return _starpu_run_opencl(d);
+		return _starpu_run_opencl(worker);
 #endif
 	default:
-	{
 		_STARPU_DEBUG("Invalid device type\n");
 		return -EINVAL;
 	}
-	}
 }
 
 int
 starpu_driver_init(struct starpu_driver *d)
 {
 	STARPU_ASSERT(d);
+	struct _starpu_worker *worker = _starpu_get_worker_from_driver(d);
 
 	switch (d->type)
 	{
 #ifdef STARPU_USE_CPU
 	case STARPU_CPU_WORKER:
-		return _starpu_cpu_driver_init(d);
+		return _starpu_cpu_driver_init(worker);
 #endif
 #ifdef STARPU_USE_CUDA
 	case STARPU_CUDA_WORKER:
-		return _starpu_cuda_driver_init(d);
+		return _starpu_cuda_driver_init(worker);
 #endif
 #ifdef STARPU_USE_OPENCL
 	case STARPU_OPENCL_WORKER:
-		return _starpu_opencl_driver_init(d);
+		return _starpu_opencl_driver_init(worker);
 #endif
 	default:
 		return -EINVAL;
@@ -1774,20 +1819,21 @@ int
 starpu_driver_run_once(struct starpu_driver *d)
 {
 	STARPU_ASSERT(d);
+	struct _starpu_worker *worker = _starpu_get_worker_from_driver(d);
 
 	switch (d->type)
 	{
 #ifdef STARPU_USE_CPU
 	case STARPU_CPU_WORKER:
-		return _starpu_cpu_driver_run_once(d);
+		return _starpu_cpu_driver_run_once(worker);
 #endif
 #ifdef STARPU_USE_CUDA
 	case STARPU_CUDA_WORKER:
-		return _starpu_cuda_driver_run_once(d);
+		return _starpu_cuda_driver_run_once(worker);
 #endif
 #ifdef STARPU_USE_OPENCL
 	case STARPU_OPENCL_WORKER:
-		return _starpu_opencl_driver_run_once(d);
+		return _starpu_opencl_driver_run_once(worker);
 #endif
 	default:
 		return -EINVAL;
@@ -1798,20 +1844,21 @@ int
 starpu_driver_deinit(struct starpu_driver *d)
 {
 	STARPU_ASSERT(d);
+	struct _starpu_worker *worker = _starpu_get_worker_from_driver(d);
 
 	switch (d->type)
 	{
 #ifdef STARPU_USE_CPU
 	case STARPU_CPU_WORKER:
-		return _starpu_cpu_driver_deinit(d);
+		return _starpu_cpu_driver_deinit(worker);
 #endif
 #ifdef STARPU_USE_CUDA
 	case STARPU_CUDA_WORKER:
-		return _starpu_cuda_driver_deinit(d);
+		return _starpu_cuda_driver_deinit(worker);
 #endif
 #ifdef STARPU_USE_OPENCL
 	case STARPU_OPENCL_WORKER:
-		return _starpu_opencl_driver_deinit(d);
+		return _starpu_opencl_driver_deinit(worker);
 #endif
 	default:
 		return -EINVAL;

+ 7 - 38
src/drivers/cpu/driver_cpu.c

@@ -184,15 +184,6 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 	return 0;
 }
 
-static struct _starpu_worker*
-_starpu_get_worker_from_driver(struct starpu_driver *d)
-{
-	int n = starpu_worker_get_by_devid(STARPU_CPU_WORKER, d->id.cpu_id);
-	if (n == -1)
-		return NULL;
-	return _starpu_get_worker_struct(n);
-}
-
 static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED, struct _starpu_machine_config *config)
 {
 	size_t global_mem;
@@ -236,12 +227,8 @@ static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED
 		return limit*1024*1024;
 }
 
-int _starpu_cpu_driver_init(struct starpu_driver *d)
+int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker)
 {
-	struct _starpu_worker *cpu_worker;
-	cpu_worker = _starpu_get_worker_from_driver(d);
-	STARPU_ASSERT(cpu_worker);
-
 	int devid = cpu_worker->devid;
 
 	_starpu_worker_start(cpu_worker, _STARPU_FUT_CPU_KEY);
@@ -263,12 +250,8 @@ int _starpu_cpu_driver_init(struct starpu_driver *d)
 	return 0;
 }
 
-int _starpu_cpu_driver_run_once(struct starpu_driver *d STARPU_ATTRIBUTE_UNUSED)
+int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker)
 {
-	struct _starpu_worker *cpu_worker;
-	cpu_worker = _starpu_get_local_worker_key();
-	STARPU_ASSERT(cpu_worker);
-
 	unsigned memnode = cpu_worker->memory_node;
 	int workerid = cpu_worker->workerid;
 
@@ -357,14 +340,10 @@ int _starpu_cpu_driver_run_once(struct starpu_driver *d STARPU_ATTRIBUTE_UNUSED)
 	return 0;
 }
 
-int _starpu_cpu_driver_deinit(struct starpu_driver *d STARPU_ATTRIBUTE_UNUSED)
+int _starpu_cpu_driver_deinit(struct _starpu_worker *cpu_worker)
 {
 	_STARPU_TRACE_WORKER_DEINIT_START;
 
-	struct _starpu_worker *cpu_worker;
-	cpu_worker = _starpu_get_local_worker_key();
-	STARPU_ASSERT(cpu_worker);
-
 	unsigned memnode = cpu_worker->memory_node;
 	_starpu_handle_all_pending_node_data_requests(memnode);
 
@@ -382,27 +361,17 @@ void *
 _starpu_cpu_worker(void *arg)
 {
 	struct _starpu_worker *args = arg;
-	struct starpu_driver d =
-	{
-		.type      = STARPU_CPU_WORKER,
-		.id.cpu_id = args->devid
-	};
 
-	_starpu_cpu_driver_init(&d);
+	_starpu_cpu_driver_init(args);
 	while (_starpu_machine_is_running())
-		_starpu_cpu_driver_run_once(&d);
-	_starpu_cpu_driver_deinit(&d);
+		_starpu_cpu_driver_run_once(args);
+	_starpu_cpu_driver_deinit(args);
 
 	return NULL;
 }
 
-int _starpu_run_cpu(struct starpu_driver *d)
+int _starpu_run_cpu(struct _starpu_worker *worker)
 {
-	STARPU_ASSERT(d && d->type == STARPU_CPU_WORKER);
-
-	struct _starpu_worker *worker = _starpu_get_worker_from_driver(d);
-	STARPU_ASSERT(worker);
-
 	worker->set = NULL;
 	worker->worker_is_initialized = 0;
 	_starpu_cpu_worker(worker);

+ 6 - 5
src/drivers/cpu/driver_cpu.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -31,10 +31,11 @@
 
 #ifdef STARPU_USE_CPU
 void *_starpu_cpu_worker(void *);
-int _starpu_run_cpu(struct starpu_driver *);
-int _starpu_cpu_driver_init(struct starpu_driver *);
-int _starpu_cpu_driver_run_once(struct starpu_driver *);
-int _starpu_cpu_driver_deinit(struct starpu_driver *);
+struct _starpu_worker;
+int _starpu_run_cpu(struct _starpu_worker *);
+int _starpu_cpu_driver_init(struct _starpu_worker *);
+int _starpu_cpu_driver_run_once(struct _starpu_worker *);
+int _starpu_cpu_driver_deinit(struct _starpu_worker *);
 void _starpu_cpu_discover_devices(struct _starpu_machine_config *config);
 #else
 #define _starpu_cpu_discover_devices(config) do { \

+ 7 - 51
src/drivers/cuda/driver_cuda.c

@@ -378,30 +378,9 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 	return 0;
 }
 
-static struct _starpu_worker*
-_starpu_get_worker_from_driver(struct starpu_driver *d)
-{
-	unsigned nworkers = starpu_worker_get_count();
-	unsigned  workerid;
-	for (workerid = 0; workerid < nworkers; workerid++)
-	{
-		if (starpu_worker_get_type(workerid) == d->type)
-		{
-			struct _starpu_worker *worker;
-			worker = _starpu_get_worker_struct(workerid);
-			if (worker->devid == d->id.cuda_id)
-				return worker;
-		}
-	}
-
-	return NULL;
-}
-
 /* XXX Should this be merged with _starpu_init_cuda ? */
-int _starpu_cuda_driver_init(struct starpu_driver *d)
+int _starpu_cuda_driver_init(struct _starpu_worker *args)
 {
-	struct _starpu_worker* args = _starpu_get_worker_from_driver(d);
-	STARPU_ASSERT(args);
 	unsigned devid = args->devid;
 
 	_starpu_worker_start(args, _STARPU_FUT_CUDA_KEY);
@@ -453,11 +432,8 @@ int _starpu_cuda_driver_init(struct starpu_driver *d)
 	return 0;
 }
 
-int _starpu_cuda_driver_run_once(struct starpu_driver *d)
+int _starpu_cuda_driver_run_once(struct _starpu_worker *args)
 {
-	struct _starpu_worker* args = _starpu_get_worker_from_driver(d);
-	STARPU_ASSERT(args);
-
 	unsigned memnode = args->memory_node;
 	int workerid = args->workerid;
 
@@ -509,12 +485,9 @@ int _starpu_cuda_driver_run_once(struct starpu_driver *d)
 	return 0;
 }
 
-int _starpu_cuda_driver_deinit(struct starpu_driver *d)
+int _starpu_cuda_driver_deinit(struct _starpu_worker *args)
 {
-	struct _starpu_worker* args = _starpu_get_worker_from_driver(d);
-	STARPU_ASSERT(args);
 	unsigned memnode = args->memory_node;
-
 	_STARPU_TRACE_WORKER_DEINIT_START;
 
 	_starpu_handle_all_pending_node_data_requests(memnode);
@@ -538,16 +511,11 @@ int _starpu_cuda_driver_deinit(struct starpu_driver *d)
 void *_starpu_cuda_worker(void *arg)
 {
 	struct _starpu_worker* args = arg;
-	struct starpu_driver d =
-		{
-			.type       = STARPU_CUDA_WORKER,
-			.id.cuda_id = args->devid
-		};
 
-	_starpu_cuda_driver_init(&d);
+	_starpu_cuda_driver_init(args);
 	while (_starpu_machine_is_running())
-		_starpu_cuda_driver_run_once(&d);
-	_starpu_cuda_driver_deinit(&d);
+		_starpu_cuda_driver_run_once(args);
+	_starpu_cuda_driver_deinit(args);
 
 	return NULL;
 }
@@ -665,25 +633,13 @@ starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node,
 }
 #endif /* STARPU_USE_CUDA */
 
-int _starpu_run_cuda(struct starpu_driver *d)
+int _starpu_run_cuda(struct _starpu_worker *workerarg)
 {
-	STARPU_ASSERT(d && d->type == STARPU_CUDA_WORKER);
-
-	int workerid = starpu_worker_get_by_devid(STARPU_CUDA_WORKER, d->id.cuda_id);
-
-	_STARPU_DEBUG("Running cuda %u from the application\n", d->id.cuda_id);
-
-	struct _starpu_worker *workerarg = _starpu_get_worker_struct(workerid);
-
 	workerarg->set = NULL;
 	workerarg->worker_is_initialized = 0;
 
 	/* Let's go ! */
 	_starpu_cuda_worker(workerarg);
 
-	/* XXX: Should we wait for the driver to be ready, as it is done when
-	 * launching it the usual way ? Cf. the end of _starpu_launch_drivers()
-	 */
-
 	return 0;
 }

+ 5 - 4
src/drivers/cuda/driver_cuda.h

@@ -52,10 +52,11 @@ cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned node);
 cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned node);
 cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node);
 
-int _starpu_run_cuda(struct starpu_driver *);
-int _starpu_cuda_driver_init(struct starpu_driver *);
-int _starpu_cuda_driver_run_once(struct starpu_driver *);
-int _starpu_cuda_driver_deinit(struct starpu_driver *);
+struct _starpu_worker;
+int _starpu_run_cuda(struct _starpu_worker *);
+int _starpu_cuda_driver_init(struct _starpu_worker *);
+int _starpu_cuda_driver_run_once(struct _starpu_worker *);
+int _starpu_cuda_driver_deinit(struct _starpu_worker *);
 #endif
 
 #endif //  __DRIVER_CUDA_H__

+ 7 - 102
src/drivers/opencl/driver_opencl.c

@@ -552,53 +552,8 @@ static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname);
 #endif
 static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_worker *args);
 
-static struct _starpu_worker*
-_starpu_opencl_get_worker_from_driver(struct starpu_driver *d)
+int _starpu_opencl_driver_init(struct _starpu_worker *args)
 {
-#ifdef STARPU_USE_OPENCL
-	int nworkers;
-	int workers[STARPU_MAXOPENCLDEVS];
-	nworkers = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, workers, STARPU_MAXOPENCLDEVS);
-	if (nworkers == 0)
-		return NULL;
-
-	int i;
-	for (i = 0; i < nworkers; i++)
-	{
-		cl_device_id device;
-		int devid = starpu_worker_get_devid(workers[i]);
-		starpu_opencl_get_device(devid, &device);
-		if (device == d->id.opencl_id)
-			break;
-	}
-
-	if (i == nworkers)
-		return NULL;
-
-	return _starpu_get_worker_struct(workers[i]);
-#else
-	unsigned nworkers = starpu_worker_get_count();
-	unsigned  workerid;
-	for (workerid = 0; workerid < nworkers; workerid++)
-	{
-		if (starpu_worker_get_type(workerid) == d->type)
-		{
-			struct _starpu_worker *worker;
-			worker = _starpu_get_worker_struct(workerid);
-			if (worker->devid == d->id.opencl_id)
-				return worker;
-		}
-	}
-
-	return NULL;
-#endif
-}
-
-int _starpu_opencl_driver_init(struct starpu_driver *d)
-{
-	struct _starpu_worker* args;
-	args = _starpu_opencl_get_worker_from_driver(d);
-	STARPU_ASSERT(args);
 	int devid = args->devid;
 
 	_starpu_worker_start(args, _STARPU_FUT_OPENCL_KEY);
@@ -641,12 +596,8 @@ int _starpu_opencl_driver_init(struct starpu_driver *d)
 	return 0;
 }
 
-int _starpu_opencl_driver_run_once(struct starpu_driver *d)
+int _starpu_opencl_driver_run_once(struct _starpu_worker *args)
 {
-	struct _starpu_worker* args;
-	args = _starpu_opencl_get_worker_from_driver(d);
-	STARPU_ASSERT(args);
-
 	int workerid = args->workerid;
 	unsigned memnode = args->memory_node;
 
@@ -700,14 +651,10 @@ int _starpu_opencl_driver_run_once(struct starpu_driver *d)
 	return 0;
 }
 
-int _starpu_opencl_driver_deinit(struct starpu_driver *d)
+int _starpu_opencl_driver_deinit(struct _starpu_worker *args)
 {
 	_STARPU_TRACE_WORKER_DEINIT_START;
 
-	struct _starpu_worker* args;
-	args = _starpu_opencl_get_worker_from_driver(d);
-	STARPU_ASSERT(args);
-
 	unsigned memnode = args->memory_node;
 
 	_starpu_handle_all_pending_node_data_requests(memnode);
@@ -732,27 +679,11 @@ int _starpu_opencl_driver_deinit(struct starpu_driver *d)
 void *_starpu_opencl_worker(void *arg)
 {
 	struct _starpu_worker* args = arg;
-#ifdef STARPU_USE_OPENCL
-	cl_device_id id;
-
-	starpu_opencl_get_device(args->devid, &id);
-	struct starpu_driver d =
-		{
-			.type         = STARPU_OPENCL_WORKER,
-			.id.opencl_id = id
-		};
-#else
-	struct starpu_driver d =
-		{
-			.type         = STARPU_OPENCL_WORKER,
-			.id.opencl_id = args->devid
-		};
-#endif
 
-	_starpu_opencl_driver_init(&d);
+	_starpu_opencl_driver_init(args);
 	while (_starpu_machine_is_running())
-		_starpu_opencl_driver_run_once(&d);
-	_starpu_opencl_driver_deinit(&d);
+		_starpu_opencl_driver_run_once(args);
+	_starpu_opencl_driver_deinit(args);
 
 	return NULL;
 }
@@ -867,30 +798,8 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 }
 
 #ifdef STARPU_USE_OPENCL
-int _starpu_run_opencl(struct starpu_driver *d)
+int _starpu_run_opencl(struct _starpu_worker *workerarg)
 {
-	STARPU_ASSERT(d && d->type == STARPU_OPENCL_WORKER);
-
-	int nworkers;
-	int workers[STARPU_MAXOPENCLDEVS];
-	nworkers = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, workers, STARPU_MAXOPENCLDEVS);
-	if (nworkers == 0)
-		return -ENODEV;
-
-	int i;
-	for (i = 0; i < nworkers; i++)
-	{
-		cl_device_id device;
-		int devid = starpu_worker_get_devid(workers[i]);
-		starpu_opencl_get_device(devid, &device);
-		if (device == d->id.opencl_id)
-			break;
-	}
-
-	if (i == nworkers)
-		return -ENODEV;
-
-	struct _starpu_worker *workerarg = _starpu_get_worker_struct(i);
 	_STARPU_DEBUG("Running OpenCL %u from the application\n", workerarg->devid);
 
 	workerarg->set = NULL;
@@ -899,10 +808,6 @@ int _starpu_run_opencl(struct starpu_driver *d)
 	/* Let's go ! */
 	_starpu_opencl_worker(workerarg);
 
-	/* XXX: Should we wait for the driver to be ready, as it is done when
-	 * launching it the usual way ? Cf. the end of _starpu_launch_drivers()
-	 */
-
 	return 0;
 }
 #endif /* STARPU_USE_OPENCL */

+ 6 - 5
src/drivers/opencl/driver_opencl.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -81,16 +81,17 @@ void *_starpu_opencl_worker(void *);
 #endif
 
 #ifdef STARPU_USE_OPENCL
+struct _starpu_worker;
 extern
-int _starpu_run_opencl(struct starpu_driver *);
+int _starpu_run_opencl(struct _starpu_worker *);
 
 extern
-int _starpu_opencl_driver_init(struct starpu_driver *);
+int _starpu_opencl_driver_init(struct _starpu_worker *);
 
 extern
-int _starpu_opencl_driver_run_once(struct starpu_driver *);
+int _starpu_opencl_driver_run_once(struct _starpu_worker *);
 
 extern
-int _starpu_opencl_driver_deinit(struct starpu_driver *);
+int _starpu_opencl_driver_deinit(struct _starpu_worker *);
 #endif // STARPU_USE_OPENCL
 #endif //  __DRIVER_OPENCL_H__