Browse Source

Add starpu_worker_get_by_type and starpu_worker_get_by_devid

Samuel Thibault 12 years ago
parent
commit
9d2589a240

+ 4 - 1
ChangeLog

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2009-2013  Université de Bordeaux 1
 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -114,6 +114,9 @@ Changes:
   * StarPU can now use poti to generate paje traces.
   * StarPU can now use poti to generate paje traces.
   * Rename scheduling policy "parallel greedy" to "parallel eager"
   * Rename scheduling policy "parallel greedy" to "parallel eager"
 
 
+Small features:
+  * Add starpu_worker_get_by_type and starpu_worker_get_by_devid
+
 Small changes:
 Small changes:
   * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is
   * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is
 	still available for compatibility reasons.
 	still available for compatibility reasons.

+ 11 - 0
doc/chapters/basic-api.texi

@@ -286,6 +286,17 @@ chosen by the means of the @code{starpu_worker_get_count_by_type} function, or
 by passing a value greater or equal to @code{STARPU_NMAXWORKERS}.
 by passing a value greater or equal to @code{STARPU_NMAXWORKERS}.
 @end deftypefun
 @end deftypefun
 
 
+@deftypefun int starpu_worker_get_by_type ({enum starpu_archtype} @var{type}, int @var{n})
+This returns the identifier of the @var{n}-th worker that has the specified type
+@var{type}. If there is no such worker, -1 is returned.
+@end deftypefun
+
+@deftypefun int starpu_worker_get_by_devid ({enum starpu_archtype} @var{type}, int @var{devid})
+This returns the identifier of the worker that has the specified type
+@var{type} and devid @var{devid} (which may not be the n-th, if some devices are
+skipped for instance). If there is no such worker, -1 is returned.
+@end deftypefun
+
 @deftypefun int starpu_worker_get_devid (int @var{id})
 @deftypefun int starpu_worker_get_devid (int @var{id})
 This function returns the device id of the given worker. The worker
 This function returns the device id of the given worker. The worker
 should be identified with the value returned by the @code{starpu_worker_get_id} function. In the case of a
 should be identified with the value returned by the @code{starpu_worker_get_id} function. In the case of a

+ 15 - 0
doc/chapters/perf-optimization.texi

@@ -19,6 +19,7 @@ TODO: improve!
 * Task distribution vs Data transfer::
 * Task distribution vs Data transfer::
 * Data prefetch::
 * Data prefetch::
 * Power-based scheduling::
 * Power-based scheduling::
+* Static scheduling::
 * Profiling::
 * Profiling::
 * CUDA-specific optimizations::
 * CUDA-specific optimizations::
 * Performance debugging::
 * Performance debugging::
@@ -402,6 +403,20 @@ consumption for that series of tasks, divide it by a thousand, repeat for
 varying kinds of tasks and task sizes, and eventually feed StarPU
 varying kinds of tasks and task sizes, and eventually feed StarPU
 with these manual measurements through @code{starpu_perfmodel_update_history}.
 with these manual measurements through @code{starpu_perfmodel_update_history}.
 
 
+@node Static scheduling
+@section Static scheduling
+
+In some cases, one may want to force some scheduling, for instance force a given
+set of tasks to GPU0, another set to GPU1, etc. while letting some other tasks
+be scheduled on any other device. This can indeed be useful to guide StarPU into
+some work distribution, while still leaving some degree of dynamism. For
+instance, to force execution of a task on CUDA0:
+
+@example
+task->execute_on_a_specific_worker = 1;
+task->worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0);
+@end example
+
 @node Profiling
 @node Profiling
 @section Profiling
 @section Profiling
 
 

+ 7 - 1
include/starpu_worker.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -79,6 +79,12 @@ int starpu_worker_get_count_by_type(enum starpu_archtype type);
  * equal to STARPU_NMAXWORKERS. */
  * equal to STARPU_NMAXWORKERS. */
 int starpu_worker_get_ids_by_type(enum starpu_archtype type, int *workerids, int maxsize);
 int starpu_worker_get_ids_by_type(enum starpu_archtype type, int *workerids, int maxsize);
 
 
+/* Return the identifier of the num-th worker of a specific type, or -1 if there is no such worker */
+int starpu_worker_get_by_type(enum starpu_archtype type, int num);
+
+/* Return the identifier of the worker of a specific type that has device id devid, or -1 if there is no such worker */
+int starpu_worker_get_by_devid(enum starpu_archtype type, int devid);
+
 /* StarPU associates a unique human readable string to each processing unit.
 /* StarPU associates a unique human readable string to each processing unit.
  * This function copies at most the "maxlen" first bytes of the unique
  * This function copies at most the "maxlen" first bytes of the unique
  * string associated to a worker identified by its identifier "id" into
  * string associated to a worker identified by its identifier "id" into

+ 7 - 11
src/core/perfmodel/perfmodel.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
@@ -224,7 +224,6 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 					    unsigned nimpl)
 					    unsigned nimpl)
 {
 {
 	unsigned i;
 	unsigned i;
-	int err;
 	double sum = 0.0;
 	double sum = 0.0;
 	int node;
 	int node;
 
 
@@ -235,25 +234,22 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 	 */
 	 */
 #ifdef STARPU_USE_CPU
 #ifdef STARPU_USE_CPU
 	int cpu_worker, cpu_node;
 	int cpu_worker, cpu_node;
-	err = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER,
+	cpu_worker = starpu_worker_get_by_type(STARPU_CPU_WORKER, 0);
-					    &cpu_worker, 1);
+	if (cpu_worker == -1)
-	if (err != 1 && err != -ERANGE)
 		return 0.0;
 		return 0.0;
 	cpu_node = starpu_worker_get_memory_node(cpu_worker);
 	cpu_node = starpu_worker_get_memory_node(cpu_worker);
 #endif
 #endif
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	int cuda_worker, cuda_node;
 	int cuda_worker, cuda_node;
-	err = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER,
+	cuda_worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0);
-					    &cuda_worker, 1);
+	if (cuda_worker == -1)
-	if (err != 1 && err != -ERANGE)
 		return 0.0;
 		return 0.0;
 	cuda_node = starpu_worker_get_memory_node(cuda_worker);
 	cuda_node = starpu_worker_get_memory_node(cuda_worker);
 #endif
 #endif
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 	int opencl_worker, opencl_node;
 	int opencl_worker, opencl_node;
-	err = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER,
+	opencl_worker = starpu_worker_get_by_type(STARPU_OPENCL_WORKER, 0);
-					    &opencl_worker, 1);
+	if (opencl_worker == -1)
-	if (err != 1 && err != -ERANGE)
 		return 0.0;
 		return 0.0;
 
 
 	opencl_node = starpu_worker_get_memory_node(opencl_worker);
 	opencl_node = starpu_worker_get_memory_node(opencl_worker);

+ 34 - 0
src/core/workers.c

@@ -1243,6 +1243,40 @@ int starpu_worker_get_ids_by_type(enum starpu_archtype type, int *workerids, int
 	return cnt;
 	return cnt;
 }
 }
 
 
+int starpu_worker_get_by_type(enum starpu_archtype type, int num)
+{
+	unsigned nworkers = starpu_worker_get_count();
+
+	int cnt = 0;
+
+	unsigned id;
+	for (id = 0; id < nworkers; id++)
+	{
+		if (starpu_worker_get_type(id) == type)
+		{
+			if (num == cnt)
+				return id;
+			cnt++;
+		}
+	}
+
+	/* Not found */
+	return -1;
+}
+
+int starpu_worker_get_by_devid(enum starpu_archtype type, int devid)
+{
+	unsigned nworkers = starpu_worker_get_count();
+
+	unsigned id;
+	for (id = 0; id < nworkers; id++)
+		if (starpu_worker_get_type(id) == type && starpu_worker_get_devid(id) == devid)
+			return id;
+
+	/* Not found */
+	return -1;
+}
+
 void starpu_worker_get_name(int id, char *dst, size_t maxlen)
 void starpu_worker_get_name(int id, char *dst, size_t maxlen)
 {
 {
 	char *name = config.workers[id].name;
 	char *name = config.workers[id].name;

+ 4 - 7
src/drivers/cpu/driver_cpu.c

@@ -178,13 +178,10 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 static struct _starpu_worker*
 static struct _starpu_worker*
 _starpu_get_worker_from_driver(struct starpu_driver *d)
 _starpu_get_worker_from_driver(struct starpu_driver *d)
 {
 {
-	int workers[d->id.cpu_id + 1];
+	int n = starpu_worker_get_by_devid(STARPU_CPU_WORKER, d->id.cpu_id);
-	int nworkers;
+	if (n == -1)
-	nworkers = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, d->id.cpu_id+1);
+		return NULL;
-	if (nworkers >= 0 && (unsigned) nworkers < d->id.cpu_id)
+	return _starpu_get_worker_struct(n);
-		return NULL; // No device was found.
-
-	return _starpu_get_worker_struct(workers[d->id.cpu_id]);
 }
 }
 
 
 int _starpu_cpu_driver_init(struct starpu_driver *d)
 int _starpu_cpu_driver_init(struct starpu_driver *d)

+ 4 - 8
src/drivers/cuda/driver_cuda.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
@@ -629,15 +629,11 @@ int _starpu_run_cuda(struct starpu_driver *d)
 {
 {
 	STARPU_ASSERT(d && d->type == STARPU_CUDA_WORKER);
 	STARPU_ASSERT(d && d->type == STARPU_CUDA_WORKER);
 
 
-	int workers[d->id.cuda_id + 1];
+	int workerid = starpu_worker_get_by_devid(STARPU_CUDA_WORKER, d->id.cuda_id);
-	int nworkers;
+
-	nworkers = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, workers, d->id.cuda_id+1);
-	if (nworkers >= 0 && (unsigned) nworkers < d->id.cuda_id)
-		return -ENODEV;
-	
 	_STARPU_DEBUG("Running cuda %u from the application\n", d->id.cuda_id);
 	_STARPU_DEBUG("Running cuda %u from the application\n", d->id.cuda_id);
 
 
-	struct _starpu_worker *workerarg = _starpu_get_worker_struct(workers[d->id.cuda_id]);
+	struct _starpu_worker *workerarg = _starpu_get_worker_struct(workerid);
 
 
 	workerarg->set = NULL;
 	workerarg->set = NULL;
 	workerarg->worker_is_initialized = 0;
 	workerarg->worker_is_initialized = 0;