Browse Source

Add a field starpu_task::where and the macro STARPU_EXECUTE_WHERE

The field starpu_task::where is similar to starpu_codelet::where and
restricts where a task may be executed.
Also add STARPU_EXECUTE_WHERE, to be used when calling
starpu_task_insert().
Nathalie Furmento 8 years ago
parent
commit
9f5cc1888c

+ 3 - 0
ChangeLog

@@ -43,6 +43,9 @@ Small features:
   * New function starpu_worker_display_names to display the names of
     all the workers of a specified type.
   * Arbiters now support concurrent read access.
+  * Add a field starpu_task::where similar to starpu_codelet::where
+    which restricts where a task may be executed. Also add
+    STARPU_EXECUTE_WHERE to be used when calling starpu_task_insert().
 
 Changes:
   * Vastly improve simgrid simulation time.

+ 16 - 12
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -56,33 +56,33 @@ essentially used for synchronization tasks.
 
 \def STARPU_CPU
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where
-to specify the codelet may be executed on a CPU processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where)
+to specify the codelet (or the task) may be executed on a CPU processing unit.
 
 \def STARPU_CUDA
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where
-to specify the codelet may be executed on a CUDA processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where)
+to specify the codelet (or the task) may be executed on a CUDA processing unit.
 
 \def STARPU_OPENCL
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where to
-specify the codelet may be executed on a OpenCL processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where) to
+specify the codelet (or the task) may be executed on an OpenCL processing unit.
 
 \def STARPU_MIC
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where to
-specify the codelet may be executed on a MIC processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where) to
+specify the codelet (or the task) may be executed on a MIC processing unit.
 
 \def STARPU_MPI_MS
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where to
-specify the codelet may be executed on a MPI Slave processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where) to
+specify the codelet (or the task) may be executed on an MPI Slave processing unit.
 
 \def STARPU_SCC
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where to
-specify the codelet may be executed on an SCC processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where) to
+specify the codelet (or the task) may be executed on an SCC processing unit.
 
 \def STARPU_MAIN_RAM
 \ingroup API_Codelet_And_Tasks
@@ -443,6 +443,10 @@ the configuration of a task allocated with starpu_task_create().
     the task. The access modes are now defined in the field
     starpu_codelet::modes.
 
+\var int32_t starpu_task::where
+    When set, specifies where the task is allowed to be executed.
+    When left unset (i.e. at -1, the value assigned by starpu_task_init()),
+    it takes the value of starpu_codelet::where when the task is submitted.
+
 \var int starpu_task::nbuffers
     Specifies the number of buffers. This is only used when
     starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS.

+ 2 - 0
include/fstarpu_mod.f90

@@ -44,6 +44,7 @@ module fstarpu_mod
         type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA
         type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER
         type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER
+        type(c_ptr), bind(C) :: FSTARPU_EXECUTE_WHERE
         type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG
         type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL
         type(c_ptr), bind(C) :: FSTARPU_FLOPS
@@ -2280,6 +2281,7 @@ module fstarpu_mod
                         FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR)
                         FSTARPU_EXECUTE_ON_WORKER       = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR)
                         FSTARPU_WORKER_ORDER    = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR)
+                        FSTARPU_EXECUTE_WHERE       = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_WHERE"//C_NULL_CHAR)
                         FSTARPU_HYPERVISOR_TAG  = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR)
                         FSTARPU_POSSIBLY_PARALLEL       = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR)
                         FSTARPU_FLOPS   = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR)

+ 1 - 0
include/starpu_task.h

@@ -136,6 +136,7 @@ struct starpu_task
 	const char *name;
 
 	struct starpu_codelet *cl;
+	int32_t where;
 
 	int nbuffers;
 

+ 10 - 9
include/starpu_task_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
- * Copyright (C) 2010-2014, 2016  CNRS
+ * Copyright (C) 2010-2014, 2016, 2017  CNRS
  * Copyright (C) 2014       INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -34,7 +34,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 
 /* NOTE: when adding a value here, please make sure to update both
  * src/util/starpu_task_insert_utils.c (in two places) and
- * mpi/src/starpu_mpi_task_insert.c */
+ * mpi/src/starpu_mpi_task_insert.c and mpi/src/starpu_mpi_task_insert_fortran.c */
 #define STARPU_MODE_SHIFT	17
 #define STARPU_VALUE		 (1<<STARPU_MODE_SHIFT)
 #define STARPU_CALLBACK		 (2<<STARPU_MODE_SHIFT)
@@ -54,13 +54,14 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_PROLOGUE_CALLBACK_POP   (16<<STARPU_MODE_SHIFT)
 #define STARPU_PROLOGUE_CALLBACK_POP_ARG (17<<STARPU_MODE_SHIFT)
 #define STARPU_EXECUTE_ON_WORKER (18<<STARPU_MODE_SHIFT)
-#define STARPU_TAG_ONLY          (19<<STARPU_MODE_SHIFT)
-#define STARPU_POSSIBLY_PARALLEL    (20<<STARPU_MODE_SHIFT)
-#define STARPU_WORKER_ORDER      (21<<STARPU_MODE_SHIFT)
-#define STARPU_NODE_SELECTION_POLICY (22<<STARPU_MODE_SHIFT)
-#define STARPU_NAME		 (23<<STARPU_MODE_SHIFT)
-#define STARPU_CL_ARGS		(24<<STARPU_MODE_SHIFT)
-#define STARPU_SHIFTED_MODE_MAX (25<<STARPU_MODE_SHIFT)
+#define STARPU_EXECUTE_WHERE     (19<<STARPU_MODE_SHIFT)
+#define STARPU_TAG_ONLY          (20<<STARPU_MODE_SHIFT)
+#define STARPU_POSSIBLY_PARALLEL    (21<<STARPU_MODE_SHIFT)
+#define STARPU_WORKER_ORDER      (22<<STARPU_MODE_SHIFT)
+#define STARPU_NODE_SELECTION_POLICY (23<<STARPU_MODE_SHIFT)
+#define STARPU_NAME		 (24<<STARPU_MODE_SHIFT)
+#define STARPU_CL_ARGS		(25<<STARPU_MODE_SHIFT)
+#define STARPU_SHIFTED_MODE_MAX (26<<STARPU_MODE_SHIFT)
 
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);

+ 6 - 0
mpi/src/starpu_mpi_task_insert.c

@@ -386,6 +386,12 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
                 {
                         (void)va_arg(varg_list_copy, void *);
 		}
+		else if (arg_type==STARPU_EXECUTE_WHERE)
+		{
+			// the flag is decoded and set later when
+			// calling function _starpu_task_insert_create()
+			(void)va_arg(varg_list_copy, uint32_t);
+		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{
 			// the flag is decoded and set later when

+ 5 - 0
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -241,6 +241,11 @@ int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_no
 			arg_i++;
 			/* void* */
 		}
+		else if (arg_type==STARPU_EXECUTE_WHERE)
+		{
+			arg_i++;
+			/* int* */
+		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{
 			arg_i++;

+ 3 - 8
src/core/jobs.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011, 2014, 2016  INRIA
  *
@@ -272,12 +272,6 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		0
 #endif
 		;
-	/* Read cl fields before releasing dependencies, for the case of a
-	 * switch_cl which is freed by data_unregister happening as soon as
-	 * the dependencies are released.
-	 */
-	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE;
-
 #ifdef STARPU_DEBUG
 	STARPU_PTHREAD_MUTEX_LOCK(&all_jobs_list_mutex);
 	_starpu_job_multilist_erase_all_submitted(&all_jobs_list, j);
@@ -345,6 +339,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * data as it's not linked to an actual worker */
 	/* control task should not execute post_exec_hook */
+	unsigned nowhere = task->where == STARPU_NOWHERE;
 	if(j->task_size == 1 && !nowhere && !j->internal
 #ifdef STARPU_OPENMP
 	/* If this is a continuation, we do not execute the post_exec_hook. The
@@ -711,7 +706,7 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 {
 	/* Check that the worker is able to execute the task ! */
 	STARPU_ASSERT(task && task->cl);
-	if (STARPU_UNLIKELY(!(worker->worker_mask & task->cl->where)))
+	if (STARPU_UNLIKELY(!(worker->worker_mask & task->where)))
 		return -ENODEV;
 
 	_starpu_worker_lock(worker->workerid);

+ 1 - 1
src/core/sched_policy.c

@@ -425,7 +425,7 @@ int _starpu_repush_task(struct _starpu_job *j)
 	task->status = STARPU_TASK_READY;
 	STARPU_AYU_ADDTOTASKQUEUE(j->job_id, -1);
 	/* if the context does not have any workers save the tasks in a temp list */
-	if ((task->cl != NULL && task->cl->where != STARPU_NOWHERE) && (!sched_ctx->is_initial_sched))
+	if ((task->cl != NULL && task->where != STARPU_NOWHERE) && (!sched_ctx->is_initial_sched))
 	{
 		/*if there are workers in the ctx that are not able to execute tasks
 		  we consider the ctx empty */

+ 3 - 0
src/core/task.c

@@ -81,6 +81,7 @@ void starpu_task_init(struct starpu_task *task)
 	memset(task, 0, sizeof(struct starpu_task));
 
 	task->sequential_consistency = 1;
+	task->where = -1;
 
 	/* Now we can initialise fields which recquire custom value */
 #if STARPU_DEFAULT_PRIO != 0
@@ -554,6 +555,8 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 
 	_starpu_task_check_deprecated_fields(task);
 	_starpu_codelet_check_deprecated_fields(task->cl);
+	if (task->where== -1 && task->cl)
+		task->where = task->cl->where;
 
 	if (task->cl)
 	{

+ 12 - 12
src/core/workers.c

@@ -173,7 +173,7 @@ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
 uint32_t _starpu_worker_exists(struct starpu_task *task)
 {
 	_starpu_codelet_check_deprecated_fields(task->cl);
-	if (task->cl->where == STARPU_NOWHERE)
+	if (task->where == STARPU_NOWHERE)
 		return 1;
 
 	/* if the task belongs to the init context we can
@@ -182,7 +182,7 @@ uint32_t _starpu_worker_exists(struct starpu_task *task)
 	   and verify if it exists a worker able to exec the task */
 	if(task->sched_ctx == 0)
 	{
-		if (!(task->cl->where & _starpu_config.worker_mask))
+		if (!(task->where & _starpu_config.worker_mask))
 			return 0;
 
 		if (!task->cl->can_execute)
@@ -190,32 +190,32 @@ uint32_t _starpu_worker_exists(struct starpu_task *task)
 	}
 
 #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
-	if ((task->cl->where & STARPU_CPU) &&
+	if ((task->where & STARPU_CPU) &&
 	    _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER))
 		return 1;
 #endif
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
-	if ((task->cl->where & STARPU_CUDA) &&
+	if ((task->where & STARPU_CUDA) &&
 	    _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER))
 		return 1;
 #endif
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
-	if ((task->cl->where & STARPU_OPENCL) &&
+	if ((task->where & STARPU_OPENCL) &&
 	    _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
 		return 1;
 #endif
 #ifdef STARPU_USE_MIC
-	if ((task->cl->where & STARPU_MIC) &&
+	if ((task->where & STARPU_MIC) &&
 	    _starpu_worker_exists_and_can_execute(task, STARPU_MIC_WORKER))
 		return 1;
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-	if ((task->cl->where & STARPU_MPI_MS) &&
+	if ((task->where & STARPU_MPI_MS) &&
 	    _starpu_worker_exists_and_can_execute(task, STARPU_MPI_MS_WORKER))
 		return 1;
 #endif
 #ifdef STARPU_USE_SCC
-	if ((task->cl->where & STARPU_SCC) &&
+	if ((task->where & STARPU_SCC) &&
 	    _starpu_worker_exists_and_can_execute(task, STARPU_SCC_WORKER))
 		return 1;
 #endif
@@ -320,7 +320,7 @@ int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task,
 		return 0;
 
 	/* TODO: check that the task operand sizes will fit on that device */
-	return (task->cl->where & _starpu_config.workers[workerid].worker_mask) &&
+	return (task->where & _starpu_config.workers[workerid].worker_mask) &&
 		_starpu_can_use_nth_implementation(_starpu_config.workers[workerid].arch, task->cl, nimpl) &&
 		(!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl));
 }
@@ -338,7 +338,7 @@ int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *t
 	struct starpu_codelet *cl;
 	/* TODO: check that the task operand sizes will fit on that device */
 	cl = task->cl;
-	if (!(cl->where & _starpu_config.workers[workerid].worker_mask)) return 0;
+	if (!(task->where & _starpu_config.workers[workerid].worker_mask)) return 0;
 
 	mask = 0;
 	arch = _starpu_config.workers[workerid].arch;
@@ -379,7 +379,7 @@ int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_t
 	struct starpu_codelet *cl;
 	/* TODO: check that the task operand sizes will fit on that device */
 	cl = task->cl;
-	if (!(cl->where & _starpu_config.workers[workerid].worker_mask)) return 0;
+	if (!(task->where & _starpu_config.workers[workerid].worker_mask)) return 0;
 
 	arch = _starpu_config.workers[workerid].arch;
 	if (!task->cl->can_execute)
@@ -418,7 +418,7 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 	/* Is this a parallel worker ? */
 	if (workerid < nworkers)
 	{
-		return !!((task->cl->where & _starpu_config.workers[workerid].worker_mask) &&
+		return !!((task->where & _starpu_config.workers[workerid].worker_mask) &&
 				_starpu_can_use_nth_implementation(_starpu_config.workers[workerid].arch, task->cl, nimpl) &&
 				(!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)));
 	}

+ 1 - 1
src/datawizard/coherency.c

@@ -1150,7 +1150,7 @@ void __starpu_push_task_output(struct _starpu_job *j)
 		starpu_data_handle_t handle = descrs[index].handle;
 		enum starpu_data_access_mode mode = descrs[index].mode;
 		int node = descrs[index].node;
-		if (node == -1 && task->cl->where != STARPU_NOWHERE)
+		if (node == -1 && task->where != STARPU_NOWHERE)
 			node = local_memory_node;
 
 		struct _starpu_data_replicate *local_replicate = NULL;

+ 4 - 4
src/sched_policies/heteroprio.c

@@ -44,7 +44,7 @@
  * All the tasks stored in the fifo should be computable by the arch
  * in valid_archs.
  * For example if valid_archs = (STARPU_CPU|STARPU_CUDA)
- * Then task->task->cl->where should be at least (STARPU_CPU|STARPU_CUDA)
+ * Then task->task->where should be at least (STARPU_CPU|STARPU_CUDA)
  */
 struct _heteroprio_bucket
 {
@@ -379,7 +379,7 @@ static int push_task_heteroprio_policy(struct starpu_task *task)
 	struct _heteroprio_bucket* bucket = &hp->buckets[task->priority];
 	/* Ensure that any worker that check that list can compute the task */
 	STARPU_ASSERT_MSG(bucket->valid_archs, "The bucket %d does not have any archs\n", task->priority);
-	STARPU_ASSERT(((bucket->valid_archs ^ task->cl->where) & bucket->valid_archs) == 0);
+	STARPU_ASSERT(((bucket->valid_archs ^ task->where) & bucket->valid_archs) == 0);
 
 	/* save the task */
 	_starpu_fifo_push_back_task(bucket->tasks_queue,task);
@@ -388,7 +388,7 @@ static int push_task_heteroprio_policy(struct starpu_task *task)
 	unsigned arch_index;
 	for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index)
 	{
-		/* We test the archs on the bucket and not on task->cl->where since it is restrictive */
+		/* We test the archs on the bucket and not on task->where since it is restrictive */
 		if(bucket->valid_archs & starpu_heteroprio_types_to_arch[arch_index])
 			hp->nb_remaining_tasks_per_arch_index[arch_index] += 1;
 	}
@@ -521,7 +521,7 @@ static struct starpu_task *pop_task_heteroprio_policy(unsigned sched_ctx_id)
 
 				for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index)
 				{
-					/* We test the archs on the bucket and not on task->cl->where since it is restrictive */
+					/* We test the archs on the bucket and not on task->where since it is restrictive */
 					if(bucket->valid_archs & starpu_heteroprio_types_to_arch[arch_index])
 					{
 						hp->nb_remaining_tasks_per_arch_index[arch_index] -= 1;

+ 2 - 0
src/util/fstarpu.c

@@ -45,6 +45,7 @@ static const intptr_t fstarpu_prologue_callback_pop_arg	= STARPU_PROLOGUE_CALLBA
 static const intptr_t fstarpu_priority	= STARPU_PRIORITY;
 static const intptr_t fstarpu_execute_on_node	= STARPU_EXECUTE_ON_NODE;
 static const intptr_t fstarpu_execute_on_data	= STARPU_EXECUTE_ON_DATA;
+static const intptr_t fstarpu_execute_where	= STARPU_EXECUTE_WHERE;
 static const intptr_t fstarpu_execute_on_worker	= STARPU_EXECUTE_ON_WORKER;
 static const intptr_t fstarpu_worker_order	= STARPU_WORKER_ORDER;
 static const intptr_t fstarpu_hypervisor_tag	= STARPU_HYPERVISOR_TAG;
@@ -115,6 +116,7 @@ intptr_t fstarpu_get_constant(char *s)
 	else if	(!strcmp(s, "FSTARPU_PRIORITY"))	{ return fstarpu_priority; }
 	else if	(!strcmp(s, "FSTARPU_EXECUTE_ON_NODE"))	{ return fstarpu_execute_on_node; }
 	else if	(!strcmp(s, "FSTARPU_EXECUTE_ON_DATA"))	{ return fstarpu_execute_on_data; }
+	else if	(!strcmp(s, "FSTARPU_EXECUTE_WHERE"))	{ return fstarpu_execute_where; }
 	else if	(!strcmp(s, "FSTARPU_EXECUTE_ON_WORKER"))	{ return fstarpu_execute_on_worker; }
 	else if	(!strcmp(s, "FSTARPU_WORKER_ORDER"))	{ return fstarpu_worker_order; }
 	else if	(!strcmp(s, "FSTARPU_HYPERVISOR_TAG"))	{ return fstarpu_hypervisor_tag; }

+ 22 - 2
src/util/starpu_task_insert_utils.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011, 2013-2016   Université Bordeaux
- * Copyright (C) 2011-2016         CNRS
+ * Copyright (C) 2011-2017         CNRS
  * Copyright (C) 2011, 2014        INRIA
  * Copyright (C) 2016 Inria
  *
@@ -119,9 +119,13 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_lis
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t);
 		}
+		else if (arg_type==STARPU_EXECUTE_WHERE)
+		{
+			(void)va_arg(varg_list, uint32_t);
+		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{
-			va_arg(varg_list, int);
+			(void)va_arg(varg_list, int);
 		}
 		else if (arg_type==STARPU_WORKER_ORDER)
 		{
@@ -382,6 +386,11 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **t
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t);
 		}
+		else if (arg_type==STARPU_EXECUTE_WHERE)
+		{
+			(*task)->where = va_arg(varg_list, uint32_t);
+			fprintf(stderr, "where %d (cpu %d)\n", (*task)->where, STARPU_CPU);
+		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{
 			int worker = va_arg(varg_list, int);
@@ -592,6 +601,17 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **
 			arg_i++;
 			(void)arglist[arg_i];
 		}
+		else if (arg_type == STARPU_EXECUTE_WHERE)
+		{
+			assert(0);
+			arg_i++;
+			int worker = *(int *)arglist[arg_i];
+			if (worker != -1)
+			{
+				(*task)->workerid = worker;
+				(*task)->execute_on_a_specific_worker = 1;
+			}
+		}
 		else if (arg_type == STARPU_EXECUTE_ON_WORKER)
 		{
 			arg_i++;

+ 8 - 0
tests/Makefile.am

@@ -175,6 +175,7 @@ myPROGRAMS +=				\
 	main/insert_task_dyn_handles		\
 	main/insert_task_array			\
 	main/insert_task_many			\
+	main/insert_task_where			\
 	main/multithreaded			\
 	main/starpu_task_bundle			\
 	main/starpu_task_wait_for_all		\
@@ -572,6 +573,13 @@ datawizard_test_arbiter_SOURCES =	\
 
 main_deprecated_func_CFLAGS = $(AM_CFLAGS) -Wno-deprecated-declarations
 
+main_insert_task_where_SOURCES =		\
+	main/insert_task_where.c
+if STARPU_USE_CUDA
+main_insert_task_where_SOURCES +=		\
+	main/increment.cu
+endif
+
 main_subgraph_repeat_SOURCES =		\
 	main/subgraph_repeat.c
 if STARPU_USE_CUDA

+ 79 - 0
tests/main/insert_task_where.c

@@ -0,0 +1,79 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2012, 2013, 2015, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <config.h>
+#include <starpu.h>
+#include "../helper.h"
+
+extern void cuda_host_increment(void *descr[], void *_args);
+
+/* CPU implementation of the test codelet: adds 2 to the unsigned variable
+ * stored in the first buffer. main() expects 14 for the CPU-restricted task
+ * and 13 for the CUDA-restricted one, so the amount added here is what lets
+ * the test tell which implementation ran (the CUDA kernel is defined
+ * elsewhere and presumably adds 1 -- confirm in main/increment.cu). */
+void cpu_increment(void *descr[], void *arg STARPU_ATTRIBUTE_UNUSED)
+{
+	unsigned *counter = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	*counter = *counter + 2;
+}
+
+/* Codelet working on a single read-write buffer. It always provides the CPU
+ * implementation and, when StarPU is built with CUDA support, an
+ * asynchronous CUDA implementation as well. */
+static struct starpu_codelet my_codelet =
+{
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+#ifdef STARPU_USE_CUDA
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+	.cuda_funcs = {cuda_host_increment},
+#endif
+	.cpu_funcs_name = {"cpu_increment"},
+	.cpu_funcs = {cpu_increment}
+};
+
+/* Submits the same codelet twice through starpu_task_insert(): once
+ * restricted to CPU workers and once restricted to CUDA workers, using
+ * STARPU_EXECUTE_WHERE. A submission returning -ENODEV is tolerated and its
+ * variable is then not checked; otherwise x must end up at 14 and y at 13. */
+int main(int argc, char **argv)
+{
+	starpu_data_handle_t x_handle;
+	starpu_data_handle_t y_handle;
+	int x = 12;
+	int y = 12;
+	int ret;
+	int cpu_ret;
+	int cuda_ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+	{
+		return STARPU_TEST_SKIPPED;
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
+	starpu_variable_data_register(&y_handle, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
+
+	/* First task: only CPU workers may run it. */
+	cpu_ret = starpu_task_insert(&my_codelet,
+				     STARPU_EXECUTE_WHERE, STARPU_CPU,
+				     STARPU_RW, x_handle,
+				     0);
+	if (cpu_ret != -ENODEV)
+	{
+		STARPU_CHECK_RETURN_VALUE(cpu_ret, "starpu_task_insert");
+	}
+
+	/* Second task: only CUDA workers may run it. */
+	cuda_ret = starpu_task_insert(&my_codelet,
+				      STARPU_EXECUTE_WHERE, STARPU_CUDA,
+				      STARPU_RW, y_handle,
+				      0);
+	if (cuda_ret != -ENODEV)
+	{
+		STARPU_CHECK_RETURN_VALUE(cuda_ret, "starpu_task_insert");
+	}
+
+	starpu_data_unregister(x_handle);
+	starpu_data_unregister(y_handle);
+
+	starpu_shutdown();
+
+	/* A value is only verified when its submission did not return -ENODEV. */
+	if ((cpu_ret != -ENODEV && x != 14) || (cuda_ret != -ENODEV && y != 13))
+	{
+		ret = 1;
+	}
+
+	FPRINTF(stderr, "Value x = %d (expected 14)\n", x);
+	FPRINTF(stderr, "Value y = %d (expected 13)\n", y);
+
+	STARPU_RETURN(ret);
+}