Sfoglia il codice sorgente

Allow to have a dynamically allocated number of buffers per task, and
so overwrite the value defined --enable-maxbuffers=XXX

Nathalie Furmento 12 anni fa
parent
commit
097a832e3c

+ 2 - 0
ChangeLog

@@ -111,6 +111,8 @@ New features:
     pthread API. It is provided with 2 implementations: a pthread one
     pthread API. It is provided with 2 implementations: a pthread one
     and a Simgrid one. Applications using StarPU and wishing to use
     and a Simgrid one. Applications using StarPU and wishing to use
     the Simgrid StarPU features should use it.
     the Simgrid StarPU features should use it.
+  * Allow a dynamically allocated number of buffers per task, thereby
+    overriding the limit set by --enable-maxbuffers=XXX
 
 
 Small features:
 Small features:
   * Add starpu_worker_get_by_type and starpu_worker_get_by_devid
   * Add starpu_worker_get_by_type and starpu_worker_get_by_devid

+ 54 - 0
doc/chapters/advanced-examples.texi

@@ -23,6 +23,7 @@
 * Defining a New Scheduling Policy::
 * Defining a New Scheduling Policy::
 * On-GPU rendering::
 * On-GPU rendering::
 * Defining a New Data Interface::
 * Defining a New Data Interface::
+* Setting the Data Handles for a Task::
 * More examples::               More examples shipped with StarPU
 * More examples::               More examples shipped with StarPU
 @end menu
 @end menu
 
 
@@ -1264,6 +1265,59 @@ void display_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args
 
 
 The whole code for this complex data interface is available in the
 The whole code for this complex data interface is available in the
 directory @code{examples/interface/}.
 directory @code{examples/interface/}.
+
+@node Setting the Data Handles for a Task
+@section Setting the Data Handles for a Task
+
+The number of data handles a task can manage is bounded by the macro
+@code{STARPU_NMAXBUFS}, whose default value can be changed
+through the configure option @code{--enable-maxbuffers} (see
+@ref{--enable-maxbuffers}).
+
+However, it is possible to define tasks managing more data by using
+the field @code{dyn_handles} when defining a task and the field
+@code{dyn_modes} when defining the corresponding codelet.
+
+@c modifier la doc pour starpu_task et starpu_codelet
+
+@cartouche
+@smallexample
+struct starpu_codelet dummy_big_cl =
+@{
+	.cuda_funcs = @{dummy_big_kernel, NULL@},
+	.opencl_funcs = @{dummy_big_kernel, NULL@},
+	.cpu_funcs = @{dummy_big_kernel, NULL@},
+	.nbuffers = STARPU_NMAXBUFS+1
+@};
+
+task = starpu_task_create();
+task->cl = &dummy_big_cl;
+task->dyn_handles = malloc(task->cl->nbuffers * sizeof(starpu_data_handle_t));
+for(i=0 ; i<task->cl->nbuffers ; i++)
+@{
+	task->dyn_handles[i] = handle;
+@}
+starpu_task_submit(task);
+@end smallexample
+@end cartouche
+
+@cartouche
+@smallexample
+starpu_data_handle_t *handles = malloc(dummy_big_cl.nbuffers * sizeof(starpu_data_handle_t));
+for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
+@{
+	handles[i] = handle;
+@}
+starpu_insert_task(&dummy_big_cl,
+        	 STARPU_VALUE, &dummy_big_cl.nbuffers, sizeof(dummy_big_cl.nbuffers),
+		 STARPU_DATA_ARRAY, handles, dummy_big_cl.nbuffers,
+		 0);
+@end smallexample
+@end cartouche
+
+The whole code for this example is available in the file
+@code{examples/basic_examples/dynamic_handles.c}.
+
 @node More examples
 @node More examples
 @section More examples
 @section More examples
 
 

+ 26 - 0
doc/chapters/api.texi

@@ -1897,6 +1897,17 @@ exceed @code{STARPU_NMAXBUFS}.
 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
 option when configuring StarPU.
 option when configuring StarPU.
 
 
+@item @code{enum starpu_access_mode *dyn_modes}
+Is an array of @code{enum starpu_access_mode}. It describes the
+required access modes to the data needed by the codelet (e.g.
+@code{STARPU_RW}). The number of entries in this array must be
+specified in the @code{nbuffers} field (defined above).
+This field should be used for codelets having a number of data
+greater than @code{STARPU_NMAXBUFS} (@pxref{Setting the Data Handles
+for a Task}).
+When defining a codelet, one should either define this field or the
+field @code{modes} defined above. 
+
 @item @code{struct starpu_perfmodel *model} (optional)
 @item @code{struct starpu_perfmodel *model} (optional)
 This is a pointer to the task duration performance model associated to this
 This is a pointer to the task duration performance model associated to this
 codelet. This optional field is ignored when set to @code{NULL} or
 codelet. This optional field is ignored when set to @code{NULL} or
@@ -1982,10 +1993,25 @@ of entries in this array must be specified in the @code{nbuffers} field of the
 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
 option when configuring StarPU.
 option when configuring StarPU.
 
 
+@item @code{starpu_data_handle_t *dyn_handles}
+Is an array of @code{starpu_data_handle_t}. It specifies the handles
+to the different pieces of data accessed by the task. The number
+of entries in this array must be specified in the @code{nbuffers} field of the
+@code{struct starpu_codelet} structure.
+This field should be used for tasks having a number of data
+greater than @code{STARPU_NMAXBUFS} (@pxref{Setting the Data Handles
+for a Task}).
+When defining a task, one should either define this field or the
+field @code{handles} defined above.
+
 @item @code{void *interfaces[STARPU_NMAXBUFS]}
 @item @code{void *interfaces[STARPU_NMAXBUFS]}
 The actual data pointers to the memory node where execution will happen, managed
 The actual data pointers to the memory node where execution will happen, managed
 by the DSM.
 by the DSM.
 
 
+@item @code{void **dyn_interfaces}
+The actual data pointers to the memory node where execution will happen, managed
+by the DSM. Is used when the field @code{dyn_handles} is defined.
+
 @item @code{void *cl_arg} (optional; default: @code{NULL})
 @item @code{void *cl_arg} (optional; default: @code{NULL})
 This pointer is passed to the codelet through the second argument
 This pointer is passed to the codelet through the second argument
 of the codelet implementation (e.g. @code{cpu_func} or @code{cuda_func}).
 of the codelet implementation (e.g. @code{cpu_func} or @code{cuda_func}).

+ 1 - 0
doc/chapters/configuration.texi

@@ -234,6 +234,7 @@ Enable gathering of various data statistics (@pxref{Data statistics}).
 @end defvr
 @end defvr
 
 
 @defvr {Configure option} --enable-maxbuffers
 @defvr {Configure option} --enable-maxbuffers
+@anchor{--enable-maxbuffers}
 Define the maximum number of buffers that tasks will be able to take
 Define the maximum number of buffers that tasks will be able to take
 as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
 as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
 @end defvr
 @end defvr

+ 7 - 1
include/starpu_task.h

@@ -96,6 +96,7 @@ struct starpu_codelet
 	unsigned nbuffers;
 	unsigned nbuffers;
 	/* which are the access modes for these buffers */
 	/* which are the access modes for these buffers */
 	enum starpu_access_mode modes[STARPU_NMAXBUFS];
 	enum starpu_access_mode modes[STARPU_NMAXBUFS];
+	enum starpu_access_mode *dyn_modes;
 
 
 	/* performance model of the codelet */
 	/* performance model of the codelet */
 	struct starpu_perfmodel *model;
 	struct starpu_perfmodel *model;
@@ -120,6 +121,9 @@ struct starpu_task
 	starpu_data_handle_t handles[STARPU_NMAXBUFS];
 	starpu_data_handle_t handles[STARPU_NMAXBUFS];
 	void *interfaces[STARPU_NMAXBUFS];
 	void *interfaces[STARPU_NMAXBUFS];
 
 
+	starpu_data_handle_t *dyn_handles;
+	void **dyn_interfaces;
+
 	/* arguments not managed by the DSM are given as a buffer */
 	/* arguments not managed by the DSM are given as a buffer */
 	void *cl_arg;
 	void *cl_arg;
 	/* in case the argument buffer has to be uploaded explicitely */
 	/* in case the argument buffer has to be uploaded explicitely */
@@ -240,7 +244,9 @@ struct starpu_task
 	.sched_ctx = 0,					\
 	.sched_ctx = 0,					\
 	.hypervisor_tag = 0,				\
 	.hypervisor_tag = 0,				\
 	.flops = 0.0,					\
 	.flops = 0.0,					\
-		.scheduled = 0				\
+	.scheduled = 0,					\
+	.dyn_handles = NULL,				\
+	.dyn_interfaces = NULL				\
 }
 }
 
 
 /*
 /*

+ 4 - 0
mpi/src/starpu_mpi_insert_task.c

@@ -596,6 +596,10 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 		_STARPU_MPI_DEBUG(1, "Execution of the codelet %p (%s)\n", codelet, codelet->name);
 		_STARPU_MPI_DEBUG(1, "Execution of the codelet %p (%s)\n", codelet, codelet->name);
 		va_start(varg_list, codelet);
 		va_start(varg_list, codelet);
 		struct starpu_task *task = starpu_task_create();
 		struct starpu_task *task = starpu_task_create();
+		if (codelet->nbuffers > STARPU_NMAXBUFS)
+		{
+			task->dyn_handles = malloc(cl->nbuffers * sizeof(starpu_data_handle_t));
+		}
 		int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, codelet, &task, varg_list);
 		int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, codelet, &task, varg_list);
 		STARPU_ASSERT_MSG(ret==0, "_starpu_insert_task_create_and_submit failure %d", ret);
 		STARPU_ASSERT_MSG(ret==0, "_starpu_insert_task_create_and_submit failure %d", ret);
 	}
 	}

+ 18 - 11
src/core/dependencies/data_concurrency.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -192,8 +192,8 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 {
 {
 	/* Note that we do not access j->task->handles, but j->ordered_buffers
 	/* Note that we do not access j->task->handles, but j->ordered_buffers
 	 * which is a sorted copy of it. */
 	 * which is a sorted copy of it. */
-	starpu_data_handle_t handle = j->ordered_buffers[buffer_index].handle;
-	enum starpu_access_mode mode = j->ordered_buffers[buffer_index].mode;
+	starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer_index);
+	enum starpu_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index);
 
 
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
 }
 }
@@ -205,11 +205,16 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 	unsigned nbuffers = j->task->cl->nbuffers;
 	unsigned nbuffers = j->task->cl->nbuffers;
 	for (buf = start_buffer_index; buf < nbuffers; buf++)
 	for (buf = start_buffer_index; buf < nbuffers; buf++)
 	{
 	{
-		if (buf && j->ordered_buffers[buf-1].handle == j->ordered_buffers[buf].handle)
-			/* We have already requested this data, skip it. This
-			 * depends on ordering putting writes before reads, see
-			 * _starpu_compar_handles.  */
-			continue;
+		if (buf)
+		{
+			starpu_data_handle_t handle_m1 = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf-1);
+			starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf);
+			if (handle_m1 == handle)
+				/* We have already requested this data, skip it. This
+				 * depends on ordering putting writes before reads, see
+				 * _starpu_compar_handles.  */
+				continue;
+		}
 
 
                 j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
                 j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
                 if (attempt_to_submit_data_request_from_job(j, buf))
                 if (attempt_to_submit_data_request_from_job(j, buf))
@@ -238,11 +243,13 @@ unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j)
 	unsigned i;
 	unsigned i;
 	for (i=0 ; i<cl->nbuffers ; i++)
 	for (i=0 ; i<cl->nbuffers ; i++)
 	{
 	{
-		j->ordered_buffers[i].handle = j->task->handles[i];
-		j->ordered_buffers[i].mode = j->task->cl->modes[i];
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(j->task, i);
+		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
+		enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(j->task->cl, i);
+		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 	}
 	}
 
 
-	_starpu_sort_task_handles(j->ordered_buffers, cl->nbuffers);
+	_starpu_sort_task_handles(_STARPU_JOB_GET_ORDERED_BUFFERS(j), cl->nbuffers);
 
 
 	return _submit_job_enforce_data_deps(j, 0);
 	return _submit_job_enforce_data_deps(j, 0);
 }
 }

+ 3 - 3
src/core/dependencies/implicit_data_deps.c

@@ -336,8 +336,8 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 	unsigned buffer;
 	unsigned buffer;
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[buffer];
-		enum starpu_access_mode mode = task->cl->modes[buffer];
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, buffer);
+		enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(task->cl, buffer);
 		struct starpu_task *new_task;
 		struct starpu_task *new_task;
 
 
 		/* Scratch memory does not introduce any deps */
 		/* Scratch memory does not introduce any deps */
@@ -457,7 +457,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j)
 void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j)
 {
 {
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
-        struct starpu_buffer_descr *descrs = j->ordered_buffers;
+        struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
 
 
 	if (!task->cl)
 	if (!task->cl)
 		return;
 		return;

+ 14 - 3
src/core/jobs.c

@@ -52,6 +52,9 @@ struct _starpu_job* __attribute__((malloc)) _starpu_job_create(struct starpu_tas
 	 * everywhere */
 	 * everywhere */
 	memset(job, 0, sizeof(*job));
 	memset(job, 0, sizeof(*job));
 
 
+	if (task->dyn_handles)
+	     job->dyn_ordered_buffers = malloc(task->cl->nbuffers * sizeof(struct starpu_buffer_descr));
+
 	job->task = task;
 	job->task = task;
 
 
 #ifndef STARPU_USE_FXT
 #ifndef STARPU_USE_FXT
@@ -104,6 +107,11 @@ void _starpu_job_destroy(struct _starpu_job *j)
 	}
 	}
 
 
 	_starpu_cg_list_deinit(&j->job_successors);
 	_starpu_cg_list_deinit(&j->job_successors);
+	if (j->dyn_ordered_buffers)
+	{
+	     free(j->dyn_ordered_buffers);
+	     j->dyn_ordered_buffers = NULL;
+	}
 
 
 	_starpu_job_delete(j);
 	_starpu_job_delete(j);
 }
 }
@@ -149,8 +157,11 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	int i;
 	int i;
 	size_t data_size = 0;
 	size_t data_size = 0;
 	for(i = 0; i < STARPU_NMAXBUFS; i++)
 	for(i = 0; i < STARPU_NMAXBUFS; i++)
-		if(task->handles[i] != NULL)
-			data_size += _starpu_data_get_size(task->handles[i]);
+	{
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, i);
+		if (handle != NULL)
+			data_size += _starpu_data_get_size(handle);
+	}
 #endif //STARPU_USE_SC_HYPERVISOR
 #endif //STARPU_USE_SC_HYPERVISOR
 
 
 	/* We release handle reference count */
 	/* We release handle reference count */
@@ -159,7 +170,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		unsigned i;
 		unsigned i;
 		for (i=0; i<task->cl->nbuffers; i++)
 		for (i=0; i<task->cl->nbuffers; i++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[i];
+			starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, i);
 			_starpu_spin_lock(&handle->header_lock);
 			_starpu_spin_lock(&handle->header_lock);
 			handle->busy_count--;
 			handle->busy_count--;
 			if (!_starpu_data_check_not_busy(handle))
 			if (!_starpu_data_check_not_busy(handle))

+ 10 - 0
src/core/jobs.h

@@ -70,6 +70,7 @@ LIST_TYPE(_starpu_job,
 	 * the task so that we always grab the rw-lock associated to the
 	 * the task so that we always grab the rw-lock associated to the
 	 * handles in the same order. */
 	 * handles in the same order. */
 	struct starpu_buffer_descr ordered_buffers[STARPU_NMAXBUFS];
 	struct starpu_buffer_descr ordered_buffers[STARPU_NMAXBUFS];
+	struct starpu_buffer_descr *dyn_ordered_buffers;
 
 
 	/* If a tag is associated to the job, this points to the internal data
 	/* If a tag is associated to the job, this points to the internal data
 	 * structure that describes the tag status. */
 	 * structure that describes the tag status. */
@@ -172,4 +173,13 @@ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
  * enforce a FIFO ordering. */
  * enforce a FIFO ordering. */
 int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int back);
 int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int back);
 
 
+/* Ordered-buffer accessors: use job->dyn_ordered_buffers when it is allocated, job->ordered_buffers otherwise. Arguments may be evaluated more than once. */
+#define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[(i)].handle : (job)->ordered_buffers[(i)].handle)
+#define _STARPU_JOB_GET_ORDERED_BUFFER_MODE(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[(i)].mode : (job)->ordered_buffers[(i)].mode)
+#define _STARPU_JOB_GET_ORDERED_BUFFERS(job) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers : (job)->ordered_buffers)
+/* Setters: the value parameters are deliberately not named `handle'/`mode', so that they cannot replace the .handle/.mode member names in the expansion. */
+#define _STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(job, handle_value, i) do { if ((job)->dyn_ordered_buffers) (job)->dyn_ordered_buffers[(i)].handle = (handle_value); else (job)->ordered_buffers[(i)].handle = (handle_value);} while(0)
+#define _STARPU_JOB_SET_ORDERED_BUFFER_MODE(job, mode_value, i) do { if ((job)->dyn_ordered_buffers) (job)->dyn_ordered_buffers[(i)].mode = (mode_value); else (job)->ordered_buffers[(i)].mode = (mode_value);} while(0)
+#define _STARPU_JOB_SET_ORDERED_BUFFER(job, buffer, i) do { if ((job)->dyn_ordered_buffers) (job)->dyn_ordered_buffers[(i)] = (buffer); else (job)->ordered_buffers[(i)] = (buffer);} while(0)
+
 #endif // __JOBS_H__
 #endif // __JOBS_H__

+ 5 - 5
src/core/perfmodel/perfmodel.c

@@ -227,7 +227,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 		starpu_data_handle_t handle;
 		starpu_data_handle_t handle;
 		struct starpu_task *conversion_task;
 		struct starpu_task *conversion_task;
 
 
-		handle = task->handles[i];
+		handle = _STARPU_TASK_GET_HANDLE(task, i);
 		if (!_starpu_data_is_multiformat_handle(handle))
 		if (!_starpu_data_is_multiformat_handle(handle))
 			continue;
 			continue;
 
 
@@ -287,8 +287,8 @@ double starpu_task_expected_data_transfer_time(unsigned memory_node, struct star
 
 
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[buffer];
-		enum starpu_access_mode mode = task->cl->modes[buffer];
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, buffer);
+		enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(task->cl, buffer);
 
 
 		penalty += starpu_data_expected_transfer_time(handle, memory_node, mode);
 		penalty += starpu_data_expected_transfer_time(handle, memory_node, mode);
 	}
 	}
@@ -375,8 +375,8 @@ double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundl
 			unsigned b;
 			unsigned b;
 			for (b = 0; b < task->cl->nbuffers; b++)
 			for (b = 0; b < task->cl->nbuffers; b++)
 			{
 			{
-				starpu_data_handle_t handle = task->handles[b];
-				enum starpu_access_mode mode = task->cl->modes[b];
+				starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, b);
+				enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(task->cl, b);
 
 
 				if (!(mode & STARPU_R))
 				if (!(mode & STARPU_R))
 					continue;
 					continue;

+ 2 - 2
src/core/perfmodel/perfmodel_history.c

@@ -72,7 +72,7 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_per
 		unsigned buffer;
 		unsigned buffer;
 		for (buffer = 0; buffer < nbuffers; buffer++)
 		for (buffer = 0; buffer < nbuffers; buffer++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[buffer];
+			starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, buffer);
 			size += _starpu_data_get_size(handle);
 			size += _starpu_data_get_size(handle);
 		}
 		}
 		return size;
 		return size;
@@ -1267,7 +1267,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 
 		for (i = 0; i < task->cl->nbuffers; i++)
 		for (i = 0; i < task->cl->nbuffers; i++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[i];
+			starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, i);
 
 
 			STARPU_ASSERT(handle->ops);
 			STARPU_ASSERT(handle->ops);
 			STARPU_ASSERT(handle->ops->display);
 			STARPU_ASSERT(handle->ops->display);

+ 8 - 5
src/core/sched_policy.c

@@ -236,7 +236,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 				struct starpu_task *conversion_task;
 				struct starpu_task *conversion_task;
 				starpu_data_handle_t handle;
 				starpu_data_handle_t handle;
 
 
-				handle = task->handles[i];
+				handle = _STARPU_TASK_GET_HANDLE(task, i);
 				if (!_starpu_handle_needs_conversion_task(handle, node))
 				if (!_starpu_handle_needs_conversion_task(handle, node))
 					continue;
 					continue;
 
 
@@ -249,7 +249,10 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 			}
 			}
 
 
 			for (i = 0; i < task->cl->nbuffers; i++)
 			for (i = 0; i < task->cl->nbuffers; i++)
-				task->handles[i]->mf_node = node;
+			{
+				starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, i);
+				handle->mf_node = node;
+			}
 		}
 		}
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 
 
@@ -447,7 +450,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
 
 	conversion_task = starpu_task_create();
 	conversion_task = starpu_task_create();
 	conversion_task->synchronous = 0;
 	conversion_task->synchronous = 0;
-	conversion_task->handles[0] = handle;
+	_STARPU_TASK_SET_HANDLE(conversion_task, handle, 0);
 
 
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 	/* The node does not really matter here */
 	/* The node does not really matter here */
@@ -510,7 +513,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 		STARPU_ABORT();
 		STARPU_ABORT();
 	}
 	}
 
 
-	conversion_task->cl->modes[0] = STARPU_RW;
+	_STARPU_CODELET_SET_MODE(conversion_task->cl, STARPU_RW, 0);
 	return conversion_task;
 	return conversion_task;
 }
 }
 
 
@@ -663,7 +666,7 @@ pick:
 		struct starpu_task *conversion_task;
 		struct starpu_task *conversion_task;
 		starpu_data_handle_t handle;
 		starpu_data_handle_t handle;
 
 
-		handle = task->handles[i];
+		handle = _STARPU_TASK_GET_HANDLE(task, i);
 		if (!_starpu_handle_needs_conversion_task(handle, node))
 		if (!_starpu_handle_needs_conversion_task(handle, node))
 			continue;
 			continue;
 		conversion_task = _starpu_create_conversion_task(handle, node);
 		conversion_task = _starpu_create_conversion_task(handle, node);

+ 34 - 10
src/core/task.c

@@ -77,6 +77,11 @@ void starpu_task_init(struct starpu_task *task)
 	task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
 	task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
 
 
 	task->flops = 0.0;
 	task->flops = 0.0;
+
+	task->scheduled = 0;
+
+	task->dyn_handles = NULL;
+	task->dyn_interfaces = NULL;
 }
 }
 
 
 /* Free all the ressources allocated for a task, without deallocating the task
 /* Free all the ressources allocated for a task, without deallocating the task
@@ -99,6 +104,14 @@ void starpu_task_clean(struct starpu_task *task)
 	if (bundle)
 	if (bundle)
 		starpu_task_bundle_remove(bundle, task);
 		starpu_task_bundle_remove(bundle, task);
 
 
+	if (task->dyn_handles)
+	{
+		free(task->dyn_handles);
+		task->dyn_handles = NULL;
+		free(task->dyn_interfaces);
+		task->dyn_interfaces = NULL;
+	}
+
 	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
 	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
 
 
 	if (j)
 	if (j)
@@ -229,7 +242,7 @@ int _starpu_submit_job(struct _starpu_job *j)
 		unsigned i;
 		unsigned i;
 		for (i=0; i<task->cl->nbuffers; i++)
 		for (i=0; i<task->cl->nbuffers; i++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[i];
+			starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, i);
 			_starpu_spin_lock(&handle->header_lock);
 			_starpu_spin_lock(&handle->header_lock);
 			handle->busy_count++;
 			handle->busy_count++;
 			_starpu_spin_unlock(&handle->header_lock);
 			_starpu_spin_unlock(&handle->header_lock);
@@ -393,16 +406,23 @@ int starpu_task_submit(struct starpu_task *task)
 		unsigned i;
 		unsigned i;
 
 
 		/* Check buffers */
 		/* Check buffers */
-		STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d)", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
+		if (task->dyn_handles == NULL)
+			STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d)", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
+
+		if (task->dyn_handles)
+		{
+			task->dyn_interfaces = malloc(task->cl->nbuffers * sizeof(void *));
+		}
+
 		for (i = 0; i < task->cl->nbuffers; i++)
 		for (i = 0; i < task->cl->nbuffers; i++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[i];
+			starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, i);
 			/* Make sure handles are not partitioned */
 			/* Make sure handles are not partitioned */
 			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data can be used in a task");
 			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data can be used in a task");
 			/* Provide the home interface for now if any,
 			/* Provide the home interface for now if any,
 			 * for can_execute hooks */
 			 * for can_execute hooks */
 			if (handle->home_node != -1)
 			if (handle->home_node != -1)
-				task->interfaces[i] = starpu_data_get_interface_on_node(task->handles[i], handle->home_node);
+				_STARPU_TASK_SET_INTERFACE(task, starpu_data_get_interface_on_node(handle, handle->home_node), i);
 		}
 		}
 
 
 		/* Check the type of worker(s) required by the task exist */
 		/* Check the type of worker(s) required by the task exist */
@@ -526,8 +546,10 @@ int _starpu_task_submit_nodeps(struct starpu_task *task)
 		unsigned i;
 		unsigned i;
 		for (i=0 ; i<task->cl->nbuffers ; i++)
 		for (i=0 ; i<task->cl->nbuffers ; i++)
 		{
 		{
-			j->ordered_buffers[i].handle = j->task->handles[i];
-			j->ordered_buffers[i].mode = j->task->cl->modes[i];
+			starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(j->task, i);
+			_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
+			enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(j->task->cl, i);
+			_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 		}
 		}
 	}
 	}
 
 
@@ -559,7 +581,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	unsigned i;
 	unsigned i;
 	for (i=0; i<task->cl->nbuffers; i++)
 	for (i=0; i<task->cl->nbuffers; i++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[i];
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, i);
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
 		handle->busy_count++;
 		handle->busy_count++;
 		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_spin_unlock(&handle->header_lock);
@@ -574,8 +596,10 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
 
 	for (i=0 ; i<task->cl->nbuffers ; i++)
 	for (i=0 ; i<task->cl->nbuffers ; i++)
 	{
 	{
-		j->ordered_buffers[i].handle = j->task->handles[i];
-		j->ordered_buffers[i].mode = j->task->cl->modes[i];
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(j->task, i);
+		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
+		enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(j->task->cl, i);
+		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 	}
 	}
 
 
         _STARPU_LOG_IN();
         _STARPU_LOG_IN();
@@ -811,7 +835,7 @@ _starpu_task_uses_multiformat_handles(struct starpu_task *task)
 	unsigned i;
 	unsigned i;
 	for (i = 0; i < task->cl->nbuffers; i++)
 	for (i = 0; i < task->cl->nbuffers; i++)
 	{
 	{
-		if (_starpu_data_is_multiformat_handle(task->handles[i]))
+		if (_starpu_data_is_multiformat_handle(_STARPU_TASK_GET_HANDLE(task, i)))
 			return 1;
 			return 1;
 	}
 	}
 
 

+ 9 - 0
src/core/task.h

@@ -73,4 +73,13 @@ starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet
 starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
 starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
 starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
 starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
 
 
+/* Task accessors: use the dyn_* arrays when task->dyn_handles is set, the fixed-size arrays otherwise. Arguments may be evaluated more than once. */
+#define _STARPU_TASK_GET_HANDLE(task, i) (((task)->dyn_handles) ? (task)->dyn_handles[(i)] : (task)->handles[(i)])
+#define _STARPU_TASK_SET_HANDLE(task, handle, i) do { if ((task)->dyn_handles) (task)->dyn_handles[(i)] = (handle); else (task)->handles[(i)] = (handle); } while(0)
+#define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if ((task)->dyn_handles) (task)->dyn_interfaces[(i)] = (interface); else (task)->interfaces[(i)] = (interface);} while(0)
+#define _STARPU_TASK_GET_INTERFACES(task) (((task)->dyn_handles) ? (task)->dyn_interfaces : (task)->interfaces)
+/* Codelet accessors: use codelet->dyn_modes when it is set, the fixed-size modes array otherwise. Fully parenthesized so they can be used inside larger expressions. */
+#define _STARPU_CODELET_GET_MODE(codelet, i) (((codelet)->dyn_modes) ? (codelet)->dyn_modes[(i)] : (codelet)->modes[(i)])
+#define _STARPU_CODELET_SET_MODE(codelet, mode, i) do { if ((codelet)->dyn_modes) (codelet)->dyn_modes[(i)] = (mode); else (codelet)->modes[(i)] = (mode); } while(0)
+
 #endif // __CORE_TASK_H__
 #endif // __CORE_TASK_H__

+ 8 - 7
src/datawizard/coherency.c

@@ -22,6 +22,7 @@
 #include <core/dependencies/data_concurrency.h>
 #include <core/dependencies/data_concurrency.h>
 #include <profiling/profiling.h>
 #include <profiling/profiling.h>
 #include <math.h>
 #include <math.h>
+#include <core/task.h>
 
 
 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node);
 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node);
 unsigned _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 unsigned _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
@@ -591,8 +592,8 @@ int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
 
 
 	for (index = 0; index < nbuffers; index++)
 	for (index = 0; index < nbuffers; index++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[index];
-		enum starpu_access_mode mode = task->cl->modes[index];
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, index);
+		enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(task->cl, index);
 
 
 		if (mode & (STARPU_SCRATCH|STARPU_REDUX))
 		if (mode & (STARPU_SCRATCH|STARPU_REDUX))
 			continue;
 			continue;
@@ -624,7 +625,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j, uint32_t mask)
 	if (profiling && task->profiling_info)
 	if (profiling && task->profiling_info)
 		_starpu_clock_gettime(&task->profiling_info->acquire_data_start_time);
 		_starpu_clock_gettime(&task->profiling_info->acquire_data_start_time);
 
 
-	struct starpu_buffer_descr *descrs = j->ordered_buffers;
+	struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
 	unsigned nbuffers = task->cl->nbuffers;
 	unsigned nbuffers = task->cl->nbuffers;
 
 
 	unsigned local_memory_node = _starpu_memory_node_get_local_key();
 	unsigned local_memory_node = _starpu_memory_node_get_local_key();
@@ -656,14 +657,14 @@ int _starpu_fetch_task_input(struct _starpu_job *j, uint32_t mask)
 	/* Now that we have taken the data locks in locking order, fill the codelet interfaces in function order.  */
 	/* Now that we have taken the data locks in locking order, fill the codelet interfaces in function order.  */
 	for (index = 0; index < nbuffers; index++)
 	for (index = 0; index < nbuffers; index++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[index];
-		enum starpu_access_mode mode = task->cl->modes[index];
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, index);
+		enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(task->cl, index);
 
 
 		struct _starpu_data_replicate *local_replicate;
 		struct _starpu_data_replicate *local_replicate;
 
 
 		local_replicate = get_replicate(handle, mode, workerid, local_memory_node);
 		local_replicate = get_replicate(handle, mode, workerid, local_memory_node);
 
 
-		task->interfaces[index] = local_replicate->data_interface;
+		_STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, index);
 
 
 		if (mode & STARPU_REDUX)
 		if (mode & STARPU_REDUX)
 		{
 		{
@@ -699,7 +700,7 @@ void _starpu_push_task_output(struct _starpu_job *j, uint32_t mask)
 	if (profiling && task->profiling_info)
 	if (profiling && task->profiling_info)
 		_starpu_clock_gettime(&task->profiling_info->release_data_start_time);
 		_starpu_clock_gettime(&task->profiling_info->release_data_start_time);
 
 
-        struct starpu_buffer_descr *descrs = j->ordered_buffers;
+        struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
         unsigned nbuffers = task->cl->nbuffers;
         unsigned nbuffers = task->cl->nbuffers;
 
 
 	int workerid = starpu_worker_get_id();
 	int workerid = starpu_worker_get_id();

+ 1 - 1
src/datawizard/filters.c

@@ -305,7 +305,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 				.nbuffers = 1
 				.nbuffers = 1
 			};
 			};
 			struct starpu_task *task = starpu_task_create();
 			struct starpu_task *task = starpu_task_create();
-			task->handles[0] = child_handle;
+			_STARPU_TASK_SET_HANDLE(task, child_handle, 0);
 			task->cl = &cl;
 			task->cl = &cl;
 			task->synchronous = 1;
 			task->synchronous = 1;
 			if (_starpu_task_submit_internally(task) != 0)
 			if (_starpu_task_submit_internally(task) != 0)

+ 1 - 1
src/datawizard/footprint.c

@@ -43,7 +43,7 @@ uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, enum
 	{
 	{
 		for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
 		for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[buffer];
+			starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, buffer);
 
 
 			uint32_t handle_footprint = _starpu_data_get_footprint(handle);
 			uint32_t handle_footprint = _starpu_data_get_footprint(handle);
 
 

+ 5 - 5
src/datawizard/reduction.c

@@ -225,8 +225,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 					STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
 					STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
 					STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
 					STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
 
 
-					redux_task->handles[0] = replicate_array[i];
-					redux_task->handles[1] = replicate_array[i+step];
+					_STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i], 0);
+					_STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i+step], 1);
 
 
 					int ndeps = 0;
 					int ndeps = 0;
 					struct starpu_task *task_deps[2];
 					struct starpu_task *task_deps[2];
@@ -281,7 +281,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 			if (!redux_task->cl->modes[0])
 			if (!redux_task->cl->modes[0])
 				redux_task->cl->modes[0] = STARPU_W;
 				redux_task->cl->modes[0] = STARPU_W;
 			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_W, "Parameter of initialization codelet has to be W");
 			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_W, "Parameter of initialization codelet has to be W");
-			redux_task->handles[0] = handle;
+			_STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
 
 
 			int ret = _starpu_task_submit_internally(redux_task);
 			int ret = _starpu_task_submit_internally(redux_task);
 			STARPU_ASSERT(!ret);
 			STARPU_ASSERT(!ret);
@@ -311,8 +311,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
 			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
 			STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
 			STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
 
 
-			redux_task->handles[0] = handle;
-			redux_task->handles[1] = replicate_array[replicate];
+			_STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
+			_STARPU_TASK_SET_HANDLE(redux_task, replicate_array[replicate], 1);
 
 
 			int ret = _starpu_task_submit_internally(redux_task);
 			int ret = _starpu_task_submit_internally(redux_task);
 			STARPU_ASSERT(!ret);
 			STARPU_ASSERT(!ret);

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -158,7 +158,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 		_starpu_simgrid_execute_job(j, perf_arch, NAN);
 		_starpu_simgrid_execute_job(j, perf_arch, NAN);
 #else
 #else
-		func(task->interfaces, task->cl_arg);
+		func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 #endif
 #endif
 		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
 		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
 			/* rebind to single CPU */
 			/* rebind to single CPU */

+ 1 - 1
src/drivers/cuda/driver_cuda.c

@@ -353,7 +353,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	_starpu_simgrid_execute_job(j, args->perf_arch, NAN);
 	_starpu_simgrid_execute_job(j, args->perf_arch, NAN);
 #else
 #else
-	func(task->interfaces, task->cl_arg);
+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 #endif
 #endif
 
 
 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);

+ 3 - 3
src/drivers/gordon/driver_gordon.c

@@ -102,7 +102,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 	unsigned nbuffers = cl->nbuffers;
 	unsigned nbuffers = cl->nbuffers;
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
-		enum starpu_access_mode mode = cl->modes[buffer];
+		enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(cl, buffer);
 
 
 		switch (mode)
 		switch (mode)
 		{
 		{
@@ -122,7 +122,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
 		unsigned gordon_buffer;
 		unsigned gordon_buffer;
-		enum starpu_access_mode mode = cl->modes[buffer];
+		enum starpu_access_mode mode = _STARPU_CODELET_GET_MODE(cl, buffer);
 
 
 		switch (mode)
 		switch (mode)
 		{
 		{
@@ -138,7 +138,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 				break;
 				break;
 		}
 		}
 
 
-		starpu_data_handle_t handle = task->handles[buffer];
+		starpu_data_handle_t handle = _STARPU_TASK_GET_HANDLE(task, buffer);
 
 
 		gordon_job->nalloc = 0;
 		gordon_job->nalloc = 0;
 		gordon_job->nin = nin;
 		gordon_job->nin = nin;

+ 2 - 2
src/drivers/opencl/driver_opencl.c

@@ -824,7 +824,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	double length = NAN;
 	double length = NAN;
   #ifdef STARPU_OPENCL_SIMULATOR
   #ifdef STARPU_OPENCL_SIMULATOR
-	func(task->interfaces, task->cl_arg);
+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
     #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
     #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
       #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
       #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
         #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
         #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
@@ -838,7 +838,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
   #endif
   #endif
 	_starpu_simgrid_execute_job(j, args->perf_arch, length);
 	_starpu_simgrid_execute_job(j, args->perf_arch, length);
 #else
 #else
-	func(task->interfaces, task->cl_arg);
+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 #endif
 #endif
 
 
 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);

+ 1 - 1
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -80,7 +80,7 @@ static int count_non_ready_buffers(struct starpu_task *task, unsigned node)
 	{
 	{
 		starpu_data_handle_t handle;
 		starpu_data_handle_t handle;
 
 
-		handle = task->handles[index];
+		handle = _STARPU_TASK_GET_HANDLE(task, index);
 
 
 		int is_valid;
 		int is_valid;
 		starpu_data_query_status(handle, node, NULL, &is_valid, NULL);
 		starpu_data_query_status(handle, node, NULL, &is_valid, NULL);

+ 2 - 2
src/util/starpu_data_cpy.c

@@ -103,8 +103,8 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 	task->callback_func = callback_func;
 	task->callback_func = callback_func;
 	task->callback_arg = callback_arg;
 	task->callback_arg = callback_arg;
 
 
-	task->handles[0] = dst_handle;
-	task->handles[1] = src_handle;
+	_STARPU_TASK_SET_HANDLE(task, dst_handle, 0);
+	_STARPU_TASK_SET_HANDLE(task, src_handle, 1);
 
 
 	task->synchronous = !asynchronous;
 	task->synchronous = !asynchronous;
 
 

+ 7 - 1
src/util/starpu_insert_task.c

@@ -79,8 +79,14 @@ int starpu_insert_task(struct starpu_codelet *cl, ...)
 		_starpu_codelet_pack_args((char **)&arg_buffer, arg_buffer_size, varg_list);
 		_starpu_codelet_pack_args((char **)&arg_buffer, arg_buffer_size, varg_list);
 	}
 	}
 
 
-	va_start(varg_list, cl);
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
+
+	if (cl->nbuffers > STARPU_NMAXBUFS)
+	{
+		task->dyn_handles = malloc(cl->nbuffers * sizeof(starpu_data_handle_t));
+	}
+
+	va_start(varg_list, cl);
 	int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, cl, &task, varg_list);
 	int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, cl, &task, varg_list);
 
 
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)

+ 3 - 2
src/util/starpu_insert_task_utils.c

@@ -18,6 +18,7 @@
 #include <util/starpu_insert_task_utils.h>
 #include <util/starpu_insert_task_utils.h>
 #include <common/config.h>
 #include <common/config.h>
 #include <common/utils.h>
 #include <common/utils.h>
+#include <core/task.h>
 
 
 typedef void (*_starpu_callback_func_t)(void *);
 typedef void (*_starpu_callback_func_t)(void *);
 
 
@@ -239,7 +240,7 @@ int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_si
 
 
 			STARPU_ASSERT(cl != NULL);
 			STARPU_ASSERT(cl != NULL);
 
 
-			(*task)->handles[current_buffer] = handle;
+			_STARPU_TASK_SET_HANDLE((*task), handle, current_buffer);
 			if (cl->modes[current_buffer])
 			if (cl->modes[current_buffer])
 			{
 			{
 				STARPU_ASSERT_MSG(cl->modes[current_buffer] == mode, "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_insert_task\n",
 				STARPU_ASSERT_MSG(cl->modes[current_buffer] == mode, "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_insert_task\n",
@@ -264,7 +265,7 @@ int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_si
 			int i;
 			int i;
 			for(i=0 ; i<nb_handles ; i++)
 			for(i=0 ; i<nb_handles ; i++)
 			{
 			{
-				(*task)->handles[current_buffer] = handles[i];
+				_STARPU_TASK_SET_HANDLE((*task), handles[i], current_buffer);
 				current_buffer++;
 				current_buffer++;
 			}
 			}