瀏覽代碼

merge trunk

Nathalie Furmento 10 年之前
父節點
當前提交
940e31bb36

+ 4 - 0
ChangeLog

@@ -48,6 +48,9 @@ New features:
   * Add CUDA concurrent kernel execution support through
     the STARPU_NWORKER_PER_CUDA environment variable.
   * New locality work stealing scheduler (lws).
+  * Add STARPU_VARIABLE_NBUFFERS to be set in cl.nbuffers, and nbuffers and
+    modes field to the task structure, which permit to define codelets taking a
+    variable number of data.
 
 Small features:
   * Tasks can now have a name (via the field const char *name of
@@ -83,6 +86,7 @@ Changes:
     starpu_data_set_tag(), data are received as a raw memory)
   * StarPU-MPI: Fix for being able to receive data with the same tag
     from several nodes (see mpi/tests/gather.c)
+  * Remove the long-deprecated cost_model fields and task->buffers field.
 
 Small changes:
   * Rename function starpu_trace_user_event() as

+ 2 - 2
configure.ac

@@ -54,8 +54,8 @@ AC_CANONICAL_SYSTEM
 dnl Automake 1.11 introduced `silent-rules' and `color-tests'.  Use them
 dnl when they're available.
 m4_ifdef([AM_SILENT_RULES],
-  [AM_INIT_AUTOMAKE([1.11 -Wall foreign silent-rules color-tests parallel-tests subdir-objects])],
-  [AM_INIT_AUTOMAKE([1.10 -Wall foreign subdir-objects])])
+  [AM_INIT_AUTOMAKE([1.11 -Wall foreign silent-rules color-tests parallel-tests])],
+  [AM_INIT_AUTOMAKE([1.10 -Wall foreign])])
 
 m4_ifdef([AM_SILENT_RULES],
   [AM_SILENT_RULES(yes)])

+ 63 - 7
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -115,6 +115,11 @@ Defines the maximum number of buffers that tasks will be able to take
 as parameters. The default value is 8, it can be changed by using the
 configure option \ref enable-maxbuffers "--enable-maxbuffers".
 
+\def STARPU_VARIABLE_NBUFFERS
+\ingroup API_Codelet_And_Tasks
+Value to set in starpu_codelet::nbuffers to specify that the codelet can accept
+a variable number of buffers, specified in starpu_task::nbuffers.
+
 \typedef starpu_cpu_func_t
 \ingroup API_Codelet_And_Tasks
 CPU implementation of a codelet.
@@ -270,11 +275,14 @@ starpu_codelet::cpu_funcs_name is non-NULL, in which case StarPU will
 simply make a symbol lookup to get the implementation.
 
 \var starpu_codelet::nbuffers
-Specify the number of arguments taken by the codelet. These arguments
-are managed by the DSM and are accessed from the <c>void *buffers[]</c>
-array. The constant argument passed with the field starpu_task::cl_arg
-is not counted in this number. This value should not be above
-\ref STARPU_NMAXBUFS.
+Specify the number of arguments taken by the codelet. These arguments are
+managed by the DSM and are accessed from the <c>void *buffers[]</c> array. The
+constant argument passed with the field starpu_task::cl_arg is not counted in
+this number. This value should not be above \ref STARPU_NMAXBUFS. It may be set
+to STARPU_VARIABLE_NBUFFERS to specify that the number of buffers and their
+access modes will be set in starpu_task::nbuffers and starpu_task::modes or
+starpu_task::dyn_modes, which thus permits to define codelets with a varying
+number of data.
 
 \var starpu_codelet::modes
 Is an array of ::starpu_data_access_mode. It describes the required
@@ -362,27 +370,33 @@ starpu_task_create(), or declared statically. In the latter case, the
 programmer has to zero the structure starpu_task and to fill the
 different fields properly. The indicated default values correspond to
 the configuration of a task allocated with starpu_task_create().
+
 \var starpu_task::name
 Optional name of the task. This can be useful for debugging
 purposes.
+
 \var starpu_task::cl
 Is a pointer to the corresponding structure starpu_codelet. This
 describes where the kernel should be executed, and supplies the
 appropriate implementations. When set to NULL, no code is executed
 during the tasks, such empty tasks can be useful for synchronization
 purposes.
-\var starpu_task::buffers
-\deprecated
 This field has been made deprecated. One should use instead the
 field starpu_task::handles to specify the data handles accessed
 by the task. The access modes are now defined in the field
 starpu_codelet::modes.
+
+\var starpu_task::nbuffers
+Specifies the number of buffers. This is only used when starpu_codelet::nbuffers
+is STARPU_VARIABLE_NBUFFERS.
+
 \var starpu_task::handles
 Is an array of ::starpu_data_handle_t. It specifies the handles to the
 different pieces of data accessed by the task. The number of entries
 in this array must be specified in the field starpu_codelet::nbuffers,
 and should not exceed \ref STARPU_NMAXBUFS. If unsufficient, this value can
 be set with the configure option \ref enable-maxbuffers "--enable-maxbuffers".
+
 \var starpu_task::dyn_handles
 Is an array of ::starpu_data_handle_t. It specifies the handles to the
 different pieces of data accessed by the task. The number of entries
@@ -401,6 +415,25 @@ The actual data pointers to the memory node where execution will
 happen, managed by the DSM. Is used when the field
 starpu_task::dyn_handles is defined.
 
+\var starpu_task::modes
+Is used only when starpu_codelet::nbuffers is STARPU_VARIABLE_NBUFFERS.
+It is an array of ::starpu_data_access_mode. It describes the required
+access modes to the data neeeded by the codelet (e.g. ::STARPU_RW). The
+number of entries in this array must be specified in the field
+starpu_task::nbuffers, and should not exceed \ref STARPU_NMAXBUFS. If
+unsufficient, this value can be set with the configure option
+\ref enable-maxbuffers "--enable-maxbuffers".
+
+\var starpu_task::dyn_modes
+Is used only when starpu_codelet::nbuffers is STARPU_VARIABLE_NBUFFERS.
+It is an array of ::starpu_data_access_mode. It describes the required
+access modes to the data needed by the codelet (e.g. ::STARPU_RW).
+The number of entries in this array must be specified in the field
+starpu_codelet::nbuffers. This field should be used for codelets having a
+number of datas greater than \ref STARPU_NMAXBUFS (see \ref
+SettingTheDataHandlesForATask). When defining a codelet, one
+should either define this field or the field starpu_task::modes defined above.
+
 \var starpu_task::cl_arg
 Optional pointer which is passed to the codelet through the second
 argument of the codelet implementation (e.g. starpu_codelet::cpu_func
@@ -612,6 +645,11 @@ It is possible to initialize statically allocated tasks with
 this value. This is equivalent to initializing a structure starpu_task
 with the function starpu_task_init() function.
 
+\def STARPU_TASK_GET_NBUFFERS(task)
+\ingroup API_Codelet_And_Tasks
+Return the number of buffers for this task, i.e. starpu_codelet::nbuffers, or
+starpu_task::nbuffers if the former is STARPU_VARIABLE_BUFFERS.
+
 \def STARPU_TASK_GET_HANDLE(task, i)
 \ingroup API_Codelet_And_Tasks
 Return the \p i th data handle of the given task. If the task
@@ -647,6 +685,24 @@ starpu_codelet::modes or the \p i th element of the field
 starpu_codelet::dyn_modes (see \ref
 SettingTheDataHandlesForATask)
 
+\def STARPU_TASK_GET_MODE(codelet, i)
+\ingroup API_Codelet_And_Tasks
+Return the access mode of the \p i th data handle of the given
+task. If the task is defined with a static or dynamic number of
+handles, will either return the \p i th element of the field
+starpu_task::modes or the \p i th element of the field
+starpu_task::dyn_modes (see \ref
+SettingTheDataHandlesForATask)
+
+\def STARPU_TASK_SET_MODE(task, mode, i)
+\ingroup API_Codelet_And_Tasks
+Set the access mode of the \p i th data handle of the given
+task. If the task is defined with a static or dynamic number of
+handles, will either set the \p i th element of the field
+starpu_task::modes or the \p i th element of the field
+starpu_task::dyn_modes (see \ref
+SettingTheDataHandlesForATask)
+
 \fn struct starpu_task *starpu_task_create(void)
 \ingroup API_Codelet_And_Tasks
 Allocate a task structure and initialize it with default

+ 0 - 4
doc/doxygen/chapters/api/performance_model.doxy

@@ -94,8 +94,6 @@ arch-specific factor.
 is the symbol name for the performance model, which will be used as
 file name to store the model. It must be set otherwise the model will
 be ignored.
-\var starpu_perfmodel::cost_model
-\deprecated
 This field is deprecated. Use instead the field starpu_perfmodel::cost_function field.
 \var starpu_perfmodel::cost_function
 Used by ::STARPU_COMMON: takes a task and implementation number, and
@@ -158,8 +156,6 @@ number of sample values for non-linear regression
 contains information about the performance model of a given
 arch.
 \ingroup API_Performance_Model
-\var starpu_perfmodel_per_arch::cost_model
-\deprecated
 This field is deprecated. Use instead the field
 starpu_perfmodel_per_arch::cost_function.
 \var starpu_perfmodel_per_arch::cost_function

+ 0 - 2
include/starpu_perfmodel.h

@@ -103,7 +103,6 @@ typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task,
 
 struct starpu_perfmodel_per_arch
 {
-	double (*cost_model)(struct starpu_data_descr *t) STARPU_DEPRECATED;
 	starpu_perfmodel_per_arch_cost_function cost_function;
 	starpu_perfmodel_per_arch_size_base size_base;
 
@@ -129,7 +128,6 @@ struct starpu_perfmodel
 {
 	enum starpu_perfmodel_type type;
 
-	double (*cost_model)(struct starpu_data_descr *) STARPU_DEPRECATED;
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 
 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);

+ 25 - 9
include/starpu_task.h

@@ -79,6 +79,8 @@ typedef starpu_scc_kernel_t (*starpu_scc_func_t)(void);
 #define STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS   ((starpu_cuda_func_t) -1)
 #define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t) -1)
 
+#define STARPU_VARIABLE_NBUFFERS (-1)
+
 struct starpu_task;
 struct starpu_codelet
 {
@@ -101,7 +103,7 @@ struct starpu_codelet
 
 	char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
 
-	unsigned nbuffers;
+	int nbuffers;
 	enum starpu_data_access_mode modes[STARPU_NMAXBUFS];
 	enum starpu_data_access_mode *dyn_modes;
 
@@ -123,14 +125,15 @@ struct starpu_task
 
 	struct starpu_codelet *cl;
 
-	/* TODO: remove someday, this is costly */
-	struct starpu_data_descr buffers[STARPU_NMAXBUFS] STARPU_DEPRECATED;
+	int nbuffers;
 
 	starpu_data_handle_t handles[STARPU_NMAXBUFS];
 	void *interfaces[STARPU_NMAXBUFS];
+	enum starpu_data_access_mode modes[STARPU_NMAXBUFS];
 
 	starpu_data_handle_t *dyn_handles;
 	void **dyn_interfaces;
+	enum starpu_data_access_mode *dyn_modes;
 
 	void *cl_arg;
 	size_t cl_arg_size;
@@ -218,17 +221,30 @@ struct starpu_task
 	.scheduled = 0,					\
 	.dyn_handles = NULL,				\
 	.dyn_interfaces = NULL,				\
+	.dyn_modes = NULL,				\
 	.name = NULL                        		\
 }
 
-#define STARPU_TASK_GET_HANDLE(task, i) ((task->dyn_handles) ? task->dyn_handles[i] : task->handles[i])
-#define STARPU_TASK_SET_HANDLE(task, handle, i) do { if (task->dyn_handles) task->dyn_handles[i] = handle; else task->handles[i] = handle; } while(0)
+#define STARPU_TASK_GET_NBUFFERS(task) ((unsigned)((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS ? ((task)->nbuffers) : ((task)->cl->nbuffers)))
+
+#define STARPU_TASK_GET_HANDLE(task, i) (((task)->dyn_handles) ? (task)->dyn_handles[i] : (task)->handles[i])
+#define STARPU_TASK_SET_HANDLE(task, handle, i) do { if ((task)->dyn_handles) (task)->dyn_handles[i] = handle; else (task)->handles[i] = handle; } while(0)
+
+#define STARPU_CODELET_GET_MODE(codelet, i) (((codelet)->dyn_modes) ? (codelet)->dyn_modes[i] : (codelet)->modes[i])
+#define STARPU_CODELET_SET_MODE(codelet, mode, i) do { if ((codelet)->dyn_modes) (codelet)->dyn_modes[i] = mode; else (codelet)->modes[i] = mode; } while(0)
 
-#define STARPU_CODELET_GET_MODE(codelet, i) ((codelet->dyn_modes) ? codelet->dyn_modes[i] : codelet->modes[i])
-#define STARPU_CODELET_SET_MODE(codelet, mode, i) do { if (codelet->dyn_modes) codelet->dyn_modes[i] = mode; else codelet->modes[i] = mode; } while(0)
+#define STARPU_TASK_GET_MODE(task, i) ((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS ? \
+						(((task)->dyn_modes) ? (task)->dyn_modes[i] : (task)->modes[i]) : \
+						STARPU_CODELET_GET_MODE((task)->cl, i) )
+#define STARPU_TASK_SET_MODE(task, mode, i) do { \
+					if ((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS) \
+						if ((task)->dyn_modes) (task)->dyn_modes[i] = mode; else (task)->modes[i] = mode; \
+					else \
+						STARPU_CODELET_SET_MODE((task)->cl, mode, i); \
+					} while(0)
 
-#define STARPU_CODELET_GET_NODE(codelet, i) ((codelet->dyn_nodes) ? codelet->dyn_nodes[i] : codelet->nodes[i])
-#define STARPU_CODELET_SET_NODE(codelet, __node, i) do { if (codelet->dyn_nodes) codelet->dyn_nodes[i] = __node; else codelet->nodes[i] = __node; } while(0)
+#define STARPU_CODELET_GET_NODE(codelet, i) (((codelet)->dyn_nodes) ? (codelet)->dyn_nodes[i] : (codelet)->nodes[i])
+#define STARPU_CODELET_SET_NODE(codelet, __node, i) do { if ((codelet)->dyn_nodes) (codelet)->dyn_nodes[i] = __node; else (codelet)->nodes[i] = __node; } while(0)
 
 void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...);
 void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);

+ 1 - 1
mpi/src/starpu_mpi_task_insert.c

@@ -494,7 +494,7 @@ int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, stru
 	{
 		/* Get the number of buffers and the size of the arguments */
 		va_copy(varg_list_copy, varg_list);
-		arg_buffer_size = _starpu_task_insert_get_arg_size(varg_list_copy);
+		_starpu_task_insert_get_args_size(varg_list_copy, NULL, &arg_buffer_size);
 		va_end(varg_list_copy);
 
 		/* Pack arguments if needed */

+ 1 - 1
src/common/fxt.h

@@ -424,7 +424,7 @@ do {									\
 	{								\
 		if ((job)->task->cl)					\
 		{							\
-			const int __nbuffers = (job)->task->cl->nbuffers;	\
+			const int __nbuffers = STARPU_TASK_GET_NBUFFERS((job)->task);	\
 			char __buf[FXT_MAX_PARAMS*sizeof(long)];	\
 			int __i;					\
 			for (__i = 0; __i < __nbuffers; __i++)		\

+ 6 - 6
src/core/dependencies/data_concurrency.c

@@ -209,7 +209,7 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 {
 	unsigned buf;
 
-	unsigned nbuffers = j->task->cl->nbuffers;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task);
 	for (buf = start_buffer_index; buf < nbuffers; buf++)
 	{
 		if (buf)
@@ -241,18 +241,18 @@ unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j)
 {
 	struct starpu_codelet *cl = j->task->cl;
 
-	if ((cl == NULL) || (cl->nbuffers == 0))
+	if ((cl == NULL) || (STARPU_TASK_GET_NBUFFERS(j->task) == 0))
 		return 0;
 
 	/* Compute an ordered list of the different pieces of data so that we
 	 * grab then according to a total order, thus avoiding a deadlock
 	 * condition */
 	unsigned i;
-	for (i=0 ; i<cl->nbuffers ; i++)
+	for (i=0 ; i<STARPU_TASK_GET_NBUFFERS(j->task); i++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
 		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
-		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, i);
 		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 		int node = -1;
 		if (j->task->cl->specific_nodes)
@@ -260,7 +260,7 @@ unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j)
 		_STARPU_JOB_SET_ORDERED_BUFFER_NODE(j, node, i);
 	}
 
-	_starpu_sort_task_handles(_STARPU_JOB_GET_ORDERED_BUFFERS(j), cl->nbuffers);
+	_starpu_sort_task_handles(_STARPU_JOB_GET_ORDERED_BUFFERS(j), STARPU_TASK_GET_NBUFFERS(j->task));
 
 	return _submit_job_enforce_data_deps(j, 0);
 }
@@ -268,7 +268,7 @@ unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j)
 static unsigned unlock_one_requester(struct _starpu_data_requester *r)
 {
 	struct _starpu_job *j = r->j;
-	unsigned nbuffers = j->task->cl->nbuffers;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task);
 	unsigned buffer_index = r->buffer_index;
 
 	if (buffer_index + 1 < nbuffers)

+ 3 - 3
src/core/dependencies/implicit_data_deps.c

@@ -307,13 +307,13 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 	if (j->reduction_task)
 		return;
 
-	unsigned nbuffers = task->cl->nbuffers;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
 	unsigned buffer;
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
-		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, buffer);
+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer);
 		struct starpu_task *new_task;
 
 		/* Scratch memory does not introduce any deps */
@@ -438,7 +438,7 @@ void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j)
 	if (!task->cl)
 		return;
 
-        unsigned nbuffers = task->cl->nbuffers;
+        unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
 	unsigned index;
 	for (index = 0; index < nbuffers; index++)

+ 4 - 3
src/core/jobs.c

@@ -53,7 +53,7 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 	memset(job, 0, sizeof(*job));
 
 	if (task->dyn_handles)
-	     job->dyn_ordered_buffers = malloc(task->cl->nbuffers * sizeof(job->dyn_ordered_buffers[0]));
+	     job->dyn_ordered_buffers = malloc(STARPU_TASK_GET_NBUFFERS(task) * sizeof(job->dyn_ordered_buffers[0]));
 
 	job->task = task;
 
@@ -174,8 +174,9 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	if (task->cl)
 	{
 		unsigned i;
+		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 #ifdef STARPU_USE_SC_HYPERVISOR
-		for(i = 0; i < task->cl->nbuffers; i++)
+		for(i = 0; i < nbuffers; i++)
 		{
 			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 			if (handle != NULL)
@@ -183,7 +184,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		}
 #endif //STARPU_USE_SC_HYPERVISOR
 
-		for (i = 0; i < task->cl->nbuffers; i++)
+		for (i = 0; i < nbuffers; i++)
 		{
 			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 			_starpu_spin_lock(&handle->header_lock);

+ 14 - 31
src/core/perfmodel/perfmodel.c

@@ -78,9 +78,7 @@ struct starpu_perfmodel_arch* starpu_worker_get_perf_archtype(int workerid, unsi
 static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, struct starpu_task *task, unsigned nimpl)
 {
 	int comb;
-	double exp = NAN;
 	double (*per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
-	double (*per_arch_cost_model)(struct starpu_data_descr *);
 
 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 	if (comb == -1)
@@ -90,14 +88,9 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct
 		return NAN;
 
 	per_arch_cost_function = model->per_arch[comb][nimpl].cost_function;
-	per_arch_cost_model = model->per_arch[comb][nimpl].cost_model;
+	STARPU_ASSERT_MSG(per_arch_cost_function, "STARPU_PER_ARCH needs per-arch cost_function to be defined");
 
-	if (per_arch_cost_function)
-		exp = per_arch_cost_function(task, arch, nimpl);
-	else if (per_arch_cost_model)
-		exp = per_arch_cost_model(task->buffers);
-
-	return exp;
+	return per_arch_cost_function(task, arch, nimpl);
 }
 
 /*
@@ -130,26 +123,14 @@ static double common_task_expected_perf(struct starpu_perfmodel *model, struct s
 	double exp;
 	double alpha;
 
-	if (model->cost_function)
-	{
-		exp = model->cost_function(task, nimpl);
-		alpha = starpu_worker_get_relative_speedup(arch);
+	STARPU_ASSERT_MSG(model->cost_function, "STARPU_COMMON requires common cost_function to be defined");
 
-		STARPU_ASSERT(!_STARPU_IS_ZERO(alpha));
+	exp = model->cost_function(task, nimpl);
+	alpha = starpu_worker_get_relative_speedup(arch);
 
-		return (exp/alpha);
-	}
-	else if (model->cost_model)
-	{
-		exp = model->cost_model(task->buffers);
-		alpha = starpu_worker_get_relative_speedup(arch);
-
-		STARPU_ASSERT(!_STARPU_IS_ZERO(alpha));
-
-		return (exp/alpha);
-	}
+	STARPU_ASSERT(!_STARPU_IS_ZERO(alpha));
 
-	return NAN;
+	return (exp/alpha);
 }
 
 void _starpu_load_perfmodel(struct starpu_perfmodel *model)
@@ -233,8 +214,9 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 	unsigned i;
 	double sum = 0.0;
 	enum starpu_node_kind node_kind;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
-	for (i = 0; i < task->cl->nbuffers; i++)
+	for (i = 0; i < nbuffers; i++)
 	{
 		starpu_data_handle_t handle;
 		struct starpu_task *conversion_task;
@@ -311,7 +293,7 @@ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned
 /* Data transfer performance modeling */
 double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task)
 {
-	unsigned nbuffers = task->cl->nbuffers;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 	unsigned buffer;
 
 	double penalty = 0.0;
@@ -319,7 +301,7 @@ double starpu_task_expected_data_transfer_time(unsigned memory_node, struct star
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
-		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, buffer);
+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer);
 
 		penalty += starpu_data_expected_transfer_time(handle, memory_node, mode);
 	}
@@ -404,10 +386,11 @@ double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundl
 		if (task->cl)
 		{
 			unsigned b;
-			for (b = 0; b < task->cl->nbuffers; b++)
+			unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+			for (b = 0; b < nbuffers; b++)
 			{
 				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, b);
-				enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, b);
+				enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, b);
 
 				if (!(mode & STARPU_R))
 					continue;

+ 3 - 2
src/core/perfmodel/perfmodel_history.c

@@ -144,7 +144,7 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_p
 	}
 	else
 	{
-		unsigned nbuffers = task->cl->nbuffers;
+		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 		size_t size = 0;
 
 		unsigned buffer;
@@ -1402,8 +1402,9 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 		fprintf(f, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(model, arch, impl, j), measured, task->predicted, task->predicted_transfer, cpuid);
 		unsigned i;
+		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
-		for (i = 0; i < task->cl->nbuffers; i++)
+		for (i = 0; i < nbuffers; i++)
 		{
 			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 

+ 6 - 4
src/core/sched_policy.c

@@ -261,7 +261,8 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 		unsigned node = starpu_worker_get_memory_node(workerid);
 		if (_starpu_task_uses_multiformat_handles(task))
 		{
-			for (i = 0; i < task->cl->nbuffers; i++)
+			unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+			for (i = 0; i < nbuffers; i++)
 			{
 				struct starpu_task *conversion_task;
 				starpu_data_handle_t handle;
@@ -278,7 +279,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 				//_STARPU_DEBUG("Pushing a conversion task\n");
 			}
 
-			for (i = 0; i < task->cl->nbuffers; i++)
+			for (i = 0; i < nbuffers; i++)
 			{
 				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 				handle->mf_node = node;
@@ -607,7 +608,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 		STARPU_ABORT();
 	}
 
-	STARPU_CODELET_SET_MODE(conversion_task->cl, STARPU_RW, 0);
+	STARPU_TASK_SET_MODE(conversion_task, STARPU_RW, 0);
 	return conversion_task;
 }
 
@@ -830,7 +831,8 @@ pick:
 	 * required conversion tasks.
 	 */
 	unsigned i;
-	for (i = 0; i < task->cl->nbuffers; i++)
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+	for (i = 0; i < nbuffers; i++)
 	{
 		struct starpu_task *conversion_task;
 		starpu_data_handle_t handle;

+ 21 - 30
src/core/task.c

@@ -265,12 +265,15 @@ int _starpu_submit_job(struct _starpu_job *j)
 		int i;
 		size_t data_size = 0;
 		if (j->task->cl)
-			for(i = 0; i < j->task->cl->nbuffers; i++)
+		{
+			unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task);
+			for(i = 0; i < nbuffers; i++)
 			{
 				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 				if (handle != NULL)
 					data_size += _starpu_data_get_size(handle);
 			}
+		}
 
 		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size);
@@ -282,7 +285,8 @@ int _starpu_submit_job(struct _starpu_job *j)
 	if (task->cl)
 	{
 		unsigned i;
-		for (i=0; i<task->cl->nbuffers; i++)
+		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+		for (i=0; i<nbuffers; i++)
 		{
 			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 			_starpu_spin_lock(&handle->header_lock);
@@ -395,26 +399,9 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 	}
 }
 
-void _starpu_task_check_deprecated_fields(struct starpu_task *task)
+void _starpu_task_check_deprecated_fields(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED)
 {
-	if (task->cl)
-	{
-		unsigned i;
-		for(i=0; i<STARPU_MIN(task->cl->nbuffers, STARPU_NMAXBUFS) ; i++)
-		{
-			if (task->buffers[i].handle && task->handles[i])
-			{
-				_STARPU_DISP("[warning][struct starpu_task] task->buffers[%u] and task->handles[%u] both set. Ignoring task->buffers[%u] ?\n", i, i, i);
-				STARPU_ASSERT(task->buffers[i].mode == task->cl->modes[i]);
-				STARPU_ABORT();
-			}
-			if (task->buffers[i].handle)
-			{
-				task->handles[i] = task->buffers[i].handle;
-				task->cl->modes[i] = task->buffers[i].mode;
-			}
-		}
-	}
+	/* None any more */
 }
 
 /* application should submit new tasks to StarPU through this function */
@@ -458,17 +445,18 @@ int starpu_task_submit(struct starpu_task *task)
 	if (task->cl)
 	{
 		unsigned i;
+		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
 		/* Check buffers */
 		if (task->dyn_handles == NULL)
-			STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d). Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
+			STARPU_ASSERT_MSG(STARPU_TASK_GET_NBUFFERS(task) <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d). Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.", task->cl, STARPU_TASK_GET_NBUFFERS(task), STARPU_NMAXBUFS);
 
 		if (task->dyn_handles)
 		{
-			task->dyn_interfaces = malloc(task->cl->nbuffers * sizeof(void *));
+			task->dyn_interfaces = malloc(nbuffers * sizeof(void *));
 		}
 
-		for (i = 0; i < task->cl->nbuffers; i++)
+		for (i = 0; i < nbuffers; i++)
 		{
 			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 			/* Make sure handles are not partitioned */
@@ -609,11 +597,12 @@ int _starpu_task_submit_nodeps(struct starpu_task *task)
 	{
 		/* This would be done by data dependencies checking */
 		unsigned i;
-		for (i=0 ; i<task->cl->nbuffers ; i++)
+		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+		for (i=0 ; i<nbuffers ; i++)
 		{
 			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
 			_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
-			enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
+			enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, i);
 			_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 			int node = -1;
 			if (j->task->cl->specific_nodes)
@@ -648,7 +637,8 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
 	/* We retain handle reference count */
 	unsigned i;
-	for (i=0; i<task->cl->nbuffers; i++)
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+	for (i=0; i<nbuffers; i++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 		_starpu_spin_lock(&handle->header_lock);
@@ -673,11 +663,11 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 	j->submitted = 1;
 	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops);
-	for (i=0 ; i<task->cl->nbuffers ; i++)
+	for (i=0 ; i<nbuffers ; i++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
 		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
-		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, i);
 		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 		int node = -1;
 		if (j->task->cl->specific_nodes)
@@ -908,7 +898,8 @@ int
 _starpu_task_uses_multiformat_handles(struct starpu_task *task)
 {
 	unsigned i;
-	for (i = 0; i < task->cl->nbuffers; i++)
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+	for (i = 0; i < nbuffers; i++)
 	{
 		if (_starpu_data_is_multiformat_handle(STARPU_TASK_GET_HANDLE(task, i)))
 			return 1;

+ 5 - 5
src/datawizard/coherency.c

@@ -678,13 +678,13 @@ static void _starpu_set_data_requested_flag_if_needed(starpu_data_handle_t handl
 
 int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
 {
-	unsigned nbuffers = task->cl->nbuffers;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 	unsigned index;
 
 	for (index = 0; index < nbuffers; index++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
-		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, index);
+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
 
 		if (mode & (STARPU_SCRATCH|STARPU_REDUX))
 			continue;
@@ -717,7 +717,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j)
 		_starpu_clock_gettime(&task->profiling_info->acquire_data_start_time);
 
 	struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
-	unsigned nbuffers = task->cl->nbuffers;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
 	unsigned local_memory_node = _starpu_memory_node_get_local_key();
 
@@ -761,7 +761,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j)
 	for (index = 0; index < nbuffers; index++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
-		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, index);
+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
 		int node = descrs[index].node;
 		if (node == -1)
 			node = local_memory_node;
@@ -824,7 +824,7 @@ void _starpu_push_task_output(struct _starpu_job *j)
 		_starpu_clock_gettime(&task->profiling_info->release_data_start_time);
 
         struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
-        unsigned nbuffers = task->cl->nbuffers;
+        unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
 	int workerid = starpu_worker_get_id();
 	unsigned local_memory_node = _starpu_memory_node_get_local_key();

+ 2 - 1
src/datawizard/footprint.c

@@ -24,8 +24,9 @@ uint32_t starpu_task_data_footprint(struct starpu_task *task)
 {
 	uint32_t footprint = 0;
 	unsigned buffer;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
-	for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
+	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
 

+ 2 - 1
src/datawizard/user_interactions.c

@@ -322,7 +322,8 @@ int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum star
 	 * We enqueue the "post" sync task in the list associated to the handle
 	 * so that it is submitted by the starpu_data_release
 	 * function. */
-	_starpu_add_post_sync_tasks(wrapper.post_sync_task, handle);
+	if (sequential_consistency)
+		_starpu_add_post_sync_tasks(wrapper.post_sync_task, handle);
 
         _STARPU_LOG_OUT();
 	return 0;

+ 3 - 3
src/drivers/gordon/driver_gordon.c

@@ -99,10 +99,10 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 	}
 
 	/* count the number of in/inout/out buffers */
-	unsigned nbuffers = cl->nbuffers;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
-		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(cl, buffer);
+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer);
 
 		switch (mode)
 		{
@@ -122,7 +122,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 		unsigned gordon_buffer;
-		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(cl, buffer);
+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer);
 
 		switch (mode)
 		{

+ 1 - 1
src/drivers/mp_common/source_common.c

@@ -429,7 +429,7 @@ static int _starpu_src_common_execute(struct _starpu_job *j,
 	_starpu_src_common_execute_kernel(node, kernel, worker->devid, task->cl->type,
 			(j->task_size > 1),
 			j->combined_workerid, task->handles,
-			task->interfaces, task->cl->nbuffers,
+			task->interfaces, STARPU_TASK_GET_NBUFFERS(task),
 			task->cl_arg, task->cl_arg_size);
 
 

+ 1 - 1
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -82,7 +82,7 @@ static const float idle_power_maximum=10000.0;
 static int count_non_ready_buffers(struct starpu_task *task, unsigned node)
 {
 	int cnt = 0;
-	unsigned nbuffers = task->cl->nbuffers;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 	unsigned index;
 
 	for (index = 0; index < nbuffers; index++)

+ 10 - 5
src/util/starpu_task_insert.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -29,7 +29,7 @@ void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
 
 	/* Compute the size */
 	va_start(varg_list, arg_buffer_size);
-	*arg_buffer_size = _starpu_task_insert_get_arg_size(varg_list);
+	_starpu_task_insert_get_args_size(varg_list, NULL, arg_buffer_size);
 	va_end(varg_list);
 
 	va_start(varg_list, arg_buffer_size);
@@ -71,11 +71,12 @@ struct starpu_task *_starpu_task_build_v(struct starpu_codelet *cl, const char*
 	void *arg_buffer = NULL;
 	va_list varg_list_copy;
 	size_t arg_buffer_size = 0;
+	unsigned nbuffers;
 
 	/* Compute the size */
 
 	va_copy(varg_list_copy, varg_list);
-	arg_buffer_size = _starpu_task_insert_get_arg_size(varg_list_copy);
+	_starpu_task_insert_get_args_size(varg_list_copy, &nbuffers, &arg_buffer_size);
 	va_end(varg_list_copy);
 
 	if (arg_buffer_size)
@@ -89,9 +90,13 @@ struct starpu_task *_starpu_task_build_v(struct starpu_codelet *cl, const char*
 	task->name = task_name;
 	task->cl_arg_free = cl_arg_free;
 
-	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
+	if (cl && cl->nbuffers != STARPU_VARIABLE_NBUFFERS)
 	{
-		task->dyn_handles = malloc(cl->nbuffers * sizeof(starpu_data_handle_t));
+		STARPU_ASSERT_MSG(nbuffers == (unsigned) cl->nbuffers, "Incoherent number of buffers between cl (%d) and number of parameters (%u)", cl->nbuffers, nbuffers);
+	}
+	if (nbuffers > STARPU_NMAXBUFS)
+	{
+		task->dyn_handles = malloc(nbuffers * sizeof(starpu_data_handle_t));
 	}
 
 	va_copy(varg_list_copy, varg_list);

+ 17 - 6
src/util/starpu_task_insert_utils.c

@@ -41,12 +41,14 @@ void starpu_task_insert_callback_wrapper(void *_cl_arg_wrapper)
 		cl_arg_wrapper->callback_func(cl_arg_wrapper->callback_arg);
 }
 
-size_t _starpu_task_insert_get_arg_size(va_list varg_list)
+void _starpu_task_insert_get_args_size(va_list varg_list, unsigned *nbuffers, size_t *cl_arg_size)
 {
 	int arg_type;
 	size_t arg_buffer_size;
+	unsigned n;
 
 	arg_buffer_size = 0;
+	n = 0;
 
 	arg_buffer_size += sizeof(char);
 
@@ -55,11 +57,13 @@ size_t _starpu_task_insert_get_arg_size(va_list varg_list)
 		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t);
+			n++;
 		}
 		else if (arg_type==STARPU_DATA_ARRAY)
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t*);
-			(void)va_arg(varg_list, int);
+			int nb_handles = va_arg(varg_list, int);
+			n += nb_handles;
 		}
 		else if (arg_type==STARPU_VALUE)
 		{
@@ -136,7 +140,10 @@ size_t _starpu_task_insert_get_arg_size(va_list varg_list)
 		}
 	}
 
-	return arg_buffer_size;
+	if (cl_arg_size)
+		*cl_arg_size = arg_buffer_size;
+	if (nbuffers)
+		*nbuffers = n;
 }
 
 int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list varg_list)
@@ -280,6 +287,8 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 
 	prologue_pop_cl_arg_wrapper->callback_func = NULL;
 
+	(*task)->cl = cl;
+
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
 		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
@@ -292,7 +301,9 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 			STARPU_ASSERT(cl != NULL);
 
 			STARPU_TASK_SET_HANDLE((*task), handle, current_buffer);
-			if (STARPU_CODELET_GET_MODE(cl, current_buffer))
+			if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS)
+				STARPU_TASK_SET_MODE(*task, mode, current_buffer);
+			else if (STARPU_CODELET_GET_MODE(cl, current_buffer))
 			{
 				STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, current_buffer) == mode,
 						   "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
@@ -426,9 +437,9 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 		}
 	}
 
-	STARPU_ASSERT(cl == NULL || current_buffer == cl->nbuffers);
+	if (cl && cl->nbuffers == STARPU_VARIABLE_NBUFFERS)
+		(*task)->nbuffers = current_buffer;
 
-	(*task)->cl = cl;
 	(*task)->cl_arg = arg_buffer;
 	(*task)->cl_arg_size = arg_buffer_size;
 

+ 1 - 1
src/util/starpu_task_insert_utils.h

@@ -21,7 +21,7 @@
 #include <stdarg.h>
 #include <starpu.h>
 
-size_t _starpu_task_insert_get_arg_size(va_list varg_list);
+void _starpu_task_insert_get_args_size(va_list varg_list, unsigned *nbuffers, size_t *cl_arg_size);
 int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list varg_list);
 void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list);
 

+ 1 - 2
tests/Makefile.am

@@ -108,7 +108,6 @@ XFAIL_TESTS=	errorcheck/invalid_blocking_calls
 
 noinst_PROGRAMS =				\
 	main/deprecated_func			\
-	main/deprecated_buffer			\
 	main/driver_api/init_run_deinit         \
 	main/driver_api/run_driver              \
 	main/deploop                            \
@@ -199,6 +198,7 @@ noinst_PROGRAMS =				\
 	datawizard/partition_lazy		\
 	datawizard/gpu_register   		\
 	datawizard/gpu_ptr_register   		\
+	datawizard/variable_parameters		\
 	datawizard/wt_host			\
 	datawizard/wt_broadcast			\
 	datawizard/readonly			\
@@ -413,7 +413,6 @@ datawizard_specific_node_SOURCES +=			\
 endif
 
 main_deprecated_func_CFLAGS = $(AM_CFLAGS) -Wno-deprecated-declarations
-main_deprecated_buffer_CFLAGS = $(AM_CFLAGS) -Wno-deprecated-declarations
 
 main_subgraph_repeat_SOURCES =		\
 	main/subgraph_repeat.c

+ 230 - 0
tests/datawizard/variable_parameters.c

@@ -0,0 +1,230 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2012-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2014  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <config.h>
+#include <starpu.h>
+#include "../helper.h"
+
+static starpu_data_handle_t handle1, handle2, handle3, handle4;
+
+/*
+ *	Increment codelet
+ */
+
+#ifdef STARPU_USE_OPENCL
+/* dummy OpenCL implementation */
+static void increment_opencl_kernel(void *descr[], void *cl_arg)
+{
+	int num = starpu_task_get_current()->nbuffers;
+	int i;
+
+	for (i = 0; i < num; i++)
+	{
+		cl_mem d_token = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[i]);
+		unsigned h_token;
+
+		cl_command_queue queue;
+		starpu_opencl_get_current_queue(&queue);
+
+		clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
+		h_token++;
+		clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
+		clFinish(queue);
+	}
+}
+#endif
+
+
+#ifdef STARPU_USE_CUDA
+static void increment_cuda_kernel(void *descr[], void *arg)
+{
+	int num = starpu_task_get_current()->nbuffers;
+	int i;
+
+	for (i = 0; i < num; i++)
+	{
+		unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[i]);
+		unsigned host_token;
+
+		/* This is a dummy technique of course */
+		cudaMemcpyAsync(&host_token, tokenptr, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
+		cudaStreamSynchronize(starpu_cuda_get_local_stream());
+
+		host_token++;
+
+		cudaMemcpyAsync(tokenptr, &host_token, sizeof(unsigned), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
+	}
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+#endif
+
+static void increment_cpu_kernel(void *descr[], void *cl_arg)
+{
+	int num = starpu_task_get_current()->nbuffers;
+	int i;
+
+	for (i = 0; i < num; i++)
+	{
+		unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[i]);
+		*tokenptr = *tokenptr + 1;
+	}
+}
+
+static struct starpu_codelet increment_cl =
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {increment_cuda_kernel, NULL},
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {increment_opencl_kernel, NULL},
+#endif
+	.cpu_funcs = {increment_cpu_kernel, NULL},
+	.nbuffers = STARPU_VARIABLE_NBUFFERS,
+};
+
+int main(int argc, char **argv)
+{
+	unsigned *pvar = NULL;
+	int ret;
+	unsigned var1 = 0, var2 = 0, var3 = 0, var4 = 0;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle1, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(unsigned));
+	starpu_variable_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)&var2, sizeof(unsigned));
+	starpu_variable_data_register(&handle3, STARPU_MAIN_RAM, (uintptr_t)&var3, sizeof(unsigned));
+	starpu_variable_data_register(&handle4, STARPU_MAIN_RAM, (uintptr_t)&var4, sizeof(unsigned));
+
+#ifdef STARPU_QUICK_CHECK
+	unsigned nloops = 4;
+#else
+	unsigned nloops = 16;
+#endif
+
+	unsigned loop;
+	unsigned t;
+
+	for (loop = 0; loop < nloops; loop++)
+	{
+		for (t = 0; t <= 4; t++)
+		{
+			struct starpu_task *task = starpu_task_create();
+			unsigned i;
+
+			task->cl = &increment_cl;
+			task->handles[0] = handle1;
+			task->handles[1] = handle2;
+			task->handles[2] = handle3;
+			task->handles[3] = handle4;
+			for (i = 0; i < t; i++)
+				task->modes[i] = STARPU_RW;
+			task->nbuffers = t;
+
+			ret = starpu_task_submit(task);
+			if (ret == -ENODEV) goto enodev;
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		}
+
+		starpu_task_insert(&increment_cl,
+				STARPU_RW, handle1,
+				0);
+		starpu_task_insert(&increment_cl,
+				STARPU_RW, handle1,
+				STARPU_RW, handle2,
+				0);
+		starpu_task_insert(&increment_cl,
+				STARPU_RW, handle1,
+				STARPU_RW, handle2,
+				STARPU_RW, handle3,
+				0);
+		starpu_task_insert(&increment_cl,
+				STARPU_RW, handle1,
+				STARPU_RW, handle2,
+				STARPU_RW, handle3,
+				STARPU_RW, handle4,
+				0);
+	}
+
+	ret = starpu_data_acquire(handle1, STARPU_R);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+	if (var1 != 8*nloops)
+	{
+		FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var1, 8*nloops);
+		starpu_data_release(handle1);
+		goto err;
+	}
+	starpu_data_release(handle1);
+
+	ret = starpu_data_acquire(handle2, STARPU_R);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+	if (var2 != 6*nloops)
+	{
+		FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var2, 6*nloops);
+		starpu_data_release(handle2);
+		goto err;
+	}
+	starpu_data_release(handle2);
+
+	ret = starpu_data_acquire(handle3, STARPU_R);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+	if (var3 != 4*nloops)
+	{
+		FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var3, 4*nloops);
+		starpu_data_release(handle3);
+		goto err;
+	}
+	starpu_data_release(handle3);
+
+	ret = starpu_data_acquire(handle4, STARPU_R);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+	if (var4 != 2*nloops)
+	{
+		FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var4, 2*nloops);
+		starpu_data_release(handle4);
+		goto err;
+	}
+	starpu_data_release(handle4);
+
+	starpu_data_unregister(handle1);
+	starpu_data_unregister(handle2);
+	starpu_data_unregister(handle3);
+	starpu_data_unregister(handle4);
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(handle1);
+	starpu_data_unregister(handle2);
+	starpu_data_unregister(handle3);
+	starpu_data_unregister(handle4);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+
+err:
+	starpu_data_unregister(handle1);
+	starpu_data_unregister(handle2);
+	starpu_data_unregister(handle3);
+	starpu_data_unregister(handle4);
+	starpu_shutdown();
+	return EXIT_FAILURE;
+}

+ 0 - 178
tests/main/deprecated_buffer.c

@@ -1,178 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <config.h>
-#include <starpu.h>
-#include "../helper.h"
-
-void cpu_codelet(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	int *valin = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	int *valout = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
-
-	*valout = *valin;
-}
-
-struct starpu_codelet cl_with_mode =
-{
-	.name = "with_mode",
-	.cpu_funcs = {cpu_codelet, NULL},
-	.cpu_funcs_name = {"cpu_codelet", NULL},
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_W},
-};
-
-struct starpu_codelet cl_without_mode =
-{
-	.name = "without_mode",
-	.cpu_funcs = {cpu_codelet, NULL},
-	.cpu_funcs_name = {"cpu_codelet", NULL},
-	.nbuffers = 2
-};
-
-static
-int submit_codelet_task_insert(struct starpu_codelet cl, starpu_data_handle_t handles0, starpu_data_handle_t handles1)
-{
-	int ret;
-
-	ret = starpu_task_insert(&cl,
-				 STARPU_R, handles0,
-				 STARPU_W, handles1,
-				 0);
-	if (ret == -ENODEV) return ret;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
-
-	starpu_task_wait_for_all();
-	return 0;
-}
-
-static
-int submit_codelet_with_buffers(struct starpu_codelet cl, starpu_data_handle_t handles0, starpu_data_handle_t handles1)
-{
-	int ret;
-	struct starpu_task *task;
-
-	task = starpu_task_create();
-	task->cl = &cl;
-	task->buffers[0].handle = handles0;
-	task->buffers[0].mode = STARPU_R;
-	task->buffers[1].handle = handles1;
-	task->buffers[1].mode = STARPU_W;
-
-	ret = starpu_task_submit(task);
-	if (ret == -ENODEV) return ret;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-
-	starpu_task_wait_for_all();
-	return 0;
-}
-
-static
-int submit_codelet_with_handles(struct starpu_codelet cl, starpu_data_handle_t handles0, starpu_data_handle_t handles1)
-{
-	int ret;
-	struct starpu_task *task;
-
-	task = starpu_task_create();
-	task->cl = &cl;
-	task->handles[0] = handles0;
-	task->handles[1] = handles1;
-
-	ret = starpu_task_submit(task);
-	if (ret == -ENODEV) return ret;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-
-	starpu_task_wait_for_all();
-	return 0;
-}
-
-struct submit_task_func
-{
-	int (*func)(struct starpu_codelet cl, starpu_data_handle_t handles0, starpu_data_handle_t handles1);
-	char *name;
-};
-
-static
-int submit_codelet(struct starpu_codelet cl, struct submit_task_func func)
-{
-	int *x, *y;
-	starpu_malloc((void**)&x, sizeof(*x));
-	starpu_malloc((void**)&y, sizeof(*y));
-	*x = 42;
-	*y = 14;
-	starpu_data_handle_t handles[2];
-	int ret;
-
-	starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)x, sizeof(*x));
-	starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)y, sizeof(*y));
-
-	ret = func.func(cl, handles[0], handles[1]);
-	starpu_data_unregister(handles[0]);
-	starpu_data_unregister(handles[1]);
-
-	if (!ret)
-	{
-		FPRINTF(stderr, "%s when executing codelet <%s> with func <%s>\n", *x==*y?"success":"error", cl.name, func.name);
-		ret = (*x != *y);
-	}
-
-	starpu_free(x);
-	starpu_free(y);
-
-	return ret;
-}
-
-int main(int argc, char **argv)
-{
-	int ret;
-	struct submit_task_func task_insert = { .func = submit_codelet_task_insert, .name = "task_insert" };
-	struct submit_task_func with_buffers = { .func = submit_codelet_with_buffers, .name = "with_buffers" };
-	struct submit_task_func with_handles = { .func = submit_codelet_with_handles, .name = "with_handles" };
-
-	ret = starpu_initialize(NULL, &argc, &argv);
-	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = submit_codelet(cl_with_mode, task_insert);
-	if (ret == -ENODEV)
-	{
-		starpu_shutdown();
-		fprintf(stderr, "WARNING: No one can execute this task\n");
-		return STARPU_TEST_SKIPPED;
-	}
-
-	if (!ret)
-	{
-		ret = submit_codelet(cl_with_mode, with_buffers);
-	}
-	if (!ret)
-	{
-		ret = submit_codelet(cl_with_mode, with_handles);
-	}
-	if (!ret)
-	{
-		ret = submit_codelet(cl_without_mode, task_insert);
-	}
-	if (!ret)
-	{
-		ret = submit_codelet(cl_without_mode, with_buffers);
-	}
-	// We do not test the combination cl_without_mode with_handles as it is not expected to work
-
-	starpu_shutdown();
-
-	STARPU_RETURN(ret);
-}

+ 3 - 3
tests/main/restart.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2013  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010, 2013-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -30,8 +30,8 @@
   #define N	10
 #endif
 
-struct timeval start;
-struct timeval end;
+static struct timeval start;
+static struct timeval end;
 
 int main(int argc, char **argv)
 {