Browse Source

starpu_task_insert & co:
- redefine the sequential implementation so that arguments are parsed
only once (this should reduce the cost of the functions)
- new flag STARPU_DATA_MODE_ARRAY to define an array of data
handles along with their access modes.

Nathalie Furmento 10 years ago
parent
commit
040abf997a

+ 3 - 0
ChangeLog

@@ -102,6 +102,9 @@ Small features:
     working implementations
     working implementations
   * Add STARPU_MALLOC_NORECLAIM flag to allocate without running a reclaim if
   * Add STARPU_MALLOC_NORECLAIM flag to allocate without running a reclaim if
     the node is out of memory.
     the node is out of memory.
+  * New flag STARPU_DATA_MODE_ARRAY for the function family
+    starpu_task_insert to define an array of data handles
+    along with their access modes.
 
 
 Changes:
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define
   * Data interfaces (variable, vector, matrix and block) now define

+ 9 - 1
doc/doxygen/chapters/api/insert_task.doxy

@@ -23,6 +23,9 @@ The arguments following the codelet can be of the following types:
 ::STARPU_REDUX an access mode followed by a data handle;
 ::STARPU_REDUX an access mode followed by a data handle;
 <li> ::STARPU_DATA_ARRAY followed by an array of data handles and its
 <li> ::STARPU_DATA_ARRAY followed by an array of data handles and its
 number of elements;
 number of elements;
+<li> ::STARPU_DATA_MODE_ARRAY followed by an array of struct
+starpu_data_descr, i.e. data handles with their associated access
+modes, and its number of elements;
 <li> ::STARPU_EXECUTE_ON_WORKER, ::STARPU_WORKER_ORDER followed by an integer value
 <li> ::STARPU_EXECUTE_ON_WORKER, ::STARPU_WORKER_ORDER followed by an integer value
 specifying the worker on which to execute the task (as specified by
 specifying the worker on which to execute the task (as specified by
 starpu_task::execute_on_a_specific_worker)
 starpu_task::execute_on_a_specific_worker)
@@ -33,7 +36,8 @@ appropriated objects as defined elsewhere.
 </ul>
 </ul>
 
 
 When using ::STARPU_DATA_ARRAY, the access mode of the data handles is
 When using ::STARPU_DATA_ARRAY, the access mode of the data handles is
-not defined.
+not defined. One should use ::STARPU_DATA_MODE_ARRAY to define the
+data handles along with the access modes.
 
 
 Parameters to be passed to the codelet implementation are defined
 Parameters to be passed to the codelet implementation are defined
 through the type ::STARPU_VALUE. The function
 through the type ::STARPU_VALUE. The function
@@ -74,6 +78,10 @@ be followed by a integer defining a priority level
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
 TODO
 TODO
 
 
+\def STARPU_DATA_MODE_ARRAY
+\ingroup API_Insert_Task
+TODO
+
 \def STARPU_EXECUTE_ON_WORKER
 \def STARPU_EXECUTE_ON_WORKER
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
 this macro is used when calling starpu_task_insert(), and must be
 this macro is used when calling starpu_task_insert(), and must be

+ 22 - 21
include/starpu_task_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2014  Université de Bordeaux
  * Copyright (C) 2010-2014  Université de Bordeaux
- * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2014       INRIA
  * Copyright (C) 2014       INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -32,26 +32,27 @@ extern "C"
 
 
 void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg);
 void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg);
 
 
-#define STARPU_VALUE		 (1<<20)
-#define STARPU_CALLBACK		 (2<<20)
-#define STARPU_CALLBACK_WITH_ARG (3<<20)
-#define STARPU_CALLBACK_ARG	 (4<<20)
-#define STARPU_PRIORITY		 (5<<20)
-#define STARPU_EXECUTE_ON_NODE	 (6<<20)
-#define STARPU_EXECUTE_ON_DATA	 (7<<20)
-#define STARPU_DATA_ARRAY        (8<<20)
-#define STARPU_TAG               (9<<20)
-#define STARPU_HYPERVISOR_TAG	 (10<<20)
-#define STARPU_FLOPS	         (11<<20)
-#define STARPU_SCHED_CTX	 (12<<20)
-#define STARPU_PROLOGUE_CALLBACK   (13<<20)
-#define STARPU_PROLOGUE_CALLBACK_ARG (14<<20)
-#define STARPU_PROLOGUE_CALLBACK_POP   (15<<20)
-#define STARPU_PROLOGUE_CALLBACK_POP_ARG (16<<20)
-#define STARPU_EXECUTE_ON_WORKER (17<<20)
-#define STARPU_TAG_ONLY          (18<<20)
-#define STARPU_POSSIBLY_PARALLEL    (19<<20)
-#define STARPU_WORKER_ORDER      (20<<20)
+#define STARPU_VALUE		 (1<<16)
+#define STARPU_CALLBACK		 (2<<16)
+#define STARPU_CALLBACK_WITH_ARG (3<<16)
+#define STARPU_CALLBACK_ARG	 (4<<16)
+#define STARPU_PRIORITY		 (5<<16)
+#define STARPU_EXECUTE_ON_NODE	 (6<<16)
+#define STARPU_EXECUTE_ON_DATA	 (7<<16)
+#define STARPU_DATA_ARRAY        (8<<16)
+#define STARPU_DATA_MODE_ARRAY   (9<<16)
+#define STARPU_TAG               (10<<16)
+#define STARPU_HYPERVISOR_TAG	 (11<<16)
+#define STARPU_FLOPS	         (12<<16)
+#define STARPU_SCHED_CTX	 (13<<16)
+#define STARPU_PROLOGUE_CALLBACK   (14<<16)
+#define STARPU_PROLOGUE_CALLBACK_ARG (15<<16)
+#define STARPU_PROLOGUE_CALLBACK_POP   (16<<16)
+#define STARPU_PROLOGUE_CALLBACK_POP_ARG (17<<16)
+#define STARPU_EXECUTE_ON_WORKER (18<<16)
+#define STARPU_TAG_ONLY          (19<<16)
+#define STARPU_POSSIBLY_PARALLEL    (20<<16)
+#define STARPU_WORKER_ORDER      (21<<16)
 
 
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);

+ 21 - 19
mpi/src/starpu_mpi_task_insert.c

@@ -244,6 +244,11 @@ int _starpu_mpi_task_select_node(struct starpu_codelet *codelet, int me, int nb_
 				current_data ++;
 				current_data ++;
 			}
 			}
 		}
 		}
+		else if (arg_type == STARPU_DATA_MODE_ARRAY)
+		{
+			(void)va_arg(varg_list, struct starpu_data_descr*);
+			(void)va_arg(varg_list, int);
+		}
 		else if (arg_type==STARPU_VALUE)
 		else if (arg_type==STARPU_VALUE)
 		{
 		{
 			(void)va_arg(varg_list_copy, void *);
 			(void)va_arg(varg_list_copy, void *);
@@ -397,6 +402,11 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 				}
 				}
 			}
 			}
 		}
 		}
+		else if (arg_type == STARPU_DATA_MODE_ARRAY)
+		{
+			(void)va_arg(varg_list, struct starpu_data_descr*);
+			(void)va_arg(varg_list, int);
+		}
 		else if (arg_type==STARPU_VALUE)
 		else if (arg_type==STARPU_VALUE)
 		{
 		{
 			(void)va_arg(varg_list_copy, void *);
 			(void)va_arg(varg_list_copy, void *);
@@ -523,6 +533,11 @@ int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, stru
 				current_data++;
 				current_data++;
 			}
 			}
 		}
 		}
+		else if (arg_type == STARPU_DATA_MODE_ARRAY)
+		{
+			(void)va_arg(varg_list, struct starpu_data_descr*);
+			(void)va_arg(varg_list, int);
+		}
 		else if (arg_type==STARPU_VALUE)
 		else if (arg_type==STARPU_VALUE)
 		{
 		{
 			va_arg(varg_list_copy, void *);
 			va_arg(varg_list_copy, void *);
@@ -605,31 +620,13 @@ int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, stru
 	if (do_execute == 0) return 1;
 	if (do_execute == 0) return 1;
 	else
 	else
 	{
 	{
-		/* Get the number of buffers and the size of the arguments */
-		va_copy(varg_list_copy, varg_list);
-		_starpu_task_insert_get_args_size(varg_list_copy, NULL, &arg_buffer_size);
-		va_end(varg_list_copy);
-
-		/* Pack arguments if needed */
-		if (arg_buffer_size)
-		{
-			va_copy(varg_list_copy, varg_list);
-			_starpu_codelet_pack_args(&arg_buffer, arg_buffer_size, varg_list_copy);
-			va_end(varg_list_copy);
-		}
-
 		_STARPU_MPI_DEBUG(100, "Execution of the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL);
 		_STARPU_MPI_DEBUG(100, "Execution of the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL);
 
 
 		*task = starpu_task_create();
 		*task = starpu_task_create();
 		(*task)->cl_arg_free = 1;
 		(*task)->cl_arg_free = 1;
 
 
-		if (codelet && codelet->nbuffers > STARPU_NMAXBUFS)
-		{
-			(*task)->dyn_handles = malloc(codelet->nbuffers * sizeof(starpu_data_handle_t));
-		}
-
 		va_copy(varg_list_copy, varg_list);
 		va_copy(varg_list_copy, varg_list);
-		_starpu_task_insert_create(arg_buffer, arg_buffer_size, codelet, task, varg_list_copy);
+		_starpu_task_insert_create(codelet, task, varg_list_copy);
 		va_end(varg_list_copy);
 		va_end(varg_list_copy);
 		return 0;
 		return 0;
 	}
 	}
@@ -672,6 +669,11 @@ int _starpu_mpi_task_postbuild_v(MPI_Comm comm, struct starpu_codelet *codelet,
 				current_data++;
 				current_data++;
 			}
 			}
 		}
 		}
+		else if (arg_type == STARPU_DATA_MODE_ARRAY)
+		{
+			(void)va_arg(varg_list, struct starpu_data_descr*);
+			(void)va_arg(varg_list, int);
+		}
 		else if (arg_type==STARPU_VALUE)
 		else if (arg_type==STARPU_VALUE)
 		{
 		{
 			va_arg(varg_list_copy, void *);
 			va_arg(varg_list_copy, void *);

+ 2 - 32
src/util/starpu_task_insert.c

@@ -27,13 +27,8 @@ void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
 {
 {
 	va_list varg_list;
 	va_list varg_list;
 
 
-	/* Compute the size */
 	va_start(varg_list, arg_buffer_size);
 	va_start(varg_list, arg_buffer_size);
-	_starpu_task_insert_get_args_size(varg_list, NULL, arg_buffer_size);
-	va_end(varg_list);
-
-	va_start(varg_list, arg_buffer_size);
-	_starpu_codelet_pack_args(arg_buffer, *arg_buffer_size, varg_list);
+	_starpu_codelet_pack_args(arg_buffer, arg_buffer_size, varg_list);
 	va_end(varg_list);
 	va_end(varg_list);
 }
 }
 
 
@@ -69,39 +64,14 @@ void starpu_codelet_unpack_args(void *_cl_arg, ...)
 static
 static
 struct starpu_task *_starpu_task_build_v(struct starpu_codelet *cl, const char* task_name, int cl_arg_free, va_list varg_list)
 struct starpu_task *_starpu_task_build_v(struct starpu_codelet *cl, const char* task_name, int cl_arg_free, va_list varg_list)
 {
 {
-	void *arg_buffer = NULL;
 	va_list varg_list_copy;
 	va_list varg_list_copy;
-	size_t arg_buffer_size = 0;
-	unsigned nbuffers;
-
-	/* Compute the size */
-
-	va_copy(varg_list_copy, varg_list);
-	_starpu_task_insert_get_args_size(varg_list_copy, &nbuffers, &arg_buffer_size);
-	va_end(varg_list_copy);
-
-	if (arg_buffer_size)
-	{
-		va_copy(varg_list_copy, varg_list);
-		_starpu_codelet_pack_args(&arg_buffer, arg_buffer_size, varg_list_copy);
-		va_end(varg_list_copy);
-	}
 
 
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
 	task->name = task_name;
 	task->name = task_name;
 	task->cl_arg_free = cl_arg_free;
 	task->cl_arg_free = cl_arg_free;
 
 
-	if (cl && cl->nbuffers != STARPU_VARIABLE_NBUFFERS)
-	{
-		STARPU_ASSERT_MSG(nbuffers == (unsigned) cl->nbuffers, "Incoherent number of buffers between cl (%d) and number of parameters (%u)", cl->nbuffers, nbuffers);
-	}
-	if (nbuffers > STARPU_NMAXBUFS)
-	{
-		task->dyn_handles = malloc(nbuffers * sizeof(starpu_data_handle_t));
-	}
-
 	va_copy(varg_list_copy, varg_list);
 	va_copy(varg_list_copy, varg_list);
-	_starpu_task_insert_create(arg_buffer, arg_buffer_size, cl, &task, varg_list_copy);
+	_starpu_task_insert_create(cl, &task, varg_list_copy);
 	va_end(varg_list_copy);
 	va_end(varg_list_copy);
 
 
 	return task;
 	return task;

+ 146 - 149
src/util/starpu_task_insert_utils.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011, 2013-2014              Université Bordeaux
- * Copyright (C) 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2013-2014   Université Bordeaux
+ * Copyright (C) 2011-2014         Centre National de la Recherche Scientifique
  * Copyright (C) 2011, 2014        INRIA
  * Copyright (C) 2011, 2014        INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -42,157 +42,48 @@ void _starpu_task_insert_callback_wrapper(void *_cl_arg_wrapper)
 		cl_arg_wrapper->callback_func(cl_arg_wrapper->callback_arg);
 		cl_arg_wrapper->callback_func(cl_arg_wrapper->callback_arg);
 }
 }
 
 
-void _starpu_task_insert_get_args_size(va_list varg_list, unsigned *nbuffers, size_t *cl_arg_size)
+int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_list varg_list)
 {
 {
 	int arg_type;
 	int arg_type;
-	size_t arg_buffer_size;
-	unsigned n;
-
-	arg_buffer_size = 0;
-	n = 0;
-
-	arg_buffer_size += sizeof(int);
+	int nargs = 0;
+	char *_arg_buffer = NULL; // We would like a void* but we use a char* to allow pointer arithmetic
+	size_t _arg_buffer_size = 0;
+	size_t current_offset = sizeof(int);
 
 
-	while ((arg_type = va_arg(varg_list, int)) != 0)
+	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
 	{
 		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		{
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t);
 			(void)va_arg(varg_list, starpu_data_handle_t);
-			n++;
 		}
 		}
 		else if (arg_type==STARPU_DATA_ARRAY)
 		else if (arg_type==STARPU_DATA_ARRAY)
 		{
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t*);
 			(void)va_arg(varg_list, starpu_data_handle_t*);
-			int nb_handles = va_arg(varg_list, int);
-			n += nb_handles;
-		}
-		else if (arg_type==STARPU_VALUE)
-		{
-			(void)va_arg(varg_list, void *);
-			size_t cst_size = va_arg(varg_list, size_t);
-
-			arg_buffer_size += sizeof(size_t);
-			arg_buffer_size += cst_size;
-		}
-		else if (arg_type==STARPU_CALLBACK)
-		{
-			(void)va_arg(varg_list, _starpu_callback_func_t);
-		}
-		else if (arg_type==STARPU_CALLBACK_WITH_ARG)
-		{
-			va_arg(varg_list, _starpu_callback_func_t);
-			va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_PROLOGUE_CALLBACK)
-		{
-			(void)va_arg(varg_list, _starpu_callback_func_t);
-		}
-		else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG)
-		{
-			(void)va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP)
-		{
-			(void)va_arg(varg_list, _starpu_callback_func_t);
-		}
-		else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG)
-		{
-			(void)va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_CALLBACK_ARG)
-		{
-			(void)va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_PRIORITY)
-		{
 			(void)va_arg(varg_list, int);
 			(void)va_arg(varg_list, int);
 		}
 		}
-		else if (arg_type==STARPU_EXECUTE_ON_NODE)
+		else if (arg_type==STARPU_DATA_MODE_ARRAY)
 		{
 		{
-			(void)va_arg(varg_list, int);
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_DATA)
-		{
-			(void)va_arg(varg_list, starpu_data_handle_t);
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
-		{
-			va_arg(varg_list, int);
-		}
-		else if (arg_type==STARPU_WORKER_ORDER)
-		{
-			va_arg(varg_list, unsigned);
-		}
-		else if (arg_type==STARPU_SCHED_CTX)
-		{
-			(void)va_arg(varg_list, unsigned);
-		}
-		else if (arg_type==STARPU_HYPERVISOR_TAG)
-		{
-			(void)va_arg(varg_list, int);
-		}
-		else if (arg_type==STARPU_POSSIBLY_PARALLEL)
-		{
-			(void)va_arg(varg_list, unsigned);
-		}
-		else if (arg_type==STARPU_FLOPS)
-		{
-			(void)va_arg(varg_list, double);
-		}
-		else if (arg_type==STARPU_TAG || arg_type==STARPU_TAG_ONLY)
-		{
-			(void)va_arg(varg_list, starpu_tag_t);
-		}
-		else
-		{
-			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
-		}
-	}
-
-	if (cl_arg_size)
-		*cl_arg_size = arg_buffer_size;
-	if (nbuffers)
-		*nbuffers = n;
-}
-
-int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list varg_list)
-{
-	int arg_type;
-	unsigned current_arg_offset = 0;
-	int nargs = 0;
-	char *_arg_buffer; // We would like a void* but we use a char* to allow pointer arithmetic
-
-	/* The buffer will contain : nargs, {size, content} (x nargs)*/
-	_arg_buffer = malloc(arg_buffer_size);
-
-	/* We will begin the buffer with the number of args */
-	current_arg_offset += sizeof(nargs);
-
-	while((arg_type = va_arg(varg_list, int)) != 0)
-	{
-		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
-		{
-			(void)va_arg(varg_list, starpu_data_handle_t);
-		}
-		else if (arg_type==STARPU_DATA_ARRAY)
-		{
-			(void)va_arg(varg_list, starpu_data_handle_t*);
+			(void)va_arg(varg_list, struct starpu_data_descr*);
 			(void)va_arg(varg_list, int);
 			(void)va_arg(varg_list, int);
 		}
 		}
 		else if (arg_type==STARPU_VALUE)
 		else if (arg_type==STARPU_VALUE)
 		{
 		{
 			/* We have a constant value: this should be followed by a pointer to the cst value and the size of the constant */
 			/* We have a constant value: this should be followed by a pointer to the cst value and the size of the constant */
 			void *ptr = va_arg(varg_list, void *);
 			void *ptr = va_arg(varg_list, void *);
-			size_t cst_size = va_arg(varg_list, size_t);
-
-			memcpy(_arg_buffer+current_arg_offset, (void *)&cst_size, sizeof(cst_size));
-			current_arg_offset += sizeof(size_t);
-
-			memcpy(_arg_buffer+current_arg_offset, ptr, cst_size);
-			current_arg_offset += cst_size;
+			size_t ptr_size = va_arg(varg_list, size_t);
 
 
 			nargs++;
 			nargs++;
-			STARPU_ASSERT(current_arg_offset <= arg_buffer_size);
+			if (current_offset > _arg_buffer_size)
+			{
+				if (_arg_buffer_size == 0) _arg_buffer_size = 1024; else _arg_buffer_size *= 2;
+				_arg_buffer = realloc(_arg_buffer, _arg_buffer_size);
+			}
+			memcpy(_arg_buffer+current_offset, (void *)&ptr_size, sizeof(ptr_size));
+			current_offset += sizeof(ptr_size);
+
+			memcpy(_arg_buffer+current_offset, ptr, ptr_size);
+			current_offset += ptr_size;
+			STARPU_ASSERT(current_offset <= _arg_buffer_size);
 		}
 		}
 		else if (arg_type==STARPU_CALLBACK)
 		else if (arg_type==STARPU_CALLBACK)
 		{
 		{
@@ -280,13 +171,51 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list
 	}
 	}
 
 
 	*arg_buffer = _arg_buffer;
 	*arg_buffer = _arg_buffer;
+	*arg_buffer_size = _arg_buffer_size;
 	return 0;
 	return 0;
 }
 }
 
 
-void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list)
+static
+void _starpu_task_insert_check_nb_buffers(struct starpu_codelet *cl, struct starpu_task **task, int *allocated_buffers, int nbuffers)
+{
+	if (nbuffers >= STARPU_NMAXBUFS)
+	{
+		if (*allocated_buffers == 0)
+		{
+			int i;
+			*allocated_buffers = STARPU_NMAXBUFS * 2;
+			(*task)->dyn_handles = malloc(*allocated_buffers * sizeof(starpu_data_handle_t));
+			(*task)->dyn_modes = malloc(*allocated_buffers * sizeof(enum starpu_data_access_mode));
+			for(i=0 ; i<nbuffers ; i++)
+			{
+				(*task)->dyn_handles[i] = (*task)->handles[i];
+			}
+			if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS)
+			{
+				for(i=0 ; i<nbuffers ; i++)
+				{
+					(*task)->dyn_modes[i] = (*task)->modes[i];
+				}
+			}
+		}
+		else
+		{
+			*allocated_buffers *= 2;
+			(*task)->dyn_handles = realloc((*task)->dyn_handles, *allocated_buffers * sizeof(starpu_data_handle_t));
+			(*task)->dyn_modes = realloc((*task)->dyn_modes, *allocated_buffers * sizeof(enum starpu_data_access_mode));
+		}
+	}
+}
+
+void _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list)
 {
 {
 	int arg_type;
 	int arg_type;
-	unsigned current_buffer = 0;
+	char *arg_buffer_ = NULL;
+	size_t arg_buffer_size_ = 0;
+	size_t current_offset = sizeof(int);
+	int nbuffers;
+	int nargs = 0;
+	int allocated_buffers = 0;
 
 
 	struct _starpu_task_insert_cb_wrapper *cl_arg_wrapper = (struct _starpu_task_insert_cb_wrapper *) malloc(sizeof(struct _starpu_task_insert_cb_wrapper));
 	struct _starpu_task_insert_cb_wrapper *cl_arg_wrapper = (struct _starpu_task_insert_cb_wrapper *) malloc(sizeof(struct _starpu_task_insert_cb_wrapper));
 	STARPU_ASSERT(cl_arg_wrapper);
 	STARPU_ASSERT(cl_arg_wrapper);
@@ -304,6 +233,7 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 	prologue_pop_cl_arg_wrapper->callback_func = NULL;
 	prologue_pop_cl_arg_wrapper->callback_func = NULL;
 
 
 	(*task)->cl = cl;
 	(*task)->cl = cl;
+	nbuffers = 0;
 
 
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
 	{
@@ -316,15 +246,17 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 
 
 			STARPU_ASSERT(cl != NULL);
 			STARPU_ASSERT(cl != NULL);
 
 
-			STARPU_TASK_SET_HANDLE((*task), handle, current_buffer);
+			_starpu_task_insert_check_nb_buffers(cl, task, &allocated_buffers, nbuffers);
+
+			STARPU_TASK_SET_HANDLE((*task), handle, nbuffers);
 			if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS)
 			if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS)
-				STARPU_TASK_SET_MODE(*task, mode, current_buffer);
-			else if (STARPU_CODELET_GET_MODE(cl, current_buffer))
+				STARPU_TASK_SET_MODE(*task, mode, nbuffers);
+			else if (STARPU_CODELET_GET_MODE(cl, nbuffers))
 			{
 			{
-				STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, current_buffer) == mode,
+				STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, nbuffers) == mode,
 						   "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
 						   "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
-						  cl->name, STARPU_CODELET_GET_MODE(cl, current_buffer),
-						  current_buffer, mode);
+						  cl->name, STARPU_CODELET_GET_MODE(cl, nbuffers),
+						  nbuffers, mode);
 			}
 			}
 			else
 			else
 			{
 			{
@@ -332,10 +264,10 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 #  warning shall we print a warning to the user
 #  warning shall we print a warning to the user
 /* Morse uses it to avoid having to set it in the codelet structure */
 /* Morse uses it to avoid having to set it in the codelet structure */
 #endif
 #endif
-				STARPU_CODELET_SET_MODE(cl, mode, current_buffer);
+				STARPU_CODELET_SET_MODE(cl, mode, nbuffers);
 			}
 			}
 
 
-			current_buffer++;
+			nbuffers++;
 		}
 		}
 		else if (arg_type == STARPU_DATA_ARRAY)
 		else if (arg_type == STARPU_DATA_ARRAY)
 		{
 		{
@@ -343,18 +275,65 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 			starpu_data_handle_t *handles = va_arg(varg_list, starpu_data_handle_t *);
 			starpu_data_handle_t *handles = va_arg(varg_list, starpu_data_handle_t *);
 			int nb_handles = va_arg(varg_list, int);
 			int nb_handles = va_arg(varg_list, int);
 
 
+			STARPU_ASSERT(cl != NULL);
+
 			int i;
 			int i;
 			for(i=0 ; i<nb_handles ; i++)
 			for(i=0 ; i<nb_handles ; i++)
 			{
 			{
-				STARPU_TASK_SET_HANDLE((*task), handles[i], current_buffer);
-				current_buffer++;
+				_starpu_task_insert_check_nb_buffers(cl, task, &allocated_buffers, nbuffers);
+				STARPU_TASK_SET_HANDLE((*task), handles[i], nbuffers);
+				nbuffers++;
+			}
+
+		}
+		else if (arg_type==STARPU_DATA_MODE_ARRAY)
+		{
+			// Expect to find a array of descr and its size
+			struct starpu_data_descr *descrs = va_arg(varg_list, struct starpu_data_descr *);
+			int nb_descrs = va_arg(varg_list, int);
+
+			STARPU_ASSERT(cl != NULL);
+
+			int i;
+			for(i=0 ; i<nb_descrs ; i++)
+			{
+				_starpu_task_insert_check_nb_buffers(cl, task, &allocated_buffers, nbuffers);
+				STARPU_TASK_SET_HANDLE((*task), descrs[i].handle, nbuffers);
+				if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS)
+					STARPU_TASK_SET_MODE(*task, descrs[i].mode, nbuffers);
+				else if (STARPU_CODELET_GET_MODE(cl, nbuffers))
+				{
+					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, nbuffers) == descrs[i].mode,
+							  "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
+							  cl->name, STARPU_CODELET_GET_MODE(cl, nbuffers),
+							  nbuffers, descrs[i].mode);
+				}
+				else
+				{
+					STARPU_CODELET_SET_MODE(cl, descrs[i].mode, nbuffers);
+				}
+
+				nbuffers++;
 			}
 			}
 
 
 		}
 		}
 		else if (arg_type==STARPU_VALUE)
 		else if (arg_type==STARPU_VALUE)
 		{
 		{
-			(void)va_arg(varg_list, void *);
-			(void)va_arg(varg_list, size_t);
+			void *ptr = va_arg(varg_list, void *);
+			size_t ptr_size = va_arg(varg_list, size_t);
+
+			nargs++;
+			if (current_offset > arg_buffer_size_)
+			{
+				if (arg_buffer_size_ == 0) arg_buffer_size_ = 1024; else arg_buffer_size_ *= 2;
+				arg_buffer_ = realloc(arg_buffer_, arg_buffer_size_);
+			}
+			memcpy(arg_buffer_+current_offset, (void *)&ptr_size, sizeof(ptr_size));
+			current_offset += sizeof(ptr_size);
+
+			memcpy(arg_buffer_+current_offset, ptr, ptr_size);
+			current_offset += ptr_size;
+			STARPU_ASSERT(current_offset <= arg_buffer_size_);
 		}
 		}
 		else if (arg_type==STARPU_CALLBACK)
 		else if (arg_type==STARPU_CALLBACK)
 		{
 		{
@@ -467,11 +446,29 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 		}
 		}
 	}
 	}
 
 
-	if (cl && cl->nbuffers == STARPU_VARIABLE_NBUFFERS)
-		(*task)->nbuffers = current_buffer;
+	if (cl)
+	{
+		if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS)
+		{
+			(*task)->nbuffers = nbuffers;
+		}
+		else
+		{
+			STARPU_ASSERT_MSG(nbuffers == cl->nbuffers, "Incoherent number of buffers between cl (%d) and number of parameters (%d)", cl->nbuffers, nbuffers);
+		}
+	}
 
 
-	(*task)->cl_arg = arg_buffer;
-	(*task)->cl_arg_size = arg_buffer_size;
+	if (nargs)
+	{
+		memcpy(arg_buffer_, (int *)&nargs, sizeof(nargs));
+		(*task)->cl_arg = arg_buffer_;
+		(*task)->cl_arg_size = arg_buffer_size_;
+	}
+	else
+	{
+		free(arg_buffer_);
+		arg_buffer_ = NULL;
+	}
 
 
 	/* The callback will free the argument stack and execute the
 	/* The callback will free the argument stack and execute the
 	 * application's callback, if any. */
 	 * application's callback, if any. */

+ 3 - 4
src/util/starpu_task_insert_utils.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,9 +21,8 @@
 #include <stdarg.h>
 #include <stdarg.h>
 #include <starpu.h>
 #include <starpu.h>
 
 
-void _starpu_task_insert_get_args_size(va_list varg_list, unsigned *nbuffers, size_t *cl_arg_size);
-int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list varg_list);
-void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list);
+int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_list varg_list);
+void _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list);
 
 
 #endif // __STARPU_TASK_INSERT_UTILS_H__
 #endif // __STARPU_TASK_INSERT_UTILS_H__