лет назад: 16 · f58ca0920e
--- a/ChangeLog
+++ b/ChangeLog
@@ -3,12 +3,12 @@ StarPU 0.2.901 aka 0.3-rc1 (svn revision 1236)
 
				 The asynchronous heterogeneous multi-accelerator release
			
 
				 
			
 
				   * Many API changes and code cleanups
			
 
				-    - Implement starpu_get_worker_id
			
 
				-    - Implement starpu_get_worker_name
			
 
				-    - Implement starpu_get_worker_type
			
 
				-    - Implement starpu_get_worker_count
			
 
				+    - Implement starpu_worker_get_id
			
 
				+    - Implement starpu_worker_get_name
			
 
				+    - Implement starpu_worker_get_type
			
 
				+    - Implement starpu_worker_get_count
			
 
				     - Implement starpu_display_codelet_stats
			
 
				-    - Implement starpu_prefetch_data_on_node
			
 
				+    - Implement starpu_data_prefetch_on_node
			
 
				     - Expose the starpu_data_set_wb_mask function
			
 
				   * Support nvidia (heterogeneous) multi-GPU
			
 
				   * Add the data request mechanism
			
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -512,18 +512,18 @@ guaranteed to be available until this method has been called.
 
				 @section Workers' Properties
			
 
				 
			
 
				 @menu
			
 
				-* starpu_get_worker_count::        Get the number of processing units
			
 
				-* starpu_get_cpu_worker_count::    Get the number of CPU controlled by StarPU
			
 
				-* starpu_get_cuda_worker_count::   Get the number of CUDA devices controlled by StarPU
			
 
				-* starpu_get_opencl_worker_count:: Get the number of OpenCL devices controlled by StarPU
			
 
				-* starpu_get_spu_worker_count::    Get the number of Cell SPUs controlled by StarPU
			
 
				-* starpu_get_worker_id::           Get the identifier of the current worker
			
 
				-* starpu_get_worker_type::         Get the type of processing unit associated to a worker
			
 
				-* starpu_get_worker_name::         Get the name of a worker
			
 
				+* starpu_worker_get_count::        Get the number of processing units
			
 
				+* starpu_cpu_worker_get_count::    Get the number of CPU controlled by StarPU
			
 
				+* starpu_cuda_worker_get_count::   Get the number of CUDA devices controlled by StarPU
			
 
				+* starpu_opencl_worker_get_count:: Get the number of OpenCL devices controlled by StarPU
			
 
				+* starpu_spu_worker_get_count::    Get the number of Cell SPUs controlled by StarPU
			
 
				+* starpu_worker_get_id::           Get the identifier of the current worker
			
 
				+* starpu_worker_get_type::         Get the type of processing unit associated to a worker
			
 
				+* starpu_worker_get_name::         Get the name of a worker
			
 
				 @end menu
			
 
				 
			
 
				-@node starpu_get_worker_count
			
 
				-@subsection @code{starpu_get_worker_count} -- Get the number of processing units
			
 
				+@node starpu_worker_get_count
			
 
				+@subsection @code{starpu_worker_get_count} -- Get the number of processing units
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -531,11 +531,11 @@ This function returns the number of workers (i.e. processing units executing
 
				 StarPU tasks). The returned value should be at most @code{STARPU_NMAXWORKERS}. 
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{unsigned starpu_get_worker_count(void);}
			
 
				+@code{unsigned starpu_worker_get_count(void);}
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_get_cpu_worker_count
			
 
				-@subsection @code{starpu_get_cpu_worker_count} -- Get the number of CPU controlled by StarPU
			
 
				+@node starpu_cpu_worker_get_count
			
 
				+@subsection @code{starpu_cpu_worker_get_count} -- Get the number of CPU controlled by StarPU
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -543,11 +543,11 @@ This function returns the number of CPUs controlled by StarPU. The returned
 
				 value should be at most @code{STARPU_NMAXCPUS}.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{unsigned starpu_get_cpu_worker_count(void);}
			
 
				+@code{unsigned starpu_cpu_worker_get_count(void);}
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_get_cuda_worker_count
			
 
				-@subsection @code{starpu_get_cuda_worker_count} -- Get the number of CUDA devices controlled by StarPU
			
 
				+@node starpu_cuda_worker_get_count
			
 
				+@subsection @code{starpu_cuda_worker_get_count} -- Get the number of CUDA devices controlled by StarPU
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -555,11 +555,11 @@ This function returns the number of CUDA devices controlled by StarPU. The retur
 
				 value should be at most @code{STARPU_MAXCUDADEVS}.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{unsigned starpu_get_cuda_worker_count(void);}
			
 
				+@code{unsigned starpu_cuda_worker_get_count(void);}
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_get_opencl_worker_count
			
 
				-@subsection @code{starpu_get_opencl_worker_count} -- Get the number of OpenCL devices controlled by StarPU
			
 
				+@node starpu_opencl_worker_get_count
			
 
				+@subsection @code{starpu_opencl_worker_get_count} -- Get the number of OpenCL devices controlled by StarPU
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -567,42 +567,42 @@ This function returns the number of OpenCL devices controlled by StarPU. The ret
 
				 value should be at most @code{STARPU_MAXOPENCLDEVS}.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{unsigned starpu_get_opencl_worker_count(void);}
			
 
				+@code{unsigned starpu_opencl_worker_get_count(void);}
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_get_spu_worker_count
			
 
				-@subsection @code{starpu_get_spu_worker_count} -- Get the number of Cell SPUs controlled by StarPU
			
 
				+@node starpu_spu_worker_get_count
			
 
				+@subsection @code{starpu_spu_worker_get_count} -- Get the number of Cell SPUs controlled by StarPU
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
 
				 This function returns the number of Cell SPUs controlled by StarPU.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{unsigned starpu_get_opencl_worker_count(void);}
			
 
				+@code{unsigned starpu_spu_worker_get_count(void);}
			
 
				 @end table
			
 
				 
			
 
				 
			
 
				-@node starpu_get_worker_id
			
 
				-@subsection @code{starpu_get_worker_id} -- Get the identifier of the current worker
			
 
				+@node starpu_worker_get_id
			
 
				+@subsection @code{starpu_worker_get_id} -- Get the identifier of the current worker
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
 
				 This function returns the identifier of the worker associated to the calling
			
 
				 thread. The returned value is either -1 if the current context is not a StarPU
			
 
				 worker (i.e. when called from the application outside a task or a callback), or
			
 
				-an integer between 0 and @code{starpu_get_worker_count() - 1}.
			
 
				+an integer between 0 and @code{starpu_worker_get_count() - 1}.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{int starpu_get_worker_id(void);}
			
 
				+@code{int starpu_worker_get_id(void);}
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_get_worker_type
			
 
				-@subsection @code{starpu_get_worker_type} -- Get the type of processing unit associated to a worker
			
 
				+@node starpu_worker_get_type
			
 
				+@subsection @code{starpu_worker_get_type} -- Get the type of processing unit associated to a worker
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
 
				 This function returns the type of worker associated to an identifier (as
			
 
				-returned by the @code{starpu_get_worker_id} function). The returned value
			
 
				+returned by the @code{starpu_worker_get_id} function). The returned value
			
 
				 indicates the architecture of the worker: @code{STARPU_CPU_WORKER} for a CPU
			
 
				 core, @code{STARPU_CUDA_WORKER} for a CUDA device,
			
 
				 @code{STARPU_OPENCL_WORKER} for a OpenCL device, and
			
@@ -610,12 +610,12 @@ core, @code{STARPU_CUDA_WORKER} for a CUDA device,
 
				 identifier is unspecified.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{enum starpu_archtype starpu_get_worker_type(int id);}
			
 
				+@code{enum starpu_archtype starpu_worker_get_type(int id);}
			
 
				 
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_get_worker_name
			
 
				-@subsection @code{starpu_get_worker_name} -- Get the name of a worker
			
 
				+@node starpu_worker_get_name
			
 
				+@subsection @code{starpu_worker_get_name} -- Get the name of a worker
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -627,7 +627,7 @@ is a valid pointer to a buffer of @code{maxlen} bytes at least. Calling this
 
				 function on an invalid identifier results in an unspecified behaviour.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{void starpu_get_worker_name(int id, char *dst, size_t maxlen);}
			
 
				+@code{void starpu_worker_get_name(int id, char *dst, size_t maxlen);}
			
 
				 
			
 
				 @end table
			
 
				 
			
@@ -667,13 +667,13 @@ TODO
 
				 @end table
			
 
				 
			
 
				 
			
 
				-@c void starpu_delete_data(struct starpu_data_state_t *state);
			
 
				+@c void starpu_data_unregister(struct starpu_data_state_t *state);
			
 
				 
			
 
				-@c starpu_get_worker_memory_node TODO
			
 
				+@c starpu_worker_get_memory_node TODO
			
 
				 @c 
			
 
				 
			
 
				 @c user interaction with the DSM
			
 
				-@c   void starpu_sync_data_with_mem(struct starpu_data_state_t *state);
			
 
				+@c   void starpu_data_sync_with_mem(struct starpu_data_state_t *state);
			
 
				 @c   void starpu_notify_data_modification(struct starpu_data_state_t *state, uint32_t modifying_node);
			
 
				 
			
 
				 @node Codelets and Tasks
			
@@ -688,7 +688,7 @@ TODO
 
				 * starpu_task_destroy::           Destroy a dynamically allocated Task
			
 
				 * starpu_task_submit::            Submit a Task
			
 
				 * starpu_task_wait::              Wait for the termination of a Task
			
 
				-* starpu_wait_all_tasks::	  Wait for the termination of all Tasks
			
 
				+* starpu_task_wait_for_all::	  Wait for the termination of all Tasks
			
 
				 @end menu
			
 
				 
			
 
				 @node struct starpu_codelet
			
@@ -824,7 +824,7 @@ task to the worker specified by the @code{workerid} field.
 
				 @item @code{workerid} (optional):
			
 
				 If the @code{execute_on_a_specific_worker} field is set, this field indicates
			
 
				 which is the identifier of the worker that should process this task (as
			
 
				-returned by @code{starpu_get_worker_id}). This field is ignored if
			
 
				+returned by @code{starpu_worker_get_id}). This field is ignored if
			
 
				 @code{execute_on_a_specific_worker} field is set to 0.
			
 
				 
			
 
				 @item @code{detach} (optional) (default = 1):
			
@@ -936,14 +936,14 @@ available and this task is only implemented on top of CUDA).
 
				 @code{int starpu_task_submit(struct starpu_task *task);}
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_wait_all_tasks
			
 
				-@subsection @code{starpu_wait_all_tasks} -- Wait for the termination of all Tasks
			
 
				+@node starpu_task_wait_for_all
			
 
				+@subsection @code{starpu_task_wait_for_all} -- Wait for the termination of all Tasks
			
 
				 @table @asis
			
 
				 @item @emph{Description}:
			
 
				 This function blocks until all the tasks that were submitted are terminated.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{void starpu_wait_all_tasks(void);}
			
 
				+@code{void starpu_task_wait_for_all(void);}
			
 
				 @end table
			
 
				 
			
 
				 
			
@@ -1088,53 +1088,53 @@ DAG before actually giving StarPU the opportunity to execute the tasks.
 
				 @node CUDA extensions
			
 
				 @section CUDA extensions
			
 
				 
			
 
				-@c void starpu_malloc_pinned_if_possible(float **A, size_t dim);
			
 
				+@c void starpu_data_malloc_pinned_if_possible(float **A, size_t dim);
			
 
				 
			
 
				-@c starpu_helper_init_cublas TODO
			
 
				+@c starpu_helper_cublas_init TODO
			
 
				 
			
 
				-@c starpu_helper_shutdown_cublas TODO
			
 
				+@c starpu_helper_cublas_shutdown TODO
			
 
				 
			
 
				 @menu
			
 
				-* starpu_get_local_cuda_stream::   Get current worker's CUDA stream
			
 
				-* starpu_helper_init_cublas::      Initialize CUBLAS on every CUDA device
			
 
				-* starpu_helper_shutdown_cublas::  Deiitialize CUBLAS on every CUDA device
			
 
				+* starpu_cuda_get_local_stream::   Get current worker's CUDA stream
			
 
				+* starpu_helper_cublas_init::      Initialize CUBLAS on every CUDA device
			
 
				+* starpu_helper_cublas_shutdown::  Deinitialize CUBLAS on every CUDA device
			
 
				 @end menu
			
 
				 
			
 
				-@node starpu_get_local_cuda_stream
			
 
				-@subsection @code{starpu_get_local_cuda_stream} -- Get current worker's CUDA stream
			
 
				+@node starpu_cuda_get_local_stream
			
 
				+@subsection @code{starpu_cuda_get_local_stream} -- Get current worker's CUDA stream
			
 
				 @table @asis
			
 
				 @item @emph{Description}:
			
 
				 StarPU provides a stream for every CUDA device controlled by StarPU. This
			
 
				 function is only provided for convenience so that programmers can easily use
			
 
				 asynchronous operations within codelets without having to create a stream by
			
 
				 hand. Note that the application is not forced to use the stream provided by
			
 
				-@code{starpu_get_local_cuda_stream} and may also create its own streams.
			
 
				+@code{starpu_cuda_get_local_stream} and may also create its own streams.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{cudaStream_t *starpu_get_local_cuda_stream(void);}
			
 
				+@code{cudaStream_t *starpu_cuda_get_local_stream(void);}
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_helper_init_cublas
			
 
				-@subsection @code{starpu_helper_init_cublas} -- Initialize CUBLAS on every CUDA device
			
 
				+@node starpu_helper_cublas_init
			
 
				+@subsection @code{starpu_helper_cublas_init} -- Initialize CUBLAS on every CUDA device
			
 
				 @table @asis
			
 
				 @item @emph{Description}:
			
 
				 The CUBLAS library must be initialized prior to any CUBLAS call. Calling
			
 
				-@code{starpu_helper_init_cublas} will initialize CUBLAS on every CUDA device
			
 
				+@code{starpu_helper_cublas_init} will initialize CUBLAS on every CUDA device
			
 
				 controlled by StarPU. This call blocks until CUBLAS has been properly
			
 
				 initialized on every device.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{void starpu_helper_init_cublas(void);}
			
 
				+@code{void starpu_helper_cublas_init(void);}
			
 
				 @end table
			
 
				 
			
 
				-@node starpu_helper_shutdown_cublas
			
 
				-@subsection @code{starpu_helper_shutdown_cublas} -- Deinitialize CUBLAS on every CUDA device
			
 
				+@node starpu_helper_cublas_shutdown
			
 
				+@subsection @code{starpu_helper_cublas_shutdown} -- Deinitialize CUBLAS on every CUDA device
			
 
				 @table @asis
			
 
				 @item @emph{Description}:
			
 
				 This function synchronously deinitializes the CUBLAS library on every CUDA device.
			
 
				 
			
 
				 @item @emph{Prototype}:
			
 
				-@code{void starpu_helper_shutdown_cublas(void);}
			
 
				+@code{void starpu_helper_cublas_shutdown(void);}
			
 
				 @end table
			
 
				 
			
 
				 @node Cell extensions
			
@@ -1369,7 +1369,7 @@ The following lines show how to declare an array of @code{n} elements of type
 
				 float tab[n];
			
 
				 
			
 
				 starpu_data_handle tab_handle;
			
 
				-starpu_register_vector_data(&tab_handle, 0, tab, n, sizeof(float));
			
 
				+starpu_vector_data_register(&tab_handle, 0, tab, n, sizeof(float));
			
 
				 @end example
			
 
				 
			
 
				 The first argument, called the @b{data handle}, is an opaque pointer which
			
--- a/examples/audio/starpu_audio_processing.c
+++ b/examples/audio/starpu_audio_processing.c
@@ -167,7 +167,7 @@ static void band_filter_kernel_gpu(void *descr[], __attribute__((unused)) void *
 
				 	float *localA = (float *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	cufftComplex *localout;
			
 
				 
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 	
			
 
				 	/* initialize the plane only during the first iteration */
			
 
				 	if (!plans[workerid].is_initialized)
			
@@ -214,7 +214,7 @@ static void band_filter_kernel_cpu(void *descr[], __attribute__((unused)) void *
 
				 {
			
 
				 	float *localA = (float *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 	
			
 
				 	/* initialize the plane only during the first iteration */
			
 
				 	if (!plans[workerid].is_initialized)
			
@@ -284,7 +284,7 @@ static starpu_codelet band_filter_cl = {
 
				 void callback(void *arg)
			
 
				 {
			
 
				 	/* do some accounting */
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	task_per_worker[id]++;
			
 
				 }
			
 
				 
			
@@ -294,7 +294,7 @@ void create_starpu_task(unsigned iter)
 
				 
			
 
				 	task->cl = &band_filter_cl;
			
 
				 
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(A_handle, 1, iter);
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(A_handle, 1, iter);
			
 
				 	task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	task->callback_func = callback;
			
@@ -326,14 +326,14 @@ static void init_problem(void)
 
				 	/* allocate a buffer to store the content of input file */
			
 
				 	if (use_pin)
			
 
				 	{
			
 
				-		starpu_malloc_pinned_if_possible((void **)&A, length_data*sizeof(float));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&A, length_data*sizeof(float));
			
 
				 	}
			
 
				 	else {
			
 
				 		A = malloc(length_data*sizeof(float));
			
 
				 	}
			
 
				 
			
 
				 	/* allocate working buffer (this could be done online, but we'll keep it simple) */
			
 
				-	//starpu_malloc_pinned_if_possible((void **)&outdata, length_data*sizeof(fftwf_complex));
			
 
				+	//starpu_data_malloc_pinned_if_possible((void **)&outdata, length_data*sizeof(fftwf_complex));
			
 
				 
			
 
				 	/* read input data into buffer "A" */
			
 
				 	read_16bit_wav(infile, length_data, A, infile_raw);
			
@@ -389,7 +389,7 @@ int main(int argc, char **argv)
 
				 	/* launch StarPU */
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_register_vector_data(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));
			
 
				+	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));
			
 
				 
			
 
				 	starpu_filter f = 
			
 
				 	{
			
@@ -397,10 +397,10 @@ int main(int argc, char **argv)
 
				 		.filter_arg = niter
			
 
				 	};
			
 
				 
			
 
				-	starpu_partition_data(A_handle, &f);
			
 
				+	starpu_data_partition(A_handle, &f);
			
 
				 
			
 
				 	for (iter = 0; iter < niter; iter++)
			
 
				-		starpu_data_set_wb_mask(starpu_get_sub_data(A_handle, 1, iter), 1<<0);
			
 
				+		starpu_data_set_wb_mask(starpu_data_get_sub_data(A_handle, 1, iter), 1<<0);
			
 
				 
			
 
				 	gettimeofday(&start, NULL);
			
 
				 
			
@@ -409,7 +409,7 @@ int main(int argc, char **argv)
 
				 		create_starpu_task(iter);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -422,7 +422,7 @@ int main(int argc, char **argv)
 
				 		if (task_per_worker[worker])
			
 
				 		{
			
 
				 			char name[32];
			
 
				-			starpu_get_worker_name(worker, name, 32);
			
 
				+			starpu_worker_get_name(worker, name, 32);
			
 
				 
			
 
				 			unsigned long bytes = nsamples*sizeof(float)*task_per_worker[worker];
			
 
				 
			
@@ -434,8 +434,8 @@ int main(int argc, char **argv)
 
				 		fprintf(stderr, "Writing output data\n");
			
 
				 
			
 
				 	/* make sure that the output is in RAM before quitting StarPU */
			
 
				-	starpu_unpartition_data(A_handle, 0);
			
 
				-	starpu_delete_data(A_handle);
			
 
				+	starpu_data_unpartition(A_handle, 0);
			
 
				+	starpu_data_unregister(A_handle);
			
 
				 
			
 
				 	/* we are done ! */
			
 
				 	starpu_shutdown();
			
--- a/examples/axpy/axpy.c
+++ b/examples/axpy/axpy.c
@@ -82,16 +82,16 @@ int main(int argc, char **argv)
 
				 	/* Initialize StarPU */
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	/* This is equivalent to 
			
 
				 		vec_a = malloc(N*sizeof(TYPE));
			
 
				 		vec_b = malloc(N*sizeof(TYPE));
			
 
				 	*/
			
 
				-	starpu_malloc_pinned_if_possible((void **)&vec_x, N*sizeof(TYPE));
			
 
				+	starpu_data_malloc_pinned_if_possible((void **)&vec_x, N*sizeof(TYPE));
			
 
				 	assert(vec_x);
			
 
				 
			
 
				-	starpu_malloc_pinned_if_possible((void **)&vec_y, N*sizeof(TYPE));
			
 
				+	starpu_data_malloc_pinned_if_possible((void **)&vec_y, N*sizeof(TYPE));
			
 
				 	assert(vec_y);
			
 
				 
			
 
				 	unsigned i;
			
@@ -105,8 +105,8 @@ int main(int argc, char **argv)
 
				 	fprintf(stderr, "BEFORE y[0] = %2.2f\n", vec_y[0]);
			
 
				 
			
 
				 	/* Declare the data to StarPU */
			
 
				-	starpu_register_vector_data(&handle_x, 0, (uintptr_t)vec_x, N, sizeof(TYPE));
			
 
				-	starpu_register_vector_data(&handle_y, 0, (uintptr_t)vec_y, N, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&handle_x, 0, (uintptr_t)vec_x, N, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&handle_y, 0, (uintptr_t)vec_y, N, sizeof(TYPE));
			
 
				 
			
 
				 	/* Divide the vector into blocks */
			
 
				 	starpu_filter block_filter = {
			
@@ -114,8 +114,8 @@ int main(int argc, char **argv)
 
				 		.filter_arg = NBLOCKS
			
 
				 	};
			
 
				 
			
 
				-	starpu_partition_data(handle_x, &block_filter);
			
 
				-	starpu_partition_data(handle_y, &block_filter);
			
 
				+	starpu_data_partition(handle_x, &block_filter);
			
 
				+	starpu_data_partition(handle_y, &block_filter);
			
 
				 
			
 
				 	TYPE alpha = 3.41;
			
 
				 
			
@@ -133,19 +133,19 @@ int main(int argc, char **argv)
 
				 
			
 
				 		task->cl_arg = &alpha;
			
 
				 
			
 
				-		task->buffers[0].handle = starpu_get_sub_data(handle_x, 1, b);
			
 
				+		task->buffers[0].handle = starpu_data_get_sub_data(handle_x, 1, b);
			
 
				 		task->buffers[0].mode = STARPU_R;
			
 
				 		
			
 
				-		task->buffers[1].handle = starpu_get_sub_data(handle_y, 1, b);
			
 
				+		task->buffers[1].handle = starpu_data_get_sub_data(handle_y, 1, b);
			
 
				 		task->buffers[1].mode = STARPU_RW;
			
 
				 		
			
 
				 		starpu_task_submit(task);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				-	starpu_unpartition_data(handle_y, 0);
			
 
				-	starpu_delete_data(handle_y);
			
 
				+	starpu_data_unpartition(handle_y, 0);
			
 
				+	starpu_data_unregister(handle_y);
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				         double timing = (double)((end.tv_sec - start.tv_sec)*1000000 +
			
--- a/examples/basic_examples/mult.c
+++ b/examples/basic_examples/mult.c
@@ -18,13 +18,13 @@
 
				  * This example shows a simple implementation of a blocked matrix
			
 
				  * multiplication. Note that this is NOT intended to be an efficient
			
 
				  * implementation of sgemm! In this example, we show:
			
 
				- *  - how to declare dense matrices (starpu_register_matrix_data)
			
 
				+ *  - how to declare dense matrices (starpu_matrix_data_register)
			
 
				  *  - how to manipulate matrices within codelets (eg. descr[0].blas.ld)
			
 
				  *  - how to use filters to partition the matrices into blocks
			
 
				- *    (starpu_partition_data and starpu_map_filters)
			
 
				- *  - how to unpartition data (starpu_unpartition_data) and how to stop
			
 
				- *    monitoring data (starpu_delete_data)
			
 
				- *  - how to manipulate subsets of data (starpu_get_sub_data)
			
 
				+ *    (starpu_data_partition and starpu_map_filters)
			
 
				+ *  - how to unpartition data (starpu_data_unpartition) and how to stop
			
 
				+ *    monitoring data (starpu_data_unregister)
			
 
				+ *  - how to manipulate subsets of data (starpu_data_get_sub_data)
			
 
				  *  - how to construct an autocalibrated performance model (starpu_perfmodel_t)
			
 
				  *  - how to submit asynchronous tasks and how to use callback to handle task
			
 
				  *    termination
			
@@ -88,7 +88,7 @@ static void callback_func(void *arg)
 
				 	{
			
 
				 		/* IMPORTANT : note that we CANNOT call blocking operations
			
 
				 		 * within callbacks as it may lead to a deadlock of StarPU.
			
 
				-		 * starpu_unpartition_data is for instance called by the main
			
 
				+		 * starpu_data_unpartition is for instance called by the main
			
 
				 		 * thread since it may cause /potentially/ blocking operations
			
 
				 		 * such as memory transfers from a GPU to a CPU. */
			
 
				 		
			
@@ -199,11 +199,11 @@ static void partition_mult_data(void)
 
				 	 * node in which resides the matrix: 0 means that the 3rd argument is
			
 
				 	 * an adress in main memory.
			
 
				 	 */
			
 
				-	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
			
 
				+	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A, 
			
 
				 		ydim, ydim, zdim, sizeof(float));
			
 
				-	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
			
 
				+	starpu_matrix_data_register(&B_handle, 0, (uintptr_t)B, 
			
 
				 		zdim, zdim, xdim, sizeof(float));
			
 
				-	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
			
 
				+	starpu_matrix_data_register(&C_handle, 0, (uintptr_t)C, 
			
 
				 		ydim, ydim, xdim, sizeof(float));
			
 
				 
			
 
				 	/* A filter is a method to partition a data into disjoint chunks, it is
			
@@ -228,17 +228,17 @@ static void partition_mult_data(void)
 
				 		
			
 
				 /*
			
 
				  *	Illustration with nslicex = 4 and nslicey = 2, it is possible to access
			
 
				- *	sub-data by using the "starpu_get_sub_data" method, which takes a data handle,
			
 
				+ *	sub-data by using the "starpu_data_get_sub_data" method, which takes a data handle,
			
 
				  *	the number of filters to apply, and the indexes for each filters, for
			
 
				  *	instance:
			
 
				  *
			
 
				- *		A' handle is starpu_get_sub_data(A_handle, 1, 1); 
			
 
				- *		B' handle is starpu_get_sub_data(B_handle, 1, 2); 
			
 
				- *		C' handle is starpu_get_sub_data(C_handle, 2, 2, 1); 
			
 
				+ *		A' handle is starpu_data_get_sub_data(A_handle, 1, 1); 
			
 
				+ *		B' handle is starpu_data_get_sub_data(B_handle, 1, 2); 
			
 
				+ *		C' handle is starpu_data_get_sub_data(C_handle, 2, 2, 1); 
			
 
				  *
			
 
				  *	Note that here we applied 2 filters recursively onto C.
			
 
				  *
			
 
				- *	"starpu_get_sub_data(C_handle, 1, 3)" would return a handle to the 4th column
			
 
				+ *	"starpu_data_get_sub_data(C_handle, 1, 3)" would return a handle to the 4th column
			
 
				  *	of blocked matrix C for example.
			
 
				  *
			
 
				  *		              |---|---|---|---|
			
@@ -259,18 +259,18 @@ static void partition_mult_data(void)
 
				  *	for each of the elements independantly. The tasks should therefore NOT
			
 
				  *	access inner nodes (eg. one column of C or the whole C) but only the
			
 
				  *	leafs of the tree (ie. blocks here). Manipulating inner nodes is only
			
 
				- *	possible by disapplying the filters (using starpu_unpartition_data), to
			
 
				+ *	possible by disapplying the filters (using starpu_data_unpartition), to
			
 
				  *	enforce memory consistency.
			
 
				  */
			
 
				 
			
 
				-	starpu_partition_data(B_handle, &f);
			
 
				-	starpu_partition_data(A_handle, &f2);
			
 
				+	starpu_data_partition(B_handle, &f);
			
 
				+	starpu_data_partition(A_handle, &f2);
			
 
				 
			
 
				 	/* starpu_map_filters is a variable-arity function, the first argument
			
 
				 	 * is the handle of the data to partition, the second argument is the
			
 
				 	 * number of filters to apply recursively. Filters are applied in the
			
 
				 	 * same order as the arguments.
			
 
				-	 * This would be equivalent to starpu_partition_data(C_handle, &f) and
			
 
				+	 * This would be equivalent to starpu_data_partition(C_handle, &f) and
			
 
				 	 * then applying f2 on each sub-data (ie. each column of C)
			
 
				 	 */
			
 
				 	starpu_map_filters(C_handle, 2, &f, &f2);
			
@@ -338,22 +338,22 @@ static void launch_tasks(void)
 
				 			 * (respectively B) so we grab the handle to the chunk
			
 
				 			 * identified by "tasky" (respectively "taskx). The "1"
			
 
				 			 * tells StarPU that there is a single argument to the
			
 
				-			 * variable-arity function starpu_get_sub_data */
			
 
				-			task->buffers[0].handle = starpu_get_sub_data(A_handle, 1, tasky);
			
 
				+			 * variable-arity function starpu_data_get_sub_data */
			
 
				+			task->buffers[0].handle = starpu_data_get_sub_data(A_handle, 1, tasky);
			
 
				 			task->buffers[0].mode = STARPU_R;
			
 
				-			task->buffers[1].handle = starpu_get_sub_data(B_handle, 1, taskx);
			
 
				+			task->buffers[1].handle = starpu_data_get_sub_data(B_handle, 1, taskx);
			
 
				 			task->buffers[1].mode = STARPU_R;
			
 
				 
			
 
				 			/* 2 filters were applied on matrix C, so we give
			
 
				-			 * starpu_get_sub_data 2 arguments. The order of the arguments
			
 
				+			 * starpu_data_get_sub_data 2 arguments. The order of the arguments
			
 
				 			 * must match the order in which the filters were
			
 
				 			 * applied.
			
 
				-			 * NB: starpu_get_sub_data(C_handle, 1, k) would have returned
			
 
				+			 * NB: starpu_data_get_sub_data(C_handle, 1, k) would have returned
			
 
				 			 * a handle to the column number k of matrix C.
			
 
				-			 * NB2: starpu_get_sub_data(C_handle, 2, taskx, tasky) is
			
 
				+			 * NB2: starpu_data_get_sub_data(C_handle, 2, taskx, tasky) is
			
 
				 			 * equivalent to
			
 
				-			 * starpu_get_sub_data(starpu_get_sub_data(C_handle, 1, taskx), 1, tasky)*/
			
 
				-			task->buffers[2].handle = starpu_get_sub_data(C_handle, 2, taskx, tasky);
			
 
				+			 * starpu_data_get_sub_data(starpu_data_get_sub_data(C_handle, 1, taskx), 1, tasky)*/
			
 
				+			task->buffers[2].handle = starpu_data_get_sub_data(C_handle, 2, taskx, tasky);
			
 
				 			task->buffers[2].mode = STARPU_W;
			
 
				 
			
 
				 			/* this is not a blocking call since task->synchronous = 0 */
			
@@ -389,17 +389,17 @@ int main(__attribute__ ((unused)) int argc,
 
				 	pthread_mutex_unlock(&mutex);
			
 
				 
			
 
				 	/* remove the filters applied by the means of starpu_map_filters; now
			
 
				- 	 * it's not possible to manipulate a subset of C using starpu_get_sub_data until
			
 
				+ 	 * it's not possible to manipulate a subset of C using starpu_data_get_sub_data until
			
 
				 	 * starpu_map_filters is called again on C_handle.
			
 
				 	 * The second argument is the memory node where the different subsets
			
 
				 	 * should be reassembled, 0 = main memory (RAM) */
			
 
				-	starpu_unpartition_data(C_handle, 0);
			
 
				+	starpu_data_unpartition(C_handle, 0);
			
 
				 
			
 
				 	/* stop monitoring matrix C : after this, it is not possible to pass C 
			
 
				 	 * (or any subset of C) as a codelet input/output. This also implements
			
 
				 	 * a barrier so that the piece of data is put back into main memory in
			
 
				 	 * case it was only available on a GPU for instance. */
			
 
				-	starpu_delete_data(C_handle);
			
 
				+	starpu_data_unregister(C_handle);
			
 
				 	
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/examples/basic_examples/vector_scal.c
+++ b/examples/basic_examples/vector_scal.c
@@ -17,7 +17,7 @@
 
				 /*
			
 
				  * This example demonstrates how to use StarPU to scale an array by a factor.
			
 
				  * It shows how to manipulate data with StarPU's data management library.
			
 
				- *  1- how to declare a piece of data to StarPU (starpu_register_vector_data)
			
 
				+ *  1- how to declare a piece of data to StarPU (starpu_vector_data_register)
			
 
				  *  2- how to describe which data are accessed by a task (task->buffers[0])
			
 
				  *  3- how a kernel can manipulate the data (buffers[0].vector.ptr)
			
 
				  */
			
@@ -89,7 +89,7 @@ int main(int argc, char **argv)
 
				 	 *  - the fifth argument is the size of each element.
			
 
				 	 */
			
 
				 	starpu_data_handle tab_handle;
			
 
				-	starpu_register_vector_data(&tab_handle, 0, (uintptr_t)tab, N, sizeof(float));
			
 
				+	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, N, sizeof(float));
			
 
				 
			
 
				 	float factor = 3.14;
			
 
				 
			
@@ -122,7 +122,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	/* StarPU does not need to manipulate the array anymore so we can stop
			
 
				  	 * monitoring it */
			
 
				-	starpu_delete_data(tab_handle);
			
 
				+	starpu_data_unregister(tab_handle);
			
 
				 
			
 
				 	/* terminate StarPU, no task can be submitted after */
			
 
				 	starpu_shutdown();
			
--- a/examples/block/block.c
+++ b/examples/block/block.c
@@ -43,8 +43,8 @@ void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 
				 	int nz = (int)STARPU_GET_BLOCK_NZ(descr[0]);
			
 
				         float *multiplier = (float *)STARPU_GET_VARIABLE_PTR(descr[1]);
			
 
				 
			
 
				-        id = starpu_get_worker_id();
			
 
				-        devid = starpu_get_worker_devid(id);
			
 
				+        id = starpu_worker_get_id();
			
 
				+        devid = starpu_worker_get_devid(id);
			
 
				 
			
 
				         err = starpu_opencl_load_kernel(&kernel, &queue,
			
 
				                                         "examples/block/block_kernel.cl", "block", devid);
			
@@ -84,7 +84,7 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 
				         starpu_data_handle multiplier_handle;
			
 
				         int i, j, k;
			
 
				 
			
 
				-	starpu_register_block_data(&block_handle, 0, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float));
			
 
				+	starpu_block_data_register(&block_handle, 0, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float));
			
 
				 	starpu_register_variable_data(&multiplier_handle, 0, (uintptr_t)&multiplier, sizeof(float));
			
 
				 
			
 
				 	cl.where = where;
			
@@ -108,17 +108,17 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 
				                 return 1;
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	/* update the array in RAM */
			
 
				-        starpu_sync_data_with_mem(block_handle, STARPU_R);
			
 
				+        starpu_data_sync_with_mem(block_handle, STARPU_R);
			
 
				 
			
 
				         for(i=0 ; i<pnx*pny*pnz; i++) {
			
 
				           fprintf(stderr, "%f ", block[i]);
			
 
				         }
			
 
				         fprintf(stderr, "\n");
			
 
				 
			
 
				-        starpu_release_data_from_mem(block_handle);
			
 
				+        starpu_data_release_from_mem(block_handle);
			
 
				 
			
 
				         return 0;
			
 
				 }
			
--- a/examples/cholesky/dw_cholesky.c
+++ b/examples/cholesky/dw_cholesky.c
@@ -55,7 +55,7 @@ static struct starpu_task * create_task_11(starpu_data_handle dataA, unsigned k)
 
				 	task->cl = &cl11;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k);
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k);
			
 
				 	task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	/* this is an important task */
			
@@ -88,9 +88,9 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned j)
 
				 	task->cl = &cl21;	
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, j); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!noprio && (j == k+1)) {
			
@@ -128,11 +128,11 @@ static void create_task_22(starpu_data_handle dataA, unsigned k, unsigned i, uns
 
				 	task->cl = &cl22;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, i); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, i); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, j); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_R;
			
 
				-	task->buffers[2].handle = starpu_get_sub_data(dataA, 2, i, j); 
			
 
				+	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!noprio && (i == k + 1) && (j == k +1) ) {
			
@@ -202,7 +202,7 @@ static void _dw_cholesky(starpu_data_handle dataA, unsigned nblocks)
 
				 	/* stall the application until the end of computations */
			
 
				 	starpu_tag_wait(TAG11(nblocks-1));
			
 
				 
			
 
				-	starpu_unpartition_data(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, 0);
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -211,7 +211,7 @@ static void _dw_cholesky(starpu_data_handle dataA, unsigned nblocks)
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	printf("%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataA);
			
 
				+	unsigned n = starpu_matrix_get_nx(dataA);
			
 
				 
			
 
				 	double flop = (1.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
@@ -221,13 +221,13 @@ void initialize_system(float **A, unsigned dim, unsigned pinned)
 
				 {
			
 
				 	starpu_init(NULL);
			
 
				 	
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	_starpu_timing_init();
			
 
				 
			
 
				 	if (pinned)
			
 
				 	{
			
 
				-		starpu_malloc_pinned_if_possible((void **)A, (size_t)dim*dim*sizeof(float));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)A, (size_t)dim*dim*sizeof(float));
			
 
				 	} 
			
 
				 	else {
			
 
				 		*A = malloc(dim*dim*sizeof(float));
			
@@ -240,7 +240,7 @@ void dw_cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	starpu_filter f;
			
 
				 		f.filter_func = starpu_vertical_block_filter_func;
			
@@ -254,7 +254,7 @@ void dw_cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
				 
			
 
				 	_dw_cholesky(dataA, nblocks);
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 }
			
--- a/examples/cholesky/dw_cholesky_grain.c
+++ b/examples/cholesky/dw_cholesky_grain.c
@@ -55,7 +55,7 @@ static struct starpu_task * create_task_11(starpu_data_handle dataA, unsigned k,
 
				 	task->cl = &cl11;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k);
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k);
			
 
				 	task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	/* this is an important task */
			
@@ -87,9 +87,9 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned j, uns
 
				 	task->cl = &cl21;	
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, j); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (j == k+1) {
			
@@ -127,11 +127,11 @@ static void create_task_22(starpu_data_handle dataA, unsigned k, unsigned i, uns
 
				 	task->cl = &cl22;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, i); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, i); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, j); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_R;
			
 
				-	task->buffers[2].handle = starpu_get_sub_data(dataA, 2, i, j); 
			
 
				+	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 	if ( (i == k + 1) && (j == k +1) ) {
			
@@ -168,7 +168,7 @@ static void _dw_cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	starpu_filter f;
			
 
				 		f.filter_func = starpu_vertical_block_filter_func;
			
@@ -215,7 +215,7 @@ static void _dw_cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned
 
				 	{
			
 
				 		/* stall the application until the end of computations */
			
 
				 		starpu_tag_wait(TAG11_AUX(nblocks-1, reclevel));
			
 
				-		starpu_unpartition_data(dataA, 0);
			
 
				+		starpu_data_unpartition(dataA, 0);
			
 
				 		return;
			
 
				 	}
			
 
				 	else {
			
@@ -237,8 +237,8 @@ static void _dw_cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned
 
				 
			
 
				 		free(tag_array);
			
 
				 
			
 
				-		starpu_unpartition_data(dataA, 0);
			
 
				-		starpu_delete_data(dataA);
			
 
				+		starpu_data_unpartition(dataA, 0);
			
 
				+		starpu_data_unregister(dataA);
			
 
				 
			
 
				 		float *newmatA = &matA[nbigblocks*(size/nblocks)*(ld+1)];
			
 
				 
			
@@ -250,13 +250,13 @@ void initialize_system(float **A, unsigned dim, unsigned pinned)
 
				 {
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	_starpu_timing_init();
			
 
				 
			
 
				 	if (pinned)
			
 
				 	{
			
 
				-		starpu_malloc_pinned_if_possible((void **)A, dim*dim*sizeof(float));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)A, dim*dim*sizeof(float));
			
 
				 	} 
			
 
				 	else {
			
 
				 		*A = malloc(dim*dim*sizeof(float));
			
@@ -281,7 +281,7 @@ void dw_cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks
 
				 	double flop = (1.0f*size*size*size)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 }
			
--- a/examples/cholesky/dw_cholesky_models.c
+++ b/examples/cholesky/dw_cholesky_models.c
@@ -38,7 +38,7 @@ static double cpu_chol_task_11_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176);
			
 
				 
			
@@ -53,7 +53,7 @@ static double cuda_chol_task_11_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883);
			
 
				 
			
@@ -68,7 +68,7 @@ static double cpu_chol_task_21_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965);
			
 
				 
			
@@ -83,7 +83,7 @@ static double cuda_chol_task_21_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520);
			
 
				 
			
@@ -98,7 +98,7 @@ static double cpu_chol_task_22_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760);
			
 
				 
			
@@ -113,7 +113,7 @@ static double cuda_chol_task_22_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666);
			
 
				 
			
--- a/examples/cholesky/dw_cholesky_no_stride.c
+++ b/examples/cholesky/dw_cholesky_no_stride.c
@@ -259,7 +259,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	_starpu_timing_init();
			
 
				 
			
@@ -311,14 +311,14 @@ int main(int argc, char **argv)
 
				 	for (x = 0; x < nblocks; x++)
			
 
				 	{
			
 
				 		if (x <= y) {
			
 
				-			starpu_register_matrix_data(&A_state[y][x], 0, (uintptr_t)A[y][x], 
			
 
				+			starpu_matrix_data_register(&A_state[y][x], 0, (uintptr_t)A[y][x], 
			
 
				 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	dw_cholesky_no_stride();
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 	return 0;
			
--- a/examples/common/blas_model.c
+++ b/examples/common/blas_model.c
@@ -32,9 +32,9 @@ double gemm_cost(starpu_buffer_descr *descr)
 
				 	uint32_t nxC, nyC, nxA;
			
 
				 
			
 
				 
			
 
				-	nxC = starpu_get_matrix_nx(descr[2].handle);
			
 
				-	nyC = starpu_get_matrix_ny(descr[2].handle);
			
 
				-	nxA = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	nxC = starpu_matrix_get_nx(descr[2].handle);
			
 
				+	nyC = starpu_matrix_get_ny(descr[2].handle);
			
 
				+	nxA = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 //	printf("nxC %d nxC %d nxA %d\n", nxC, nyC, nxA);
			
 
				 
			
--- a/examples/heat/dw_factolu.c
+++ b/examples/heat/dw_factolu.c
@@ -106,7 +106,7 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 
				 			task->cl_arg = u11arg;
			
 
				 
			
 
				 			task->buffers[0].handle =
			
 
				-				starpu_get_sub_data(args->dataA, 2, k+1, k+1);
			
 
				+				starpu_data_get_sub_data(args->dataA, 2, k+1, k+1);
			
 
				 			task->buffers[0].mode = STARPU_RW;
			
 
				 	
			
 
				 		u11arg->dataA = args->dataA;
			
@@ -145,10 +145,10 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 
				 					u21a->dataA = args->dataA;
			
 
				 
			
 
				 					task21->buffers[0].handle = 
			
 
				-						starpu_get_sub_data(args->dataA, 2, u21a->i, u21a->i);
			
 
				+						starpu_data_get_sub_data(args->dataA, 2, u21a->i, u21a->i);
			
 
				 					task21->buffers[0].mode = STARPU_R;
			
 
				 					task21->buffers[1].handle =
			
 
				-						starpu_get_sub_data(args->dataA, 2, u21a->i, u21a->k);
			
 
				+						starpu_data_get_sub_data(args->dataA, 2, u21a->i, u21a->k);
			
 
				 					task21->buffers[1].mode = STARPU_RW;
			
 
				 		
			
 
				 					starpu_task_submit(task21);
			
@@ -179,9 +179,9 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 
				 					u12a->nblocks = args->nblocks;
			
 
				 					u12a->dataA = args->dataA;
			
 
				 
			
 
				-					task12->buffers[0].handle = starpu_get_sub_data(args->dataA, 2, u12a->i, u12a->i); 
			
 
				+					task12->buffers[0].handle = starpu_data_get_sub_data(args->dataA, 2, u12a->i, u12a->i); 
			
 
				 					task12->buffers[0].mode = STARPU_R;
			
 
				-					task12->buffers[1].handle = starpu_get_sub_data(args->dataA, 2, u12a->k, u12a->i); 
			
 
				+					task12->buffers[1].handle = starpu_data_get_sub_data(args->dataA, 2, u12a->k, u12a->i); 
			
 
				 					task12->buffers[1].mode = STARPU_RW;
			
 
				 					
			
 
				 					starpu_task_submit(task12);
			
@@ -231,13 +231,13 @@ void dw_callback_v2_codelet_update_u12(void *argcb)
 
				 				u22a->dataA = args->dataA;
			
 
				 				u22a->nblocks = nblocks;
			
 
				 
			
 
				-				task22->buffers[0].handle = starpu_get_sub_data(args->dataA, 2, u22a->i, u22a->k);
			
 
				+				task22->buffers[0].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->i, u22a->k);
			
 
				 				task22->buffers[0].mode = STARPU_R;
			
 
				 
			
 
				-				task22->buffers[1].handle = starpu_get_sub_data(args->dataA, 2, u22a->k, u22a->j);
			
 
				+				task22->buffers[1].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->k, u22a->j);
			
 
				 				task22->buffers[1].mode = STARPU_R;
			
 
				 
			
 
				-				task22->buffers[2].handle = starpu_get_sub_data(args->dataA, 2, u22a->i, u22a->j);
			
 
				+				task22->buffers[2].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->i, u22a->j);
			
 
				 				task22->buffers[2].mode = STARPU_RW;
			
 
				 				
			
 
				 				/* schedule that codelet */
			
@@ -290,13 +290,13 @@ void dw_callback_v2_codelet_update_u21(void *argcb)
 
				 				u22a->dataA = args->dataA;
			
 
				 				u22a->nblocks = nblocks;
			
 
				 
			
 
				-				task22->buffers[0].handle = starpu_get_sub_data(args->dataA, 2, u22a->i, u22a->k);
			
 
				+				task22->buffers[0].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->i, u22a->k);
			
 
				 				task22->buffers[0].mode = STARPU_R;
			
 
				 
			
 
				-				task22->buffers[1].handle = starpu_get_sub_data(args->dataA, 2, u22a->k, u22a->j);
			
 
				+				task22->buffers[1].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->k, u22a->j);
			
 
				 				task22->buffers[1].mode = STARPU_R;
			
 
				 
			
 
				-				task22->buffers[2].handle = starpu_get_sub_data(args->dataA, 2, u22a->i, u22a->j);
			
 
				+				task22->buffers[2].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->i, u22a->j);
			
 
				 				task22->buffers[2].mode = STARPU_RW;
			
 
				 				
			
 
				 				/* schedule that codelet */
			
@@ -362,9 +362,9 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 
				 					u12a->nblocks = args->nblocks;
			
 
				 					u12a->dataA = args->dataA;
			
 
				 
			
 
				-					task12->buffers[0].handle = starpu_get_sub_data(args->dataA, 2, u12a->i, u12a->i); 
			
 
				+					task12->buffers[0].handle = starpu_data_get_sub_data(args->dataA, 2, u12a->i, u12a->i); 
			
 
				 					task12->buffers[0].mode = STARPU_R;
			
 
				-					task12->buffers[1].handle = starpu_get_sub_data(args->dataA, 2, u12a->k, u12a->i); 
			
 
				+					task12->buffers[1].handle = starpu_data_get_sub_data(args->dataA, 2, u12a->k, u12a->i); 
			
 
				 					task12->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 					if (!no_prio && (slice == i +1))
			
@@ -399,9 +399,9 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 
				 					u21a->nblocks = args->nblocks;
			
 
				 					u21a->dataA = args->dataA;
			
 
				 
			
 
				-					task21->buffers[0].handle = starpu_get_sub_data(args->dataA, 2, u21a->i, u21a->i);
			
 
				+					task21->buffers[0].handle = starpu_data_get_sub_data(args->dataA, 2, u21a->i, u21a->i);
			
 
				 					task21->buffers[0].mode = STARPU_R;
			
 
				-					task21->buffers[1].handle = starpu_get_sub_data(args->dataA, 2, u21a->i, u21a->k);
			
 
				+					task21->buffers[1].handle = starpu_data_get_sub_data(args->dataA, 2, u21a->i, u21a->k);
			
 
				 					task21->buffers[1].mode = STARPU_RW;
			
 
				 		
			
 
				 					if (!no_prio && (slice == i +1))
			
@@ -479,17 +479,17 @@ void dw_callback_codelet_update_u11(void *argcb)
 
				 			u21a->remaining = remaining;
			
 
				 
			
 
				 			task12->buffers[0].handle = 
			
 
				-				starpu_get_sub_data(args->dataA, 2, u12a->i, u12a->i); 
			
 
				+				starpu_data_get_sub_data(args->dataA, 2, u12a->i, u12a->i); 
			
 
				 			task12->buffers[0].mode = STARPU_R;
			
 
				 			task12->buffers[1].handle = 
			
 
				-				starpu_get_sub_data(args->dataA, 2, u12a->k, u12a->i); 
			
 
				+				starpu_data_get_sub_data(args->dataA, 2, u12a->k, u12a->i); 
			
 
				 			task12->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 			task21->buffers[0].handle = 
			
 
				-				starpu_get_sub_data(args->dataA, 2, u21a->i, u21a->i);
			
 
				+				starpu_data_get_sub_data(args->dataA, 2, u21a->i, u21a->i);
			
 
				 			task21->buffers[0].mode = STARPU_R;
			
 
				 			task21->buffers[1].handle = 
			
 
				-				starpu_get_sub_data(args->dataA, 2, u21a->i, u21a->k);
			
 
				+				starpu_data_get_sub_data(args->dataA, 2, u21a->i, u21a->k);
			
 
				 			task21->buffers[1].mode = STARPU_RW;
			
 
				 		
			
 
				 			starpu_task_submit(task12);
			
@@ -517,7 +517,7 @@ void dw_callback_codelet_update_u22(void *argcb)
 
				 			task->cl = &cl11;
			
 
				 			task->cl_arg = u11arg;
			
 
				 
			
 
				-			task->buffers[0].handle = starpu_get_sub_data(args->dataA, 2, args->k + 1, args->k + 1);
			
 
				+			task->buffers[0].handle = starpu_data_get_sub_data(args->dataA, 2, args->k + 1, args->k + 1);
			
 
				 			task->buffers[0].mode = STARPU_RW;
			
 
				 	
			
 
				 		u11arg->dataA = args->dataA;
			
@@ -566,13 +566,13 @@ void dw_callback_codelet_update_u12_21(void *argcb)
 
				 				u22a->nblocks = nblocks;
			
 
				 				u22a->remaining = remaining;
			
 
				 
			
 
				-				task22->buffers[0].handle = starpu_get_sub_data(args->dataA, 2, u22a->i, u22a->k);
			
 
				+				task22->buffers[0].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->i, u22a->k);
			
 
				 				task22->buffers[0].mode = STARPU_R;
			
 
				 
			
 
				-				task22->buffers[1].handle = starpu_get_sub_data(args->dataA, 2, u22a->k, u22a->j);
			
 
				+				task22->buffers[1].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->k, u22a->j);
			
 
				 				task22->buffers[1].mode = STARPU_R;
			
 
				 
			
 
				-				task22->buffers[2].handle = starpu_get_sub_data(args->dataA, 2, u22a->i, u22a->j);
			
 
				+				task22->buffers[2].handle = starpu_data_get_sub_data(args->dataA, 2, u22a->i, u22a->j);
			
 
				 				task22->buffers[2].mode = STARPU_RW;
			
 
				 				
			
 
				 				/* schedule that codelet */
			
@@ -605,7 +605,7 @@ void dw_codelet_facto(starpu_data_handle dataA, unsigned nblocks)
 
				 		task->cl = &cl11;
			
 
				 		task->cl_arg = args;
			
 
				 
			
 
				-		task->buffers[0].handle = starpu_get_sub_data(dataA, 2, 0, 0);
			
 
				+		task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, 0, 0);
			
 
				 		task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	/* schedule the codelet */
			
@@ -625,7 +625,7 @@ void dw_codelet_facto(starpu_data_handle dataA, unsigned nblocks)
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	printf("%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataA);
			
 
				+	unsigned n = starpu_matrix_get_nx(dataA);
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 }
			
@@ -657,7 +657,7 @@ void dw_codelet_facto_v2(starpu_data_handle dataA, unsigned nblocks)
 
				 		task->cl = &cl11;
			
 
				 		task->cl_arg = args;
			
 
				 
			
 
				-		task->buffers[0].handle = starpu_get_sub_data(dataA, 2, 0, 0); 
			
 
				+		task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, 0, 0); 
			
 
				 		task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	/* schedule the codelet */
			
@@ -682,7 +682,7 @@ void dw_codelet_facto_v2(starpu_data_handle dataA, unsigned nblocks)
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	printf("%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataA);
			
 
				+	unsigned n = starpu_matrix_get_nx(dataA);
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 }
			
@@ -693,12 +693,12 @@ void initialize_system(float **A, float **B, unsigned dim, unsigned pinned)
 
				 
			
 
				 	_starpu_timing_init();
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	if (pinned)
			
 
				 	{
			
 
				-		starpu_malloc_pinned_if_possible((void **)A, (size_t)dim*dim*sizeof(float));
			
 
				-		starpu_malloc_pinned_if_possible((void **)B, (size_t)dim*sizeof(float));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)A, (size_t)dim*dim*sizeof(float));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)B, (size_t)dim*sizeof(float));
			
 
				 	} 
			
 
				 	else {
			
 
				 		*A = malloc((size_t)dim*dim*sizeof(float));
			
@@ -727,7 +727,7 @@ void dw_factoLU(float *matA, unsigned size,
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, 
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, 
			
 
				 			size, size, sizeof(float));
			
 
				 
			
 
				 	starpu_filter f;
			
@@ -751,9 +751,9 @@ void dw_factoLU(float *matA, unsigned size,
 
				 	}
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_unpartition_data(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, 0);
			
 
				 
			
 
				-	starpu_delete_data(dataA);
			
 
				+	starpu_data_unregister(dataA);
			
 
				 
			
 
				 #ifdef CHECK_RESULTS
			
 
				 	compare_A_LU(Asaved, matA, size, ld);
			
--- a/examples/heat/dw_factolu_grain.c
+++ b/examples/heat/dw_factolu_grain.c
@@ -59,7 +59,7 @@ static struct starpu_task *create_task_11(starpu_data_handle dataA, unsigned k,
 
				 	task->cl = &cl11;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k);
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k);
			
 
				 	task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	/* this is an important task */
			
@@ -92,9 +92,9 @@ static void create_task_12(starpu_data_handle dataA, unsigned k, unsigned i, uns
 
				 	task->cl = &cl12;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, i, k); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (i == k+1) {
			
@@ -129,9 +129,9 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned j, uns
 
				 	task->cl = &cl21;
			
 
				 	
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, j); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (j == k+1) {
			
@@ -168,11 +168,11 @@ static void create_task_22(starpu_data_handle dataA, unsigned k, unsigned i, uns
 
				 	task->cl = &cl22;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, i, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, j); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_R;
			
 
				-	task->buffers[2].handle = starpu_get_sub_data(dataA, 2, i, j); 
			
 
				+	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 	if ( (i == k + 1) && (j == k +1) ) {
			
@@ -197,7 +197,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 	 * (re)partition data
			
 
				 	 */
			
 
				 	starpu_data_handle dataA;
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	STARPU_ASSERT((size % blocksize) == 0);
			
 
				 	STARPU_ASSERT((inner_size % blocksize) == 0);
			
@@ -265,7 +265,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 	{
			
 
				 		/* we wait for the last task and we are done */
			
 
				 		starpu_tag_wait(TAG11(nblocks-1, tag_prefix));
			
 
				-		starpu_unpartition_data(dataA, 0);		
			
 
				+		starpu_data_unpartition(dataA, 0);		
			
 
				 		return;
			
 
				 	}
			
 
				 	else {
			
@@ -288,8 +288,8 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 
			
 
				 		free(tag_array);
			
 
				 
			
 
				-		starpu_unpartition_data(dataA, 0);
			
 
				-		starpu_delete_data(dataA);
			
 
				+		starpu_data_unpartition(dataA, 0);
			
 
				+		starpu_data_unregister(dataA);
			
 
				 
			
 
				 		float *newmatA = &matA[inner_size*(ld+1)];
			
 
				 
			
--- a/examples/heat/dw_factolu_kernels.c
+++ b/examples/heat/dw_factolu_kernels.c
@@ -30,7 +30,7 @@ unsigned count_22_total = 0;
 
				 
			
 
				 void display_stat_heat(void)
			
 
				 {
			
 
				-	unsigned nworkers = starpu_get_worker_count();
			
 
				+	unsigned nworkers = starpu_worker_get_count();
			
 
				 
			
 
				 	fprintf(stderr, "STATS : \n");
			
 
				 
			
@@ -54,7 +54,7 @@ void display_stat_heat(void)
 
				 		if (count_total_per_worker[worker])
			
 
				 		{
			
 
				 			char name[32];
			
 
				-			starpu_get_worker_name(worker, name, 32);
			
 
				+			starpu_worker_get_name(worker, name, 32);
			
 
				 			
			
 
				 			fprintf(stderr, "\t\t%s -> %d / %d (%2.2f %%)\n", name, count_11_per_worker[worker], count_11_total, (100.0*count_11_per_worker[worker])/count_11_total);
			
 
				 		}
			
@@ -66,7 +66,7 @@ void display_stat_heat(void)
 
				 		if (count_total_per_worker[worker])
			
 
				 		{
			
 
				 			char name[32];
			
 
				-			starpu_get_worker_name(worker, name, 32);
			
 
				+			starpu_worker_get_name(worker, name, 32);
			
 
				 			
			
 
				 			fprintf(stderr, "\t\t%s -> %d / %d (%2.2f %%)\n", name, count_12_per_worker[worker], count_12_total, (100.0*count_12_per_worker[worker])/count_12_total);
			
 
				 		}
			
@@ -79,7 +79,7 @@ void display_stat_heat(void)
 
				 		if (count_total_per_worker[worker])
			
 
				 		{
			
 
				 			char name[32];
			
 
				-			starpu_get_worker_name(worker, name, 32);
			
 
				+			starpu_worker_get_name(worker, name, 32);
			
 
				 			
			
 
				 			fprintf(stderr, "\t\t%s -> %d / %d (%2.2f %%)\n", name, count_21_per_worker[worker], count_21_total, (100.0*count_21_per_worker[worker])/count_21_total);
			
 
				 		}
			
@@ -91,7 +91,7 @@ void display_stat_heat(void)
 
				 		if (count_total_per_worker[worker])
			
 
				 		{
			
 
				 			char name[32];
			
 
				-			starpu_get_worker_name(worker, name, 32);
			
 
				+			starpu_worker_get_name(worker, name, 32);
			
 
				 			
			
 
				 			fprintf(stderr, "\t\t%s -> %d / %d (%2.2f %%)\n", name, count_22_per_worker[worker], count_22_total, (100.0*count_22_per_worker[worker])/count_22_total);
			
 
				 		}
			
@@ -149,7 +149,7 @@ void dw_cpu_codelet_update_u22(void *descr[], void *_args)
 
				 {
			
 
				 	dw_common_cpu_codelet_update_u22(descr, 0, _args);
			
 
				 
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	count_22_per_worker[id]++;
			
 
				 }
			
 
				 
			
@@ -158,7 +158,7 @@ void dw_cublas_codelet_update_u22(void *descr[], void *_args)
 
				 {
			
 
				 	dw_common_cpu_codelet_update_u22(descr, 1, _args);
			
 
				 
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	count_22_per_worker[id]++;
			
 
				 }
			
 
				 #endif// STARPU_USE_CUDA
			
@@ -212,7 +212,7 @@ void dw_cpu_codelet_update_u12(void *descr[], void *_args)
 
				 {
			
 
				 	dw_common_codelet_update_u12(descr, 0, _args);
			
 
				 
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	count_12_per_worker[id]++;
			
 
				 }
			
 
				 
			
@@ -221,7 +221,7 @@ void dw_cublas_codelet_update_u12(void *descr[], void *_args)
 
				 {
			
 
				 	 dw_common_codelet_update_u12(descr, 1, _args);
			
 
				 
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	count_12_per_worker[id]++;
			
 
				 }
			
 
				 #endif // STARPU_USE_CUDA
			
@@ -272,7 +272,7 @@ void dw_cpu_codelet_update_u21(void *descr[], void *_args)
 
				 {
			
 
				 	dw_common_codelet_update_u21(descr, 0, _args);
			
 
				 
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	count_21_per_worker[id]++;
			
 
				 }
			
 
				 
			
@@ -281,7 +281,7 @@ void dw_cublas_codelet_update_u21(void *descr[], void *_args)
 
				 {
			
 
				 	dw_common_codelet_update_u21(descr, 1, _args);
			
 
				 
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	count_21_per_worker[id]++;
			
 
				 }
			
 
				 #endif 
			
@@ -367,7 +367,7 @@ void dw_cpu_codelet_update_u11(void *descr[], void *_args)
 
				 {
			
 
				 	dw_common_codelet_update_u11(descr, 0, _args);
			
 
				 
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	count_11_per_worker[id]++;
			
 
				 }
			
 
				 
			
@@ -376,7 +376,7 @@ void dw_cublas_codelet_update_u11(void *descr[], void *_args)
 
				 {
			
 
				 	dw_common_codelet_update_u11(descr, 1, _args);
			
 
				 
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	count_11_per_worker[id]++;
			
 
				 }
			
 
				 #endif// STARPU_USE_CUDA
			
--- a/examples/heat/dw_factolu_tag.c
+++ b/examples/heat/dw_factolu_tag.c
@@ -61,7 +61,7 @@ static struct starpu_task *create_task_11(starpu_data_handle dataA, unsigned k)
 
				 	task->cl = &cl11;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k);
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k);
			
 
				 	task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	/* this is an important task */
			
@@ -95,9 +95,9 @@ static void create_task_12(starpu_data_handle dataA, unsigned k, unsigned i)
 
				 	task->cl = &cl12;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, i, k); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio && (i == k+1)) {
			
@@ -132,9 +132,9 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned j)
 
				 	task->cl = &cl21;
			
 
				 	
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, j); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio && (j == k+1)) {
			
@@ -171,11 +171,11 @@ static void create_task_22(starpu_data_handle dataA, unsigned k, unsigned i, uns
 
				 	task->cl = &cl22;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, i, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, j); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_R;
			
 
				-	task->buffers[2].handle = starpu_get_sub_data(dataA, 2, i, j); 
			
 
				+	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
			
@@ -254,7 +254,7 @@ static void dw_codelet_facto_v3(starpu_data_handle dataA, unsigned nblocks)
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	printf("%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataA);
			
 
				+	unsigned n = starpu_matrix_get_nx(dataA);
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 }
			
@@ -276,7 +276,7 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	starpu_filter f;
			
 
				 		f.filter_func = starpu_vertical_block_filter_func;
			
@@ -291,7 +291,7 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
				 	dw_codelet_facto_v3(dataA, nblocks);
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_unpartition_data(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, 0);
			
 
				 
			
 
				 #ifdef CHECK_RESULTS
			
 
				 	compare_A_LU(Asaved, matA, size, ld);
			
--- a/examples/heat/dw_sparse_cg.c
+++ b/examples/heat/dw_sparse_cg.c
@@ -333,10 +333,10 @@ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
 
				 	starpu_data_handle ds_vecr, ds_vecd, ds_vecq; 
			
 
				 
			
 
				 	/* first the user-allocated data */
			
 
				-	starpu_register_csr_data(&ds_matrixA, 0, nnz, nrow, 
			
 
				+	starpu_csr_data_register(&ds_matrixA, 0, nnz, nrow, 
			
 
				 			(uintptr_t)nzvalA, colind, rowptr, 0, sizeof(float));
			
 
				-	starpu_register_vector_data(&ds_vecx, 0, (uintptr_t)vecx, nrow, sizeof(float));
			
 
				-	starpu_register_vector_data(&ds_vecb, 0, (uintptr_t)vecb, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecx, 0, (uintptr_t)vecx, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecb, 0, (uintptr_t)vecb, nrow, sizeof(float));
			
 
				 
			
 
				 	/* then allocate the algorithm intern data */
			
 
				 	float *ptr_vecr, *ptr_vecd, *ptr_vecq;
			
@@ -356,9 +356,9 @@ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
 
				 	printf("nrow = %d \n", nrow);
			
 
				 
			
 
				 	/* and declare them as well */
			
 
				-	starpu_register_vector_data(&ds_vecr, 0, (uintptr_t)ptr_vecr, nrow, sizeof(float));
			
 
				-	starpu_register_vector_data(&ds_vecd, 0, (uintptr_t)ptr_vecd, nrow, sizeof(float));
			
 
				-	starpu_register_vector_data(&ds_vecq, 0, (uintptr_t)ptr_vecq, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecr, 0, (uintptr_t)ptr_vecr, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecd, 0, (uintptr_t)ptr_vecd, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecq, 0, (uintptr_t)ptr_vecq, nrow, sizeof(float));
			
 
				 
			
 
				 	/* we now have the complete problem */
			
 
				 	struct cg_problem problem;
			
@@ -395,7 +395,7 @@ void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz
 
				 	/* start the runtime */
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	conjugate_gradient(nzvalA, vecb, vecx, nnz, nrow, colind, rowptr);
			
 
				 }
			
--- a/examples/heat/heat.c
+++ b/examples/heat/heat.c
@@ -752,7 +752,7 @@ int main(int argc, char **argv)
 
				 		if (check)
			
 
				 			solve_system(DIM, newsize, result, RefArray, Bformer, A, B);
			
 
				 
			
 
				-		starpu_helper_init_cublas();
			
 
				+		starpu_helper_cublas_init();
			
 
				 
			
 
				 		starpu_shutdown();
			
 
				 	}
			
--- a/examples/heat/lu_kernels_model.c
+++ b/examples/heat/lu_kernels_model.c
@@ -44,7 +44,7 @@ double task_11_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/537.5);
			
 
				 
			
@@ -55,7 +55,7 @@ double task_12_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 //	double cost = ((n*n*n)/1744.695);
			
 
				 	double cost = ((n*n*n)/3210.80);
			
@@ -69,7 +69,7 @@ double task_21_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 //	double cost = ((n*n*n)/1744.695);
			
 
				 	double cost = ((n*n*n)/3691.53);
			
@@ -84,9 +84,9 @@ double task_22_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t nx, ny, nz;
			
 
				 
			
 
				-	nx = starpu_get_matrix_nx(descr[2].handle);
			
 
				-	ny = starpu_get_matrix_ny(descr[2].handle);
			
 
				-	nz = starpu_get_matrix_ny(descr[0].handle);
			
 
				+	nx = starpu_matrix_get_nx(descr[2].handle);
			
 
				+	ny = starpu_matrix_get_ny(descr[2].handle);
			
 
				+	nz = starpu_matrix_get_ny(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((nx*ny*nz)/4110.0);
			
 
				 
			
@@ -104,7 +104,7 @@ double task_11_cost_cuda(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/1853.7806);
			
 
				 
			
@@ -116,7 +116,7 @@ double task_12_cost_cuda(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/42838.5718);
			
 
				 
			
@@ -129,7 +129,7 @@ double task_21_cost_cuda(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/49208.667);
			
 
				 
			
@@ -143,9 +143,9 @@ double task_22_cost_cuda(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t nx, ny, nz;
			
 
				 
			
 
				-	nx = starpu_get_matrix_nx(descr[2].handle);
			
 
				-	ny = starpu_get_matrix_ny(descr[2].handle);
			
 
				-	nz = starpu_get_matrix_ny(descr[0].handle);
			
 
				+	nx = starpu_matrix_get_nx(descr[2].handle);
			
 
				+	ny = starpu_matrix_get_ny(descr[2].handle);
			
 
				+	nz = starpu_matrix_get_ny(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((nx*ny*nz)/57523.560);
			
 
				 
			
@@ -163,7 +163,7 @@ double task_11_cost_cpu(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/537.5);
			
 
				 
			
@@ -175,7 +175,7 @@ double task_12_cost_cpu(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/6668.224);
			
 
				 
			
@@ -188,7 +188,7 @@ double task_21_cost_cpu(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/6793.8423);
			
 
				 
			
@@ -202,9 +202,9 @@ double task_22_cost_cpu(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t nx, ny, nz;
			
 
				 
			
 
				-	nx = starpu_get_matrix_nx(descr[2].handle);
			
 
				-	ny = starpu_get_matrix_ny(descr[2].handle);
			
 
				-	nz = starpu_get_matrix_ny(descr[0].handle);
			
 
				+	nx = starpu_matrix_get_nx(descr[2].handle);
			
 
				+	ny = starpu_matrix_get_ny(descr[2].handle);
			
 
				+	nz = starpu_matrix_get_ny(descr[0].handle);
			
 
				 
			
 
				 	double cost = ((nx*ny*nz)/4203.0175);
			
 
				 
			
--- a/examples/incrementer/incrementer.c
+++ b/examples/incrementer/incrementer.c
@@ -46,7 +46,7 @@ int main(int argc, char **argv)
 
				 	float float_array[4] __attribute__ ((aligned (16))) = { 0.0f, 0.0f, 0.0f, 0.0f};
			
 
				 
			
 
				 	starpu_data_handle float_array_handle;
			
 
				-	starpu_register_vector_data(&float_array_handle, 0 /* home node */,
			
 
				+	starpu_vector_data_register(&float_array_handle, 0 /* home node */,
			
 
				 			(uintptr_t)&float_array, 4, sizeof(float));
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
@@ -92,10 +92,10 @@ int main(int argc, char **argv)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	/* update the array in RAM */
			
 
				-	starpu_sync_data_with_mem(float_array_handle, STARPU_R);
			
 
				+	starpu_data_sync_with_mem(float_array_handle, STARPU_R);
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -107,7 +107,7 @@ int main(int argc, char **argv)
 
				 		return 1;
			
 
				 	}
			
 
				 
			
 
				-	starpu_release_data_from_mem(float_array_handle);
			
 
				+	starpu_data_release_from_mem(float_array_handle);
			
 
				 
			
 
				 	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 +
			
 
				 					(end.tv_usec - start.tv_usec));
			
--- a/examples/incrementer/incrementer_kernels_opencl.c
+++ b/examples/incrementer/incrementer_kernels_opencl.c
@@ -25,8 +25,8 @@ void opencl_codelet(void *descr[], void *_args)
 
				 	cl_command_queue queue;
			
 
				 	int id, devid, err;
			
 
				 
			
 
				-        id = starpu_get_worker_id();
			
 
				-        devid = starpu_get_worker_devid(id);
			
 
				+        id = starpu_worker_get_id();
			
 
				+        devid = starpu_worker_get_devid(id);
			
 
				 
			
 
				 	err = starpu_opencl_load_kernel(&kernel, &queue,
			
 
				                                         "examples/incrementer/incrementer_kernels_opencl_codelet.cl", "incrementer", devid);
			
--- a/examples/lu/lu_example.c
+++ b/examples/lu/lu_example.c
@@ -113,7 +113,7 @@ void copy_matrix_into_blocks(void)
 
				 	for (bj = 0; bj < nblocks; bj++)
			
 
				 	for (bi = 0; bi < nblocks; bi++)
			
 
				 	{
			
 
				-		starpu_malloc_pinned_if_possible((void **)&A_blocks[bi+nblocks*bj], (size_t)blocksize*blocksize*sizeof(TYPE));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&A_blocks[bi+nblocks*bj], (size_t)blocksize*blocksize*sizeof(TYPE));
			
 
				 
			
 
				 		for (j = 0; j < blocksize; j++)
			
 
				 		for (i = 0; i < blocksize; i++)
			
@@ -127,7 +127,7 @@ void copy_matrix_into_blocks(void)
 
				 static void init_matrix(void)
			
 
				 {
			
 
				 	/* allocate matrix */
			
 
				-	starpu_malloc_pinned_if_possible((void **)&A, (size_t)size*size*sizeof(TYPE));
			
 
				+	starpu_data_malloc_pinned_if_possible((void **)&A, (size_t)size*size*sizeof(TYPE));
			
 
				 	STARPU_ASSERT(A);
			
 
				 
			
 
				 	starpu_srand48((long int)time(NULL));
			
@@ -246,7 +246,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	init_matrix();
			
 
				 
			
@@ -303,7 +303,7 @@ int main(int argc, char **argv)
 
				 		check_result();
			
 
				 	}
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/examples/lu/xlu.c
+++ b/examples/lu/xlu.c
@@ -55,7 +55,7 @@ static struct starpu_task *create_task_11(starpu_data_handle dataA, unsigned k)
 
				 	task->cl = &cl11;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k);
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k);
			
 
				 	task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	/* this is an important task */
			
@@ -79,9 +79,9 @@ static void create_task_12(starpu_data_handle dataA, unsigned k, unsigned j)
 
				 	task->cl = &cl12;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, j, k); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, j, k); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio && (j == k+1)) {
			
@@ -106,9 +106,9 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned i)
 
				 	task->cl = &cl21;
			
 
				 	
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, i); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, i); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio && (i == k+1)) {
			
@@ -135,11 +135,11 @@ static void create_task_22(starpu_data_handle dataA, unsigned k, unsigned i, uns
 
				 	task->cl = &cl22;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, i); /* produced by TAG21(k, i) */ 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, i); /* produced by TAG21(k, i) */ 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, j, k); /* produced by TAG12(k, j) */
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, j, k); /* produced by TAG12(k, j) */
			
 
				 	task->buffers[1].mode = STARPU_R;
			
 
				-	task->buffers[2].handle = starpu_get_sub_data(dataA, 2, j, i); /* produced by TAG22(k-1, i, j) */
			
 
				+	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, j, i); /* produced by TAG22(k-1, i, j) */
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
			
@@ -218,7 +218,7 @@ static void dw_codelet_facto_v3(starpu_data_handle dataA, unsigned nblocks)
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	printf("%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataA);
			
 
				+	unsigned n = starpu_matrix_get_nx(dataA);
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 }
			
@@ -229,7 +229,7 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				 	/* We already enforce deps by hand */
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
@@ -247,5 +247,5 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
				 	dw_codelet_facto_v3(dataA, nblocks);
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_unpartition_data(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, 0);
			
 
				 }
			
--- a/examples/lu/xlu_implicit.c
+++ b/examples/lu/xlu_implicit.c
@@ -25,7 +25,7 @@ static void create_task_11(starpu_data_handle dataA, unsigned k)
 
				 	task->cl = &cl11;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k);
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k);
			
 
				 	task->buffers[0].mode = STARPU_RW;
			
 
				 
			
 
				 	/* this is an important task */
			
@@ -41,9 +41,9 @@ static void create_task_12(starpu_data_handle dataA, unsigned k, unsigned j)
 
				 	task->cl = &cl12;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, j, k); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, j, k); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio && (j == k+1))
			
@@ -59,9 +59,9 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned i)
 
				 	task->cl = &cl21;
			
 
				 	
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, k); 
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, k); 
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, k, i); 
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, i); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio && (i == k+1))
			
@@ -77,11 +77,11 @@ static void create_task_22(starpu_data_handle dataA, unsigned k, unsigned i, uns
 
				 	task->cl = &cl22;
			
 
				 
			
 
				 	/* which sub-data is manipulated ? */
			
 
				-	task->buffers[0].handle = starpu_get_sub_data(dataA, 2, k, i);
			
 
				+	task->buffers[0].handle = starpu_data_get_sub_data(dataA, 2, k, i);
			
 
				 	task->buffers[0].mode = STARPU_R;
			
 
				-	task->buffers[1].handle = starpu_get_sub_data(dataA, 2, j, k);
			
 
				+	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, j, k);
			
 
				 	task->buffers[1].mode = STARPU_R;
			
 
				-	task->buffers[2].handle = starpu_get_sub_data(dataA, 2, j, i);
			
 
				+	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, j, i);
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 	if (!no_prio &&  (i == k + 1) && (j == k +1) )
			
@@ -120,7 +120,7 @@ static void dw_codelet_facto_v3(starpu_data_handle dataA, unsigned nblocks)
 
				 	}
			
 
				 
			
 
				 	/* stall the application until the end of computations */
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -128,7 +128,7 @@ static void dw_codelet_facto_v3(starpu_data_handle dataA, unsigned nblocks)
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	printf("%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataA);
			
 
				+	unsigned n = starpu_matrix_get_nx(dataA);
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 }
			
@@ -139,7 +139,7 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				 	starpu_filter f;
			
 
				 		f.filter_func = starpu_vertical_block_filter_func;
			
@@ -154,5 +154,5 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
				 	dw_codelet_facto_v3(dataA, nblocks);
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_unpartition_data(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, 0);
			
 
				 }
			
--- a/examples/lu/xlu_implicit_pivot.c
+++ b/examples/lu/xlu_implicit_pivot.c
@@ -165,7 +165,7 @@ static double dw_codelet_facto_pivot(starpu_data_handle *dataAp,
 
				 	}
			
 
				 
			
 
				 	/* stall the application until the end of computations */
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -177,7 +177,7 @@ starpu_data_handle get_block_with_striding(starpu_data_handle *dataAp,
 
				 			unsigned nblocks __attribute__((unused)), unsigned j, unsigned i)
			
 
				 {
			
 
				 	/* we use filters */
			
 
				-	return starpu_get_sub_data(*dataAp, 2, j, i);
			
 
				+	return starpu_data_get_sub_data(*dataAp, 2, j, i);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -187,7 +187,7 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				 	starpu_filter f;
			
 
				 		f.filter_func = starpu_vertical_block_filter_func;
			
@@ -218,12 +218,12 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	fprintf(stderr, "%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataA);
			
 
				+	unsigned n = starpu_matrix_get_nx(dataA);
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_unpartition_data(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, 0);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -243,7 +243,7 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 
				 	for (bj = 0; bj < nblocks; bj++)
			
 
				 	for (bi = 0; bi < nblocks; bi++)
			
 
				 	{
			
 
				-		starpu_register_matrix_data(&dataAp[bi+nblocks*bj], 0,
			
 
				+		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
			
 
				 			(uintptr_t)matA[bi+nblocks*bj], size/nblocks,
			
 
				 			size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 	}
			
@@ -267,13 +267,13 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	fprintf(stderr, "%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataAp[0])*nblocks;
			
 
				+	unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 
			
 
				 	for (bj = 0; bj < nblocks; bj++)
			
 
				 	for (bi = 0; bi < nblocks; bi++)
			
 
				 	{
			
 
				-		starpu_delete_data(dataAp[bi+nblocks*bj]);
			
 
				+		starpu_data_unregister(dataAp[bi+nblocks*bj]);
			
 
				 	}
			
 
				 }
			
--- a/examples/lu/xlu_pivot.c
+++ b/examples/lu/xlu_pivot.c
@@ -286,7 +286,7 @@ static double dw_codelet_facto_pivot(starpu_data_handle *dataAp,
 
				 
			
 
				 	/* stall the application until the end of computations */
			
 
				 	starpu_tag_wait_array(ndeps, tags);
			
 
				-//	starpu_wait_all_tasks();
			
 
				+//	starpu_task_wait_for_all();
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -298,7 +298,7 @@ starpu_data_handle get_block_with_striding(starpu_data_handle *dataAp,
 
				 			unsigned nblocks __attribute__((unused)), unsigned j, unsigned i)
			
 
				 {
			
 
				 	/* we use filters */
			
 
				-	return starpu_get_sub_data(*dataAp, 2, j, i);
			
 
				+	return starpu_data_get_sub_data(*dataAp, 2, j, i);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -308,7 +308,7 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				 	/* We already enforce deps by hand */
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
@@ -351,12 +351,12 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	fprintf(stderr, "%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataA);
			
 
				+	unsigned n = starpu_matrix_get_nx(dataA);
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_unpartition_data(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, 0);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -376,7 +376,7 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 
				 	for (bj = 0; bj < nblocks; bj++)
			
 
				 	for (bi = 0; bi < nblocks; bi++)
			
 
				 	{
			
 
				-		starpu_register_matrix_data(&dataAp[bi+nblocks*bj], 0,
			
 
				+		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
			
 
				 			(uintptr_t)matA[bi+nblocks*bj], size/nblocks,
			
 
				 			size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 
			
@@ -403,13 +403,13 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 
				 	fprintf(stderr, "Computation took (in ms)\n");
			
 
				 	fprintf(stderr, "%2.2f\n", timing/1000);
			
 
				 
			
 
				-	unsigned n = starpu_get_matrix_nx(dataAp[0])*nblocks;
			
 
				+	unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
			
 
				 	double flop = (2.0f*n*n*n)/3.0f;
			
 
				 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 
			
 
				 	for (bj = 0; bj < nblocks; bj++)
			
 
				 	for (bi = 0; bi < nblocks; bi++)
			
 
				 	{
			
 
				-		starpu_delete_data(dataAp[bi+nblocks*bj]);
			
 
				+		starpu_data_unregister(dataAp[bi+nblocks*bj]);
			
 
				 	}
			
 
				 }
			
--- a/examples/matvecmult/matvecmult.c
+++ b/examples/matvecmult/matvecmult.c
@@ -34,8 +34,8 @@ void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 
				 	float *vector = (float *)STARPU_GET_VECTOR_PTR(descr[1]);
			
 
				 	float *mult = (float *)STARPU_GET_VECTOR_PTR(descr[2]);
			
 
				 
			
 
				-        id = starpu_get_worker_id();
			
 
				-        devid = starpu_get_worker_devid(id);
			
 
				+        id = starpu_worker_get_id();
			
 
				+        devid = starpu_worker_get_devid(id);
			
 
				 
			
 
				         err = starpu_opencl_load_kernel(&kernel, &queue,
			
 
				                                         "examples/matvecmult/matvecmult_kernel.cl", "matVecMult", devid);
			
@@ -141,9 +141,9 @@ int main(int argc, char **argv)
 
				         fillArray(mult, height);
			
 
				         matVecMult(matrix, vector, width, height, correctResult);
			
 
				 
			
 
				-	starpu_register_matrix_data(&matrix_handle, 0, (uintptr_t)matrix, width, width, height, sizeof(float));
			
 
				-	starpu_register_vector_data(&vector_handle, 0, (uintptr_t)vector, width, sizeof(float));
			
 
				-	starpu_register_vector_data(&mult_handle, 0, (uintptr_t)mult, height, sizeof(float));
			
 
				+	starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix, width, width, height, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, width, sizeof(float));
			
 
				+	starpu_vector_data_register(&mult_handle, 0, (uintptr_t)mult, height, sizeof(float));
			
 
				 
			
 
				         _starpu_opencl_compile_source_to_opencl("examples/matvecmult/matvecmult_kernel.cl");
			
 
				 
			
@@ -168,12 +168,12 @@ int main(int argc, char **argv)
 
				                 exit(0);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	/* update the array in RAM */
			
 
				-        starpu_sync_data_with_mem(matrix_handle, STARPU_R);
			
 
				-        starpu_sync_data_with_mem(vector_handle, STARPU_R);
			
 
				-        starpu_sync_data_with_mem(mult_handle, STARPU_R);
			
 
				+        starpu_data_sync_with_mem(matrix_handle, STARPU_R);
			
 
				+        starpu_data_sync_with_mem(vector_handle, STARPU_R);
			
 
				+        starpu_data_sync_with_mem(mult_handle, STARPU_R);
			
 
				 
			
 
				         int res = compareL2fe(correctResult, mult, height, 1e-6f);
			
 
				         printf("TEST %s\n\n", (res == 0) ? "PASSED" : "FAILED !!!");
			
@@ -182,9 +182,9 @@ int main(int argc, char **argv)
 
				         printArray(vector, width);
			
 
				         printArray(mult, height);
			
 
				 #endif
			
 
				-        starpu_release_data_from_mem(matrix_handle);
			
 
				-        starpu_release_data_from_mem(vector_handle);
			
 
				-        starpu_release_data_from_mem(mult_handle);
			
 
				+        starpu_data_release_from_mem(matrix_handle);
			
 
				+        starpu_data_release_from_mem(vector_handle);
			
 
				+        starpu_data_release_from_mem(mult_handle);
			
 
				 
			
 
				         starpu_shutdown();
			
 
				 #endif
			
--- a/examples/mult/dw_mult.c
+++ b/examples/mult/dw_mult.c
@@ -48,9 +48,9 @@ void terminate(void)
 
				 {
			
 
				 
			
 
				 	fprintf(stderr, "unpartition !!\n");
			
 
				-	starpu_unpartition_data(C_handle, 0);
			
 
				+	starpu_data_unpartition(C_handle, 0);
			
 
				 
			
 
				-	starpu_delete_data(C_handle);
			
 
				+	starpu_data_unregister(C_handle);
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -80,7 +80,7 @@ void terminate(void)
 
				 void callback_func(void *arg)
			
 
				 {
			
 
				 	/* do some accounting */
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	flop_per_worker[id] += BLAS3_FLOP(conf.m, conf.n, conf.k);
			
 
				 	ls_per_worker[id] += BLAS3_LS(conf.m, conf.n, conf.k);
			
 
				 }
			
@@ -91,9 +91,9 @@ static void init_problem_data(void)
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	if (pin) {
			
 
				-		starpu_malloc_pinned_if_possible((void **)&A, zdim*ydim*sizeof(float));
			
 
				-		starpu_malloc_pinned_if_possible((void **)&B, xdim*zdim*sizeof(float));
			
 
				-		starpu_malloc_pinned_if_possible((void **)&C, xdim*ydim*sizeof(float));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&A, zdim*ydim*sizeof(float));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&B, xdim*zdim*sizeof(float));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&C, xdim*ydim*sizeof(float));
			
 
				 	} else
			
 
				 #endif
			
 
				 	{
			
@@ -153,11 +153,11 @@ static void partition_mult_data(void)
 
				 {
			
 
				 	gettimeofday(&start, NULL);
			
 
				 
			
 
				-	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
			
 
				+	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A, 
			
 
				 		ydim, ydim, zdim, sizeof(float));
			
 
				-	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
			
 
				+	starpu_matrix_data_register(&B_handle, 0, (uintptr_t)B, 
			
 
				 		zdim, zdim, xdim, sizeof(float));
			
 
				-	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
			
 
				+	starpu_matrix_data_register(&C_handle, 0, (uintptr_t)C, 
			
 
				 		ydim, ydim, xdim, sizeof(float));
			
 
				 
			
 
				 	starpu_data_set_wb_mask(C_handle, 1<<0);
			
@@ -174,8 +174,8 @@ static void partition_mult_data(void)
 
				 	f2.filter_func = starpu_block_filter_func;
			
 
				 	f2.filter_arg = nslicesy;
			
 
				 		
			
 
				-	starpu_partition_data(B_handle, &f);
			
 
				-	starpu_partition_data(A_handle, &f2);
			
 
				+	starpu_data_partition(B_handle, &f);
			
 
				+	starpu_data_partition(A_handle, &f2);
			
 
				 
			
 
				 	starpu_map_filters(C_handle, 2, &f, &f2);
			
 
				 }
			
@@ -234,12 +234,12 @@ static void launch_codelets(void)
 
				 			task->use_tag = 1;
			
 
				 			task->tag_id = tag;
			
 
				 
			
 
				-			task->buffers[0].handle = starpu_get_sub_data(A_handle, 1, tasky);
			
 
				+			task->buffers[0].handle = starpu_data_get_sub_data(A_handle, 1, tasky);
			
 
				 			task->buffers[0].mode = STARPU_R;
			
 
				-			task->buffers[1].handle = starpu_get_sub_data(B_handle, 1, taskx);
			
 
				+			task->buffers[1].handle = starpu_data_get_sub_data(B_handle, 1, taskx);
			
 
				 			task->buffers[1].mode = STARPU_R;
			
 
				 			task->buffers[2].handle = 
			
 
				-				starpu_get_sub_data(C_handle, 2, taskx, tasky);
			
 
				+				starpu_data_get_sub_data(C_handle, 2, taskx, tasky);
			
 
				 			task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 			starpu_task_submit(task);
			
@@ -255,7 +255,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 
			
 
				 	/* start the runtime */
			
 
				 	starpu_init(NULL);
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	init_problem_data();
			
 
				 
			
@@ -263,11 +263,11 @@ int main(__attribute__ ((unused)) int argc,
 
				 
			
 
				 	launch_codelets();
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	terminate();
			
 
				 	
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	return 0;
			
--- a/examples/mult/dw_mult.h
+++ b/examples/mult/dw_mult.h
@@ -89,7 +89,7 @@ static struct block_conf conf __attribute__ ((aligned (128)));
 
				 static void display_stats(double timing)
			
 
				 {
			
 
				 	unsigned worker;
			
 
				-	unsigned nworkers = starpu_get_worker_count();
			
 
				+	unsigned nworkers = starpu_worker_get_count();
			
 
				 
			
 
				 	fprintf(stderr, "Computation took (ms):\n");
			
 
				 	printf("%2.2f\n", timing/1000);
			
@@ -102,7 +102,7 @@ static void display_stats(double timing)
 
				 		ls_total += ls_per_worker[worker];
			
 
				 
			
 
				 		char name[32];
			
 
				-		starpu_get_worker_name(worker, name, 32);
			
 
				+		starpu_worker_get_name(worker, name, 32);
			
 
				 
			
 
				 		fprintf(stderr, "\t%s -> %2.2f GFlop\t%2.2f GFlop/s\n", name, (double)flop_per_worker[worker]/1000000000.0f, (double)flop_per_worker[worker]/(double)timing/1000);
			
 
				 	}
			
--- a/examples/mult/dw_mult_no_stride.c
+++ b/examples/mult/dw_mult_no_stride.c
@@ -155,7 +155,7 @@ static void init_problem_data(void)
 
				 	{
			
 
				 		for (z = 0; z < nslicesz; z++)
			
 
				 		{
			
 
				-			starpu_register_matrix_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
			
 
				+			starpu_matrix_data_register(&A_state[y][z], 0, (uintptr_t)A[y][z], 
			
 
				 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEZ, sizeof(float));
			
 
				 		}
			
 
				 	}
			
@@ -164,7 +164,7 @@ static void init_problem_data(void)
 
				 	{
			
 
				 		for (x = 0; x < nslicesx; x++)
			
 
				 		{
			
 
				-			starpu_register_matrix_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
			
 
				+			starpu_matrix_data_register(&B_state[z][x], 0, (uintptr_t)B[z][x], 
			
 
				 				BLOCKSIZEZ, BLOCKSIZEZ, BLOCKSIZEX, sizeof(float));
			
 
				 		}
			
 
				 	}
			
@@ -173,7 +173,7 @@ static void init_problem_data(void)
 
				 	{
			
 
				 		for (x = 0; x < nslicesx; x++)
			
 
				 		{
			
 
				-			starpu_register_matrix_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
			
 
				+			starpu_matrix_data_register(&C_state[y][x], 0, (uintptr_t)C[y][x], 
			
 
				 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEX, sizeof(float));
			
 
				 		}
			
 
				 	}
			
@@ -298,7 +298,7 @@ static void callback_func_2(void *arg)
 
				 	free(cb2);
			
 
				 
			
 
				 	/* do some accounting */
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	flop_per_worker[id] += BLAS3_FLOP(BLOCKSIZEX, BLOCKSIZEY, BLOCKSIZEZ);
			
 
				 	ls_per_worker[id] += BLAS3_LS(BLOCKSIZEX, BLOCKSIZEY, BLOCKSIZEZ);
			
 
				 
			
@@ -373,7 +373,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 	/* start the runtime */
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				 	load_elf_sgemm();
			
@@ -383,7 +383,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 
			
 
				 	launch_codelets();
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -393,7 +393,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 
			
 
				 	cleanup_problem();
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	return 0;
			
--- a/examples/mult/dw_mult_no_stride_no_tag.c
+++ b/examples/mult/dw_mult_no_stride_no_tag.c
@@ -171,7 +171,7 @@ static void init_problem_data(void)
 
				 	{
			
 
				 		for (z = 0; z < nslicesz; z++)
			
 
				 		{
			
 
				-			starpu_register_matrix_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
			
 
				+			starpu_matrix_data_register(&A_state[y][z], 0, (uintptr_t)A[y][z], 
			
 
				 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEZ, sizeof(float));
			
 
				 		}
			
 
				 	}
			
@@ -180,7 +180,7 @@ static void init_problem_data(void)
 
				 	{
			
 
				 		for (x = 0; x < nslicesx; x++)
			
 
				 		{
			
 
				-			starpu_register_matrix_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
			
 
				+			starpu_matrix_data_register(&B_state[z][x], 0, (uintptr_t)B[z][x], 
			
 
				 				BLOCKSIZEZ, BLOCKSIZEZ, BLOCKSIZEX, sizeof(float));
			
 
				 		}
			
 
				 	}
			
@@ -189,7 +189,7 @@ static void init_problem_data(void)
 
				 	{
			
 
				 		for (x = 0; x < nslicesx; x++)
			
 
				 		{
			
 
				-			starpu_register_matrix_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
			
 
				+			starpu_matrix_data_register(&C_state[y][x], 0, (uintptr_t)C[y][x], 
			
 
				 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEX, sizeof(float));
			
 
				 		}
			
 
				 	}
			
@@ -344,7 +344,7 @@ static void construct_task(unsigned x, unsigned y, unsigned z, unsigned iter, st
 
				 static void callback_func_3(void *arg)
			
 
				 {
			
 
				 	/* do some accounting */
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	flop_per_worker[id] += BLAS3_FLOP(BLOCKSIZEX, BLOCKSIZEY, BLOCKSIZEZ);
			
 
				 	ls_per_worker[id] += BLAS3_LS(BLOCKSIZEX, BLOCKSIZEY, BLOCKSIZEZ);
			
 
				 
			
@@ -401,7 +401,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 	/* start the runtime */
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				 	load_elf_sgemm();
			
@@ -411,7 +411,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 
			
 
				 	launch_codelets();
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
@@ -421,7 +421,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 
			
 
				 	cleanup_problem();
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	return 0;
			
--- a/examples/mult/xgemm.c
+++ b/examples/mult/xgemm.c
@@ -65,7 +65,7 @@ static void check_output(void)
 
				 void callback_func(void *arg)
			
 
				 {
			
 
				 	/* do some accounting */
			
 
				-	int id = starpu_get_worker_id();
			
 
				+	int id = starpu_worker_get_id();
			
 
				 	flop_per_worker[id] += BLAS3_FLOP(conf.m, conf.n, conf.k);
			
 
				 	ls_per_worker[id] += BLAS3_LS(conf.m, conf.n, conf.k);
			
 
				 }
			
@@ -76,9 +76,9 @@ static void init_problem_data(void)
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	if (pin) {
			
 
				-		starpu_malloc_pinned_if_possible((void **)&A, zdim*ydim*sizeof(TYPE));
			
 
				-		starpu_malloc_pinned_if_possible((void **)&B, xdim*zdim*sizeof(TYPE));
			
 
				-		starpu_malloc_pinned_if_possible((void **)&C, xdim*ydim*sizeof(TYPE));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&A, zdim*ydim*sizeof(TYPE));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&B, xdim*zdim*sizeof(TYPE));
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&C, xdim*ydim*sizeof(TYPE));
			
 
				 	} else
			
 
				 #endif
			
 
				 	{
			
@@ -137,11 +137,11 @@ static void init_problem_data(void)
 
				 
			
 
				 static void partition_mult_data(void)
			
 
				 {
			
 
				-	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
			
 
				+	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A, 
			
 
				 		ydim, ydim, zdim, sizeof(TYPE));
			
 
				-	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
			
 
				+	starpu_matrix_data_register(&B_handle, 0, (uintptr_t)B, 
			
 
				 		zdim, zdim, xdim, sizeof(TYPE));
			
 
				-	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
			
 
				+	starpu_matrix_data_register(&C_handle, 0, (uintptr_t)C, 
			
 
				 		ydim, ydim, xdim, sizeof(TYPE));
			
 
				 
			
 
				 	starpu_data_set_wb_mask(C_handle, 1<<0);
			
@@ -158,17 +158,17 @@ static void partition_mult_data(void)
 
				 	f2.filter_func = starpu_block_filter_func;
			
 
				 	f2.filter_arg = nslicesy;
			
 
				 		
			
 
				-	starpu_partition_data(B_handle, &f);
			
 
				-	starpu_partition_data(A_handle, &f2);
			
 
				+	starpu_data_partition(B_handle, &f);
			
 
				+	starpu_data_partition(A_handle, &f2);
			
 
				 
			
 
				 	starpu_map_filters(C_handle, 2, &f, &f2);
			
 
				 }
			
 
				 
			
 
				 static void unpartition_mult_data(void)
			
 
				 {
			
 
				-	starpu_unpartition_data(C_handle, 0);
			
 
				+	starpu_data_unpartition(C_handle, 0);
			
 
				 
			
 
				-	starpu_delete_data(C_handle);
			
 
				+	starpu_data_unregister(C_handle);
			
 
				 }
			
 
				 
			
 
				 static struct starpu_perfmodel_t gemm_model = {
			
@@ -212,11 +212,11 @@ static void launch_codelets(void)
 
				 			task->callback_func = callback_func;
			
 
				 			task->callback_arg = NULL;
			
 
				 
			
 
				-			task->buffers[0].handle = starpu_get_sub_data(A_handle, 1, tasky);
			
 
				+			task->buffers[0].handle = starpu_data_get_sub_data(A_handle, 1, tasky);
			
 
				 			task->buffers[0].mode = STARPU_R;
			
 
				-			task->buffers[1].handle = starpu_get_sub_data(B_handle, 1, taskx);
			
 
				+			task->buffers[1].handle = starpu_data_get_sub_data(B_handle, 1, taskx);
			
 
				 			task->buffers[1].mode = STARPU_R;
			
 
				-			task->buffers[2].handle = starpu_get_sub_data(C_handle, 2, taskx, tasky);
			
 
				+			task->buffers[2].handle = starpu_data_get_sub_data(C_handle, 2, taskx, tasky);
			
 
				 			task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 			starpu_task_submit(task);
			
@@ -232,7 +232,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 
			
 
				 	/* start the runtime */
			
 
				 	starpu_init(NULL);
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	init_problem_data();
			
 
				 
			
@@ -241,7 +241,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 	partition_mult_data();
			
 
				 
			
 
				 	launch_codelets();
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 +
			
@@ -253,7 +253,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 	if (check)
			
 
				 		check_output();
			
 
				 	
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	return 0;
			
--- a/examples/pi/pi.c
+++ b/examples/pi/pi.c
@@ -82,13 +82,13 @@ int main(int argc, char **argv)
 
				 
			
 
				 	/* Any worker may use that array now */
			
 
				 	starpu_data_handle sobol_qrng_direction_handle;
			
 
				-	starpu_register_vector_data(&sobol_qrng_direction_handle, 0,
			
 
				+	starpu_vector_data_register(&sobol_qrng_direction_handle, 0,
			
 
				 		(uintptr_t)sobol_qrng_directions, n_dimensions*n_directions, sizeof(unsigned));
			
 
				 
			
 
				 	unsigned *cnt_array = malloc(ntasks*sizeof(unsigned));
			
 
				 	STARPU_ASSERT(cnt_array);
			
 
				 	starpu_data_handle cnt_array_handle;
			
 
				-	starpu_register_vector_data(&cnt_array_handle, 0, (uintptr_t)cnt_array, ntasks, sizeof(unsigned));
			
 
				+	starpu_vector_data_register(&cnt_array_handle, 0, (uintptr_t)cnt_array, ntasks, sizeof(unsigned));
			
 
				 
			
 
				 	/* Use a write-back policy : when the data is modified on an
			
 
				 	 * accelerator, we know that it will only be modified once and be
			
@@ -100,7 +100,7 @@ int main(int argc, char **argv)
 
				 		.filter_arg = ntasks
			
 
				 	};
			
 
				 	
			
 
				-	starpu_partition_data(cnt_array_handle, &f);
			
 
				+	starpu_data_partition(cnt_array_handle, &f);
			
 
				 
			
 
				 	static struct starpu_perfmodel_t model = {
			
 
				 		.type = STARPU_HISTORY_BASED,
			
@@ -128,22 +128,22 @@ int main(int argc, char **argv)
 
				 
			
 
				 		task->cl = &cl;
			
 
				 
			
 
				-		STARPU_ASSERT(starpu_get_sub_data(cnt_array_handle, 1, i));
			
 
				+		STARPU_ASSERT(starpu_data_get_sub_data(cnt_array_handle, 1, i));
			
 
				 
			
 
				 		task->buffers[0].handle = sobol_qrng_direction_handle;
			
 
				 		task->buffers[0].mode   = STARPU_R;
			
 
				-		task->buffers[1].handle = starpu_get_sub_data(cnt_array_handle, 1, i);
			
 
				+		task->buffers[1].handle = starpu_data_get_sub_data(cnt_array_handle, 1, i);
			
 
				 		task->buffers[1].mode   = STARPU_W;
			
 
				 
			
 
				 		int ret = starpu_task_submit(task);
			
 
				 		STARPU_ASSERT(!ret);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	/* Get the cnt_array back in main memory */
			
 
				-	starpu_unpartition_data(cnt_array_handle, 0);
			
 
				-	starpu_sync_data_with_mem(cnt_array_handle, STARPU_RW);
			
 
				+	starpu_data_unpartition(cnt_array_handle, 0);
			
 
				+	starpu_data_sync_with_mem(cnt_array_handle, STARPU_RW);
			
 
				 
			
 
				 	/* Count the total number of entries */
			
 
				 	unsigned long total_cnt = 0;
			
@@ -161,7 +161,7 @@ int main(int argc, char **argv)
 
				 	fprintf(stderr, "Total time : %f ms\n", timing/1000.0);
			
 
				 	fprintf(stderr, "Speed : %f GShot/s\n", total_shot_cnt/(10e3*timing));
			
 
				 
			
 
				-	starpu_release_data_from_mem(cnt_array_handle);
			
 
				+	starpu_data_release_from_mem(cnt_array_handle);
			
 
				 
			
 
				 	starpu_display_codelet_stats(&cl);
			
 
				 
			
--- a/examples/ppm_downscaler/yuv_downscaler.c
+++ b/examples/ppm_downscaler/yuv_downscaler.c
@@ -169,43 +169,43 @@ int main(int argc, char **argv)
 
				 	for (frame = 0; frame < nframes; frame++)
			
 
				 	{
			
 
				 		/* register Y layer */
			
 
				-		starpu_register_matrix_data(&frame_y_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&frame_y_handle[frame], 0,
			
 
				 			(uintptr_t)&yuv_in_buffer[frame].y,
			
 
				 			WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));
			
 
				 
			
 
				-		starpu_partition_data(frame_y_handle[frame], &filter_y);
			
 
				+		starpu_data_partition(frame_y_handle[frame], &filter_y);
			
 
				 
			
 
				-		starpu_register_matrix_data(&new_frame_y_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&new_frame_y_handle[frame], 0,
			
 
				 			(uintptr_t)&yuv_out_buffer[frame].y,
			
 
				 			NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));
			
 
				 
			
 
				-		starpu_partition_data(new_frame_y_handle[frame], &filter_y);
			
 
				+		starpu_data_partition(new_frame_y_handle[frame], &filter_y);
			
 
				 
			
 
				 		/* register U layer */
			
 
				-		starpu_register_matrix_data(&frame_u_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&frame_u_handle[frame], 0,
			
 
				 			(uintptr_t)&yuv_in_buffer[frame].u,
			
 
				 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
			
 
				 
			
 
				-		starpu_partition_data(frame_u_handle[frame], &filter_u);
			
 
				+		starpu_data_partition(frame_u_handle[frame], &filter_u);
			
 
				 
			
 
				-		starpu_register_matrix_data(&new_frame_u_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&new_frame_u_handle[frame], 0,
			
 
				 			(uintptr_t)&yuv_out_buffer[frame].u,
			
 
				 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
			
 
				 
			
 
				-		starpu_partition_data(new_frame_u_handle[frame], &filter_u);
			
 
				+		starpu_data_partition(new_frame_u_handle[frame], &filter_u);
			
 
				 
			
 
				 		/* register V layer */
			
 
				-		starpu_register_matrix_data(&frame_v_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&frame_v_handle[frame], 0,
			
 
				 			(uintptr_t)&yuv_in_buffer[frame].v,
			
 
				 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
			
 
				 
			
 
				-		starpu_partition_data(frame_v_handle[frame], &filter_v);
			
 
				+		starpu_data_partition(frame_v_handle[frame], &filter_v);
			
 
				 
			
 
				-		starpu_register_matrix_data(&new_frame_v_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&new_frame_v_handle[frame], 0,
			
 
				 			(uintptr_t)&yuv_out_buffer[frame].v,
			
 
				 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
			
 
				 
			
 
				-		starpu_partition_data(new_frame_v_handle[frame], &filter_v);
			
 
				+		starpu_data_partition(new_frame_v_handle[frame], &filter_v);
			
 
				 
			
 
				 	}
			
 
				 
			
@@ -229,11 +229,11 @@ int main(int argc, char **argv)
 
				 				task->callback_func = ds_callback;
			
 
				 
			
 
				 				/* input */
			
 
				-				task->buffers[0].handle = starpu_get_sub_data(frame_y_handle[frame], 1, blocky);
			
 
				+				task->buffers[0].handle = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky);
			
 
				 				task->buffers[0].mode = STARPU_R;
			
 
				 
			
 
				 				/* output */
			
 
				-				task->buffers[1].handle = starpu_get_sub_data(new_frame_y_handle[frame], 1, blocky);
			
 
				+				task->buffers[1].handle = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky);
			
 
				 				task->buffers[1].mode = STARPU_W;
			
 
				 
			
 
				 			starpu_task_submit(task);
			
@@ -247,11 +247,11 @@ int main(int argc, char **argv)
 
				 				task->callback_func = ds_callback;
			
 
				 
			
 
				 				/* input */
			
 
				-				task->buffers[0].handle = starpu_get_sub_data(frame_u_handle[frame], 1, blocku);
			
 
				+				task->buffers[0].handle = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku);
			
 
				 				task->buffers[0].mode = STARPU_R;
			
 
				 
			
 
				 				/* output */
			
 
				-				task->buffers[1].handle = starpu_get_sub_data(new_frame_u_handle[frame], 1, blocku);
			
 
				+				task->buffers[1].handle = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku);
			
 
				 				task->buffers[1].mode = STARPU_W;
			
 
				 
			
 
				 			starpu_task_submit(task);
			
@@ -265,11 +265,11 @@ int main(int argc, char **argv)
 
				 				task->callback_func = ds_callback;
			
 
				 
			
 
				 				/* input */
			
 
				-				task->buffers[0].handle = starpu_get_sub_data(frame_v_handle[frame], 1, blockv);
			
 
				+				task->buffers[0].handle = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv);
			
 
				 				task->buffers[0].mode = STARPU_R;
			
 
				 
			
 
				 				/* output */
			
 
				-				task->buffers[1].handle = starpu_get_sub_data(new_frame_v_handle[frame], 1, blockv);
			
 
				+				task->buffers[1].handle = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv);
			
 
				 				task->buffers[1].mode = STARPU_W;
			
 
				 
			
 
				 			starpu_task_submit(task);
			
@@ -290,9 +290,9 @@ int main(int argc, char **argv)
 
				 	/* make sure all output buffers are sync'ed */
			
 
				 	for (frame = 0; frame < nframes; frame++)
			
 
				 	{
			
 
				-		starpu_sync_data_with_mem(new_frame_y_handle[frame], STARPU_R);
			
 
				-		starpu_sync_data_with_mem(new_frame_u_handle[frame], STARPU_R);
			
 
				-		starpu_sync_data_with_mem(new_frame_v_handle[frame], STARPU_R);
			
 
				+		starpu_data_sync_with_mem(new_frame_y_handle[frame], STARPU_R);
			
 
				+		starpu_data_sync_with_mem(new_frame_u_handle[frame], STARPU_R);
			
 
				+		starpu_data_sync_with_mem(new_frame_v_handle[frame], STARPU_R);
			
 
				 	}
			
 
				 
			
 
				 	/* partition the layers into smaller parts */
			
--- a/examples/spmv/dw_block_spmv.c
+++ b/examples/spmv/dw_block_spmv.c
@@ -43,11 +43,11 @@ void create_data(void)
 
				 	bcsr_matrix = mm_file_to_bcsr(inputfile, c, r);
			
 
				 
			
 
				 	/* declare the corresponding block CSR to the runtime */
			
 
				-	starpu_register_bcsr_data(&sparse_matrix, 0, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks,
			
 
				+	starpu_bcsr_data_register(&sparse_matrix, 0, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks,
			
 
				 	                (uintptr_t)bcsr_matrix->val, bcsr_matrix->colind, bcsr_matrix->rowptr, 
			
 
				 			0, bcsr_matrix->r, bcsr_matrix->c, sizeof(float));
			
 
				 
			
 
				-	size = c*r*starpu_get_bcsr_nnz(sparse_matrix);
			
 
				+	size = c*r*starpu_bcsr_get_nnz(sparse_matrix);
			
 
				 //	printf("size = %d \n ", size);
			
 
				 
			
 
				 	/* initiate the 2 vectors */
			
@@ -65,8 +65,8 @@ void create_data(void)
 
				 		vector_out_ptr[ind] = 0.0f;
			
 
				 	}
			
 
				 
			
 
				-	starpu_register_vector_data(&vector_in, 0, (uintptr_t)vector_in_ptr, size, sizeof(float));
			
 
				-	starpu_register_vector_data(&vector_out, 0, (uintptr_t)vector_out_ptr, size, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_in, 0, (uintptr_t)vector_in_ptr, size, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_out, 0, (uintptr_t)vector_out_ptr, size, sizeof(float));
			
 
				 }
			
 
				 
			
 
				 void init_problem_callback(void *arg)
			
@@ -83,8 +83,8 @@ void init_problem_callback(void *arg)
 
				 		printf("DONE ...\n");
			
 
				 		gettimeofday(&end, NULL);
			
 
				 
			
 
				-//		starpu_unpartition_data(sparse_matrix, 0);
			
 
				-		starpu_unpartition_data(vector_out, 0);
			
 
				+//		starpu_data_unpartition(sparse_matrix, 0);
			
 
				+		starpu_data_unpartition(vector_out, 0);
			
 
				 
			
 
				 		sem_post(&sem);
			
 
				 	}
			
@@ -105,10 +105,10 @@ void call_filters(void)
 
				 	vector_out_f.filter_func = starpu_block_filter_func_vector;
			
 
				 	vector_out_f.filter_arg  = size/r;
			
 
				 
			
 
				-	starpu_partition_data(sparse_matrix, &bcsr_f);
			
 
				+	starpu_data_partition(sparse_matrix, &bcsr_f);
			
 
				 
			
 
				-	starpu_partition_data(vector_in, &vector_in_f);
			
 
				-	starpu_partition_data(vector_out, &vector_out_f);
			
 
				+	starpu_data_partition(vector_in, &vector_in_f);
			
 
				+	starpu_data_partition(vector_out, &vector_out_f);
			
 
				 }
			
 
				 
			
 
				 #define NSPMV	32
			
@@ -129,8 +129,8 @@ void launch_spmv_codelets(void)
 
				 	uint8_t *is_entry_tab;
			
 
				 
			
 
				 	/* we call one codelet per block */
			
 
				-	unsigned nblocks = starpu_get_bcsr_nnz(sparse_matrix); 
			
 
				-	unsigned nrows = starpu_get_bcsr_nrow(sparse_matrix); 
			
 
				+	unsigned nblocks = starpu_bcsr_get_nnz(sparse_matrix); 
			
 
				+	unsigned nrows = starpu_bcsr_get_nrow(sparse_matrix); 
			
 
				 
			
 
				 	remainingtasks = NSPMV*nblocks;
			
 
				 	totaltasks = remainingtasks;
			
@@ -145,8 +145,8 @@ void launch_spmv_codelets(void)
 
				 
			
 
				 	printf("there will be %d codelets\n", remainingtasks);
			
 
				 
			
 
				-	uint32_t *rowptr = starpu_get_bcsr_local_rowptr(sparse_matrix);
			
 
				-	uint32_t *colind = starpu_get_bcsr_local_colind(sparse_matrix);
			
 
				+	uint32_t *rowptr = starpu_bcsr_get_local_rowptr(sparse_matrix);
			
 
				+	uint32_t *colind = starpu_bcsr_get_local_colind(sparse_matrix);
			
 
				 
			
 
				 	gettimeofday(&start, NULL);
			
 
				 
			
@@ -181,11 +181,11 @@ void launch_spmv_codelets(void)
 
				 				unsigned i = colind[index];
			
 
				 				unsigned j = row;
			
 
				 		
			
 
				-				task->buffers[0].handle = starpu_get_sub_data(sparse_matrix, 1, part);
			
 
				+				task->buffers[0].handle = starpu_data_get_sub_data(sparse_matrix, 1, part);
			
 
				 				task->buffers[0].mode  = STARPU_R;
			
 
				-				task->buffers[1].handle = starpu_get_sub_data(vector_in, 1, i);
			
 
				+				task->buffers[1].handle = starpu_data_get_sub_data(vector_in, 1, i);
			
 
				 				task->buffers[1].mode = STARPU_R;
			
 
				-				task->buffers[2].handle = starpu_get_sub_data(vector_out, 1, j);
			
 
				+				task->buffers[2].handle = starpu_data_get_sub_data(vector_out, 1, j);
			
 
				 				task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				 				/* all tasks in the same row are dependant so that we don't wait too much for data 
			
--- a/examples/spmv/dw_spmv.c
+++ b/examples/spmv/dw_spmv.c
@@ -48,8 +48,8 @@ void spmv_kernel_opencl(void *descr[], void *args)
 
				 	float *vecout = (float *)STARPU_GET_VECTOR_PTR(descr[2]);
			
 
				 	uint32_t nx_out = STARPU_GET_VECTOR_NX(descr[2]);
			
 
				 
			
 
				-        id = starpu_get_worker_id();
			
 
				-        devid = starpu_get_worker_devid(id);
			
 
				+        id = starpu_worker_get_id();
			
 
				+        devid = starpu_worker_get_devid(id);
			
 
				 
			
 
				         err = starpu_opencl_load_kernel(&kernel, &queue,
			
 
				                                         "examples/spmv/spmv_opencl.cl", "spvm", devid);
			
@@ -200,7 +200,7 @@ static void create_data(void)
 
				 
			
 
				 	rowptr[size] = nnz;
			
 
				 	
			
 
				-	starpu_register_csr_data(&sparse_matrix, 0, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
			
 
				+	starpu_csr_data_register(&sparse_matrix, 0, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
			
 
				 
			
 
				 	sparse_matrix_nzval = nzval;
			
 
				 	sparse_matrix_colind = colind;
			
@@ -222,8 +222,8 @@ static void create_data(void)
 
				 		outvec[ind] = 0.0f;
			
 
				 	}
			
 
				 
			
 
				-	starpu_register_vector_data(&vector_in, 0, (uintptr_t)invec, size, sizeof(float));
			
 
				-	starpu_register_vector_data(&vector_out, 0, (uintptr_t)outvec, size, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_in, 0, (uintptr_t)invec, size, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_out, 0, (uintptr_t)outvec, size, sizeof(float));
			
 
				 
			
 
				 	vector_in_ptr = invec;
			
 
				 	vector_out_ptr = outvec;
			
@@ -240,8 +240,8 @@ void call_spmv_codelet_filters(void)
 
				 	vector_f.filter_func = starpu_block_filter_func_vector;
			
 
				 	vector_f.filter_arg  = nblocks;
			
 
				 
			
 
				-	starpu_partition_data(sparse_matrix, &csr_f);
			
 
				-	starpu_partition_data(vector_out, &vector_f);
			
 
				+	starpu_data_partition(sparse_matrix, &csr_f);
			
 
				+	starpu_data_partition(vector_out, &vector_f);
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				         {
			
@@ -281,11 +281,11 @@ void call_spmv_codelet_filters(void)
 
				 		task->cl = &cl;
			
 
				 		task->cl_arg = NULL;
			
 
				 	
			
 
				-		task->buffers[0].handle = starpu_get_sub_data(sparse_matrix, 1, part);
			
 
				+		task->buffers[0].handle = starpu_data_get_sub_data(sparse_matrix, 1, part);
			
 
				 		task->buffers[0].mode  = STARPU_R;
			
 
				 		task->buffers[1].handle = vector_in;
			
 
				 		task->buffers[1].mode = STARPU_R;
			
 
				-		task->buffers[2].handle = starpu_get_sub_data(vector_out, 1, part);
			
 
				+		task->buffers[2].handle = starpu_data_get_sub_data(vector_out, 1, part);
			
 
				 		task->buffers[2].mode = STARPU_W;
			
 
				 	
			
 
				 		ret = starpu_task_submit(task);
			
@@ -296,12 +296,12 @@ void call_spmv_codelet_filters(void)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
 
				-	starpu_unpartition_data(sparse_matrix, 0);
			
 
				-	starpu_unpartition_data(vector_out, 0);
			
 
				+	starpu_data_unpartition(sparse_matrix, 0);
			
 
				+	starpu_data_unpartition(vector_out, 0);
			
 
				 }
			
 
				 
			
 
				 static void print_results(void)
			
--- a/examples/starpufft/starpufftx.c
+++ b/examples/starpufft/starpufftx.c
@@ -146,18 +146,18 @@ compute_roots(STARPUFFT(plan) plan)
 
				 		plan->roots[dim] = malloc(plan->n[dim] * sizeof(**plan->roots));
			
 
				 		for (k = 0; k < plan->n[dim]; k++)
			
 
				 			plan->roots[dim][k] = cexp(exp*k);
			
 
				-		starpu_register_vector_data(&plan->roots_handle[dim], 0, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots));
			
 
				+		starpu_vector_data_register(&plan->roots_handle[dim], 0, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots));
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		if (plan->n[dim] > 100000) {
			
 
				 			/* prefetch the big root array on GPUs */
			
 
				 			unsigned worker;
			
 
				-			unsigned nworkers = starpu_get_worker_count();
			
 
				+			unsigned nworkers = starpu_worker_get_count();
			
 
				 			for (worker = 0; worker < nworkers; worker++)
			
 
				 			{
			
 
				-				unsigned node = starpu_get_worker_memory_node(worker);
			
 
				-				if (starpu_get_worker_type(worker) == STARPU_CUDA_WORKER)
			
 
				-					starpu_prefetch_data_on_node(plan->roots_handle[dim], node, 0);
			
 
				+				unsigned node = starpu_worker_get_memory_node(worker);
			
 
				+				if (starpu_worker_get_type(worker) == STARPU_CUDA_WORKER)
			
 
				+					starpu_data_prefetch_on_node(plan->roots_handle[dim], node, 0);
			
 
				 			}
			
 
				 		}
			
 
				 #endif
			
@@ -180,7 +180,7 @@ STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
 
				 		case 1: {
			
 
				 			switch (plan->type) {
			
 
				 			case C2C:
			
 
				-				starpu_register_vector_data(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				+				starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				 				for (z = 0; z < plan->totsize1; z++)
			
 
				 					plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
			
 
				 				tag = STARPUFFT(start1dC2C)(plan);
			
@@ -192,7 +192,7 @@ STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
 
				 			break;
			
 
				 		}
			
 
				 		case 2:
			
 
				-			starpu_register_vector_data(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				+			starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				 			for (z = 0; z < plan->totsize1; z++)
			
 
				 				plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
			
 
				 			tag = STARPUFFT(start2dC2C)(plan);
			
@@ -207,7 +207,7 @@ STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
 
				 void
			
 
				 STARPUFFT(cleanup)(STARPUFFT(plan) plan)
			
 
				 {
			
 
				-	starpu_delete_data(plan->in_handle);
			
 
				+	starpu_data_unregister(plan->in_handle);
			
 
				 }
			
 
				 
			
 
				 void
			
@@ -232,8 +232,8 @@ STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
 
				 {
			
 
				 	int workerid, dim, i;
			
 
				 
			
 
				-	for (workerid = 0; workerid < starpu_get_worker_count(); workerid++) {
			
 
				-		switch (starpu_get_worker_type(workerid)) {
			
 
				+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
			
 
				+		switch (starpu_worker_get_type(workerid)) {
			
 
				 		case STARPU_CPU_WORKER:
			
 
				 #ifdef STARPU_HAVE_FFTW
			
 
				 			_FFTW(free)(plan->plans[workerid].in1);
			
@@ -255,9 +255,9 @@ STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
 
				 		}
			
 
				 	}
			
 
				 	for (i = 0; i < plan->totsize1; i++) {
			
 
				-		starpu_delete_data(plan->twisted1_handle[i]);
			
 
				+		starpu_data_unregister(plan->twisted1_handle[i]);
			
 
				 		free(plan->twist1_tasks[i]);
			
 
				-		starpu_delete_data(plan->fft1_handle[i]);
			
 
				+		starpu_data_unregister(plan->fft1_handle[i]);
			
 
				 		free(plan->fft1_tasks[i]);
			
 
				 	}
			
 
				 
			
@@ -270,9 +270,9 @@ STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
 
				 	free(plan->join_task);
			
 
				 
			
 
				 	for (i = 0; i < plan->totsize3; i++) {
			
 
				-		starpu_delete_data(plan->twisted2_handle[i]);
			
 
				+		starpu_data_unregister(plan->twisted2_handle[i]);
			
 
				 		free(plan->twist2_tasks[i]);
			
 
				-		starpu_delete_data(plan->fft2_handle[i]);
			
 
				+		starpu_data_unregister(plan->fft2_handle[i]);
			
 
				 		free(plan->fft2_tasks[i]);
			
 
				 		free(plan->twist3_tasks[i]);
			
 
				 	}
			
@@ -287,7 +287,7 @@ STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
 
				 	free(plan->fft2_args);
			
 
				 
			
 
				 	for (dim = 0; dim < plan->dim; dim++) {
			
 
				-		starpu_delete_data(plan->roots_handle[dim]);
			
 
				+		starpu_data_unregister(plan->roots_handle[dim]);
			
 
				 		free(plan->roots[dim]);
			
 
				 	}
			
 
				 
			
@@ -321,7 +321,7 @@ STARPUFFT(malloc)(size_t n)
 
				 {
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	void *res;
			
 
				-	starpu_malloc_pinned_if_possible(&res, n);
			
 
				+	starpu_data_malloc_pinned_if_possible(&res, n);
			
 
				 	return res;
			
 
				 #else
			
 
				 #  ifdef STARPU_HAVE_FFTW
			
@@ -368,7 +368,7 @@ STARPUFFT(showstats)(FILE *out)
 
				 		if (task_per_worker[worker])
			
 
				 		{
			
 
				 			char name[32];
			
 
				-			starpu_get_worker_name(worker, name, 32);
			
 
				+			starpu_worker_get_name(worker, name, 32);
			
 
				 
			
 
				 			unsigned long bytes = sizeof(STARPUFFT(complex))*samples_per_worker[worker];
			
 
				 
			
--- a/examples/starpufft/starpufftx1d.c
+++ b/examples/starpufft/starpufftx1d.c
@@ -32,7 +32,7 @@ STARPUFFT(twist1_1d_kernel_gpu)(void *descr[], void *_args)
 
				 	_cufftComplex * restrict in = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	_cufftComplex * restrict twisted1 = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[1]);
			
 
				 	
			
 
				-	cudaStream_t stream = STARPUFFT(get_local_stream)(plan, starpu_get_worker_id());
			
 
				+	cudaStream_t stream = STARPUFFT(get_local_stream)(plan, starpu_worker_get_id());
			
 
				 
			
 
				 	STARPUFFT(cuda_twist1_1d_host)(in, twisted1, i, n1, n2, stream);
			
 
				 
			
@@ -53,7 +53,7 @@ STARPUFFT(fft1_1d_kernel_gpu)(void *descr[], void *_args)
 
				 	_cufftComplex * restrict out = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[1]);
			
 
				 	const _cufftComplex * restrict roots = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[2]);
			
 
				 
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				 	cudaStream_t stream;
			
 
				 
			
@@ -90,7 +90,7 @@ STARPUFFT(fft2_1d_kernel_gpu)(void *descr[], void *_args)
 
				 	_cufftComplex * restrict in = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	_cufftComplex * restrict out = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[1]);
			
 
				 
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				 	if (!plan->plans[workerid].initialized2) {
			
 
				 		cures = cufftPlan1d(&plan->plans[workerid].plan2_cuda, n1, _CUFFT_C2C, n3);
			
@@ -140,7 +140,7 @@ STARPUFFT(fft1_1d_kernel_cpu)(void *descr[], void *_args)
 
				 	int i = args->i;
			
 
				 	int j;
			
 
				 	int n2 = plan->n2[0];
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				 	const STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	STARPUFFT(complex) * restrict fft1 = (STARPUFFT(complex) *)STARPU_GET_VECTOR_PTR(descr[1]);
			
@@ -190,7 +190,7 @@ STARPUFFT(fft2_1d_kernel_cpu)(void *descr[], void *_args)
 
				 	struct STARPUFFT(args) *args = _args;
			
 
				 	STARPUFFT(plan) plan = args->plan;
			
 
				 	//int jj = args->jj;
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				 	const STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_GET_VECTOR_PTR(descr[1]);
			
@@ -389,8 +389,8 @@ STARPUFFT(plan_dft_1d)(int n, int sign, unsigned flags)
 
				 	compute_roots(plan);
			
 
				 
			
 
				 	/* Initialize per-worker working set */
			
 
				-	for (workerid = 0; workerid < starpu_get_worker_count(); workerid++) {
			
 
				-		switch (starpu_get_worker_type(workerid)) {
			
 
				+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
			
 
				+		switch (starpu_worker_get_type(workerid)) {
			
 
				 		case STARPU_CPU_WORKER:
			
 
				 #ifdef STARPU_HAVE_FFTW
			
 
				 			/* first fft plan: one n2 fft */
			
@@ -460,8 +460,8 @@ STARPUFFT(plan_dft_1d)(int n, int sign, unsigned flags)
 
				 		plan->fft1_args[z].i = i;
			
 
				 
			
 
				 		/* Register (n2) chunks */
			
 
				-		starpu_register_vector_data(&plan->twisted1_handle[z], 0, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1));
			
 
				-		starpu_register_vector_data(&plan->fft1_handle[z], 0, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1));
			
 
				+		starpu_vector_data_register(&plan->twisted1_handle[z], 0, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1));
			
 
				+		starpu_vector_data_register(&plan->fft1_handle[z], 0, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1));
			
 
				 
			
 
				 		/* We'll need it on the CPU for the second twist anyway */
			
 
				 		starpu_data_set_wb_mask(plan->fft1_handle[z], 1<<0);
			
@@ -522,8 +522,8 @@ STARPUFFT(plan_dft_1d)(int n, int sign, unsigned flags)
 
				 		plan->fft2_args[z].jj = jj;
			
 
				 
			
 
				 		/* Register n3 (n1) chunks */
			
 
				-		starpu_register_vector_data(&plan->twisted2_handle[z], 0, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2));
			
 
				-		starpu_register_vector_data(&plan->fft2_handle[z], 0, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2));
			
 
				+		starpu_vector_data_register(&plan->twisted2_handle[z], 0, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2));
			
 
				+		starpu_vector_data_register(&plan->fft2_handle[z], 0, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2));
			
 
				 
			
 
				 		/* We'll need it on the CPU for the last twist anyway */
			
 
				 		starpu_data_set_wb_mask(plan->fft2_handle[z], 1<<0);
			
--- a/examples/starpufft/starpufftx2d.c
+++ b/examples/starpufft/starpufftx2d.c
@@ -39,7 +39,7 @@ STARPUFFT(twist1_2d_kernel_gpu)(void *descr[], void *_args)
 
				 	_cufftComplex * restrict in = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	_cufftComplex * restrict twisted1 = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[1]);
			
 
				 
			
 
				-	cudaStream_t stream = STARPUFFT(get_local_stream)(plan, starpu_get_worker_id());
			
 
				+	cudaStream_t stream = STARPUFFT(get_local_stream)(plan, starpu_worker_get_id());
			
 
				 
			
 
				 	STARPUFFT(cuda_twist1_2d_host)(in, twisted1, i, j, n1, n2, m1, m2, stream);
			
 
				 	cudaStreamSynchronize(stream);
			
@@ -62,7 +62,7 @@ STARPUFFT(fft1_2d_kernel_gpu)(void *descr[], void *_args)
 
				 	const _cufftComplex * restrict roots0 = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[2]);
			
 
				 	const _cufftComplex * restrict roots1 = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[3]);
			
 
				 
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				 	cudaStream_t stream;
			
 
				 
			
@@ -104,7 +104,7 @@ STARPUFFT(fft2_2d_kernel_gpu)(void *descr[], void *_args)
 
				 	_cufftComplex * restrict in = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	_cufftComplex * restrict out = (_cufftComplex *)STARPU_GET_VECTOR_PTR(descr[1]);
			
 
				 
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				 	if (!plan->plans[workerid].initialized2) {
			
 
				 		cures = cufftPlan2d(&plan->plans[workerid].plan2_cuda, n1, m1, _CUFFT_C2C);
			
@@ -162,7 +162,7 @@ STARPUFFT(fft1_2d_kernel_cpu)(void *descr[], void *_args)
 
				 	int k, l;
			
 
				 	int n2 = plan->n2[0];
			
 
				 	int m2 = plan->n2[1];
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				 	const STARPUFFT(complex) *twisted1 = (STARPUFFT(complex) *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	STARPUFFT(complex) *fft1 = (STARPUFFT(complex) *)STARPU_GET_VECTOR_PTR(descr[1]);
			
@@ -221,7 +221,7 @@ STARPUFFT(fft2_2d_kernel_cpu)(void *descr[], void *_args)
 
				 	STARPUFFT(plan) plan = args->plan;
			
 
				 	//int kk = args->kk;
			
 
				 	//int ll = args->ll;
			
 
				-	int workerid = starpu_get_worker_id();
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				 	const STARPUFFT(complex) *twisted2 = (STARPUFFT(complex) *)STARPU_GET_VECTOR_PTR(descr[0]);
			
 
				 	STARPUFFT(complex) *fft2 = (STARPUFFT(complex) *)STARPU_GET_VECTOR_PTR(descr[1]);
			
@@ -448,8 +448,8 @@ STARPUFFT(plan_dft_2d)(int n, int m, int sign, unsigned flags)
 
				 	compute_roots(plan);
			
 
				 
			
 
				 	/* Initialize per-worker working set */
			
 
				-	for (workerid = 0; workerid < starpu_get_worker_count(); workerid++) {
			
 
				-		switch (starpu_get_worker_type(workerid)) {
			
 
				+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
			
 
				+		switch (starpu_worker_get_type(workerid)) {
			
 
				 		case STARPU_CPU_WORKER:
			
 
				 #ifdef STARPU_HAVE_FFTW
			
 
				 			/* first fft plan: one n2*m2 fft */
			
@@ -520,8 +520,8 @@ STARPUFFT(plan_dft_2d)(int n, int m, int sign, unsigned flags)
 
				 		plan->fft1_args[z].j = j;
			
 
				 
			
 
				 		/* Register (n2,m2) chunks */
			
 
				-		starpu_register_vector_data(&plan->twisted1_handle[z], 0, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1));
			
 
				-		starpu_register_vector_data(&plan->fft1_handle[z], 0, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1));
			
 
				+		starpu_vector_data_register(&plan->twisted1_handle[z], 0, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1));
			
 
				+		starpu_vector_data_register(&plan->fft1_handle[z], 0, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1));
			
 
				 
			
 
				 		/* We'll need it on the CPU for the second twist anyway */
			
 
				 		starpu_data_set_wb_mask(plan->fft1_handle[z], 1<<0);
			
@@ -585,8 +585,8 @@ STARPUFFT(plan_dft_2d)(int n, int m, int sign, unsigned flags)
 
				 		plan->fft2_args[z].ll = ll;
			
 
				 
			
 
				 		/* Register n3*m3 (n1,m1) chunks */
			
 
				-		starpu_register_vector_data(&plan->twisted2_handle[z], 0, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2));
			
 
				-		starpu_register_vector_data(&plan->fft2_handle[z], 0, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2));
			
 
				+		starpu_vector_data_register(&plan->twisted2_handle[z], 0, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2));
			
 
				+		starpu_vector_data_register(&plan->fft2_handle[z], 0, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2));
			
 
				 
			
 
				 		/* We'll need it on the CPU for the last twist anyway */
			
 
				 		starpu_data_set_wb_mask(plan->fft2_handle[z], 1<<0);
			
--- a/examples/strassen/strassen.c
+++ b/examples/strassen/strassen.c
@@ -23,22 +23,22 @@ static starpu_data_handle create_tmp_matrix(starpu_data_handle M)
 
				 	starpu_data_handle state = malloc(sizeof(starpu_data_handle));
			
 
				 
			
 
				 	/* create a matrix with the same dimensions as M */
			
 
				-	uint32_t nx = starpu_get_matrix_nx(M);
			
 
				-	uint32_t ny = starpu_get_matrix_nx(M);
			
 
				+	uint32_t nx = starpu_matrix_get_nx(M);
			
 
				+	uint32_t ny = starpu_matrix_get_nx(M);
			
 
				 
			
 
				 	STARPU_ASSERT(state);
			
 
				 
			
 
				 	data = malloc(nx*ny*sizeof(float));
			
 
				 	STARPU_ASSERT(data);
			
 
				 
			
 
				-	starpu_register_matrix_data(&state, 0, (uintptr_t)data, nx, nx, ny, sizeof(float));
			
 
				+	starpu_matrix_data_register(&state, 0, (uintptr_t)data, nx, nx, ny, sizeof(float));
			
 
				 	
			
 
				 	return state;
			
 
				 }
			
 
				 
			
 
				 static void free_tmp_matrix(starpu_data_handle matrix)
			
 
				 {
			
 
				-	starpu_delete_data(matrix);
			
 
				+	starpu_data_unregister(matrix);
			
 
				 	free(matrix);
			
 
				 }
			
 
				 
			
@@ -62,20 +62,20 @@ static void partition_matrices(strassen_iter_state_t *iter)
 
				 	starpu_map_filters(B, 2, &f, &f2);
			
 
				 	starpu_map_filters(C, 2, &f, &f2);
			
 
				 
			
 
				-	iter->A11 = starpu_get_sub_data(A, 2, 0, 0);
			
 
				-	iter->A12 = starpu_get_sub_data(A, 2, 1, 0);
			
 
				-	iter->A21 = starpu_get_sub_data(A, 2, 0, 1);
			
 
				-	iter->A22 = starpu_get_sub_data(A, 2, 1, 1);
			
 
				+	iter->A11 = starpu_data_get_sub_data(A, 2, 0, 0);
			
 
				+	iter->A12 = starpu_data_get_sub_data(A, 2, 1, 0);
			
 
				+	iter->A21 = starpu_data_get_sub_data(A, 2, 0, 1);
			
 
				+	iter->A22 = starpu_data_get_sub_data(A, 2, 1, 1);
			
 
				 
			
 
				-	iter->B11 = starpu_get_sub_data(B, 2, 0, 0);
			
 
				-	iter->B12 = starpu_get_sub_data(B, 2, 1, 0);
			
 
				-	iter->B21 = starpu_get_sub_data(B, 2, 0, 1);
			
 
				-	iter->B22 = starpu_get_sub_data(B, 2, 1, 1);
			
 
				+	iter->B11 = starpu_data_get_sub_data(B, 2, 0, 0);
			
 
				+	iter->B12 = starpu_data_get_sub_data(B, 2, 1, 0);
			
 
				+	iter->B21 = starpu_data_get_sub_data(B, 2, 0, 1);
			
 
				+	iter->B22 = starpu_data_get_sub_data(B, 2, 1, 1);
			
 
				 
			
 
				-	iter->C11 = starpu_get_sub_data(C, 2, 0, 0);
			
 
				-	iter->C12 = starpu_get_sub_data(C, 2, 1, 0);
			
 
				-	iter->C21 = starpu_get_sub_data(C, 2, 0, 1);
			
 
				-	iter->C22 = starpu_get_sub_data(C, 2, 1, 1);
			
 
				+	iter->C11 = starpu_data_get_sub_data(C, 2, 0, 0);
			
 
				+	iter->C12 = starpu_data_get_sub_data(C, 2, 1, 0);
			
 
				+	iter->C21 = starpu_data_get_sub_data(C, 2, 0, 1);
			
 
				+	iter->C22 = starpu_data_get_sub_data(C, 2, 1, 1);
			
 
				 
			
 
				 	/* TODO check that all sub-matrices have the same size */
			
 
				 }
			
@@ -83,9 +83,9 @@ static void partition_matrices(strassen_iter_state_t *iter)
 
				 static void unpartition_matrices(strassen_iter_state_t *iter)
			
 
				 {
			
 
				 	/* TODO there is no  need to actually gather those results ... */
			
 
				-	starpu_unpartition_data(iter->A, 0);
			
 
				-	starpu_unpartition_data(iter->B, 0);
			
 
				-	starpu_unpartition_data(iter->C, 0);
			
 
				+	starpu_data_unpartition(iter->A, 0);
			
 
				+	starpu_data_unpartition(iter->B, 0);
			
 
				+	starpu_data_unpartition(iter->C, 0);
			
 
				 }
			
 
				 
			
 
				 static starpu_codelet cl_add = {
			
--- a/examples/strassen/strassen_models.c
+++ b/examples/strassen/strassen_models.c
@@ -41,7 +41,7 @@ static double self_add_sub_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (n*n)/10.0f/4.0f/7.75f;
			
 
				 
			
@@ -56,7 +56,7 @@ static double cuda_self_add_sub_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (n*n)/10.0f/4.0f;
			
 
				 
			
@@ -71,7 +71,7 @@ static double add_sub_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (1.45f*n*n)/10.0f/2.0f;
			
 
				 
			
@@ -86,7 +86,7 @@ static double cuda_add_sub_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (1.45f*n*n)/10.0f/2.0f;
			
 
				 
			
@@ -102,7 +102,7 @@ static double mult_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/1000.0f/4.11f/0.2588);
			
 
				 
			
@@ -117,7 +117,7 @@ static double cuda_mult_cost(starpu_buffer_descr *descr)
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_get_matrix_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/1000.0f/4.11f);
			
 
				 
			
--- a/examples/strassen/test_strassen.c
+++ b/examples/strassen/test_strassen.c
@@ -156,11 +156,11 @@ void init_problem(void)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_register_matrix_data(&A_state, 0, (uintptr_t)A, 
			
 
				+	starpu_matrix_data_register(&A_state, 0, (uintptr_t)A, 
			
 
				 		dim, dim, dim, sizeof(float));
			
 
				-	starpu_register_matrix_data(&B_state, 0, (uintptr_t)B, 
			
 
				+	starpu_matrix_data_register(&B_state, 0, (uintptr_t)B, 
			
 
				 		dim, dim, dim, sizeof(float));
			
 
				-	starpu_register_matrix_data(&C_state, 0, (uintptr_t)C, 
			
 
				+	starpu_matrix_data_register(&C_state, 0, (uintptr_t)C, 
			
 
				 		dim, dim, dim, sizeof(float));
			
 
				 
			
 
				 	gettimeofday(&start, NULL);
			
@@ -176,7 +176,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 	/* start the runtime */
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	sem_init(&sem, 0, 0U);
			
 
				 
			
@@ -184,7 +184,7 @@ int main(__attribute__ ((unused)) int argc,
 
				 	sem_wait(&sem);
			
 
				 	sem_destroy(&sem);
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/examples/strassen2/strassen2.c
+++ b/examples/strassen2/strassen2.c
@@ -190,7 +190,7 @@ static starpu_data_handle allocate_tmp_matrix(unsigned size, unsigned reclevel)
 
				 
			
 
				 	buffer = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
			
 
				 
			
 
				-	starpu_register_matrix_data(data, 0, (uintptr_t)buffer, size, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(data, 0, (uintptr_t)buffer, size, size, size, sizeof(float));
			
 
				 
			
 
				 	/* we construct a starpu_filter tree of depth reclevel */
			
 
				 	unsigned rec;
			
@@ -337,7 +337,7 @@ void cleanup_callback(void *_arg)
 
				 
			
 
				 	unsigned i;
			
 
				 	for (i = 0; i < arg->ndata; i++)
			
 
				-		starpu_advise_if_data_is_important(arg->data[i], 0);
			
 
				+		starpu_data_advise_as_important(arg->data[i], 0);
			
 
				 
			
 
				 	free(arg);
			
 
				 }
			
@@ -402,22 +402,22 @@ void strassen_mult(struct strassen_iter *iter)
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-        starpu_data_handle A11 = starpu_get_sub_data(iter->A, 2, 0, 0);
			
 
				-        starpu_data_handle A12 = starpu_get_sub_data(iter->A, 2, 1, 0);
			
 
				-        starpu_data_handle A21 = starpu_get_sub_data(iter->A, 2, 0, 1);
			
 
				-        starpu_data_handle A22 = starpu_get_sub_data(iter->A, 2, 1, 1);
			
 
				+        starpu_data_handle A11 = starpu_data_get_sub_data(iter->A, 2, 0, 0);
			
 
				+        starpu_data_handle A12 = starpu_data_get_sub_data(iter->A, 2, 1, 0);
			
 
				+        starpu_data_handle A21 = starpu_data_get_sub_data(iter->A, 2, 0, 1);
			
 
				+        starpu_data_handle A22 = starpu_data_get_sub_data(iter->A, 2, 1, 1);
			
 
				 
			
 
				-        starpu_data_handle B11 = starpu_get_sub_data(iter->B, 2, 0, 0);
			
 
				-        starpu_data_handle B12 = starpu_get_sub_data(iter->B, 2, 1, 0);
			
 
				-        starpu_data_handle B21 = starpu_get_sub_data(iter->B, 2, 0, 1);
			
 
				-        starpu_data_handle B22 = starpu_get_sub_data(iter->B, 2, 1, 1);
			
 
				+        starpu_data_handle B11 = starpu_data_get_sub_data(iter->B, 2, 0, 0);
			
 
				+        starpu_data_handle B12 = starpu_data_get_sub_data(iter->B, 2, 1, 0);
			
 
				+        starpu_data_handle B21 = starpu_data_get_sub_data(iter->B, 2, 0, 1);
			
 
				+        starpu_data_handle B22 = starpu_data_get_sub_data(iter->B, 2, 1, 1);
			
 
				 
			
 
				-        starpu_data_handle C11 = starpu_get_sub_data(iter->C, 2, 0, 0);
			
 
				-        starpu_data_handle C12 = starpu_get_sub_data(iter->C, 2, 1, 0);
			
 
				-        starpu_data_handle C21 = starpu_get_sub_data(iter->C, 2, 0, 1);
			
 
				-        starpu_data_handle C22 = starpu_get_sub_data(iter->C, 2, 1, 1);
			
 
				+        starpu_data_handle C11 = starpu_data_get_sub_data(iter->C, 2, 0, 0);
			
 
				+        starpu_data_handle C12 = starpu_data_get_sub_data(iter->C, 2, 1, 0);
			
 
				+        starpu_data_handle C21 = starpu_data_get_sub_data(iter->C, 2, 0, 1);
			
 
				+        starpu_data_handle C22 = starpu_data_get_sub_data(iter->C, 2, 1, 1);
			
 
				 
			
 
				-	unsigned size = starpu_get_matrix_nx(A11);
			
 
				+	unsigned size = starpu_matrix_get_nx(A11);
			
 
				 
			
 
				 	/* M1a = (A11 + A22) */
			
 
				 	iter->Mia_data[0] = allocate_tmp_matrix(size, iter->reclevel);
			
@@ -785,11 +785,11 @@ int main(int argc, char **argv)
 
				 
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				         if (pin) {
			
 
				-                starpu_malloc_pinned_if_possible((void **)&bigbuffer, used_mem_predicted);
			
 
				+                starpu_data_malloc_pinned_if_possible((void **)&bigbuffer, used_mem_predicted);
			
 
				         } else
			
 
				 #endif
			
 
				         {
			
@@ -804,9 +804,9 @@ int main(int argc, char **argv)
 
				 	B = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
			
 
				 	C = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
			
 
				 
			
 
				-	starpu_register_matrix_data(&data_A, 0, (uintptr_t)A, size, size, size, sizeof(float));
			
 
				-	starpu_register_matrix_data(&data_B, 0, (uintptr_t)B, size, size, size, sizeof(float));
			
 
				-	starpu_register_matrix_data(&data_C, 0, (uintptr_t)C, size, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&data_A, 0, (uintptr_t)A, size, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&data_B, 0, (uintptr_t)B, size, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&data_C, 0, (uintptr_t)C, size, size, size, sizeof(float));
			
 
				 
			
 
				 	unsigned rec;
			
 
				 	for (rec = 0; rec < reclevel; rec++)
			
@@ -844,7 +844,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/examples/variable/variable.c
+++ b/examples/variable/variable.c
@@ -85,14 +85,14 @@ int main(int argc, char **argv)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	/* update the array in RAM */
			
 
				-	starpu_sync_data_with_mem(float_array_handle, STARPU_R);
			
 
				+	starpu_data_sync_with_mem(float_array_handle, STARPU_R);
			
 
				 
			
 
				 	fprintf(stderr, "variable -> %f\n", foo);
			
 
				 
			
 
				-	starpu_release_data_from_mem(float_array_handle);
			
 
				+	starpu_data_release_from_mem(float_array_handle);
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/examples/variable/variable_kernels_opencl.c
+++ b/examples/variable/variable_kernels_opencl.c
@@ -24,8 +24,8 @@ void opencl_codelet(void *descr[], void *_args)
 
				 	cl_command_queue queue;
			
 
				 	int id, devid, err;
			
 
				 
			
 
				-        id = starpu_get_worker_id();
			
 
				-        devid = starpu_get_worker_devid(id);
			
 
				+        id = starpu_worker_get_id();
			
 
				+        devid = starpu_worker_get_devid(id);
			
 
				 
			
 
				 	err = starpu_opencl_load_kernel(&kernel, &queue,
			
 
				                                         "examples/variable/variable_kernels_opencl_codelet.cl", "variable", devid);
			
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -75,17 +75,17 @@ void starpu_shutdown(void);
 
				 
			
 
				 /* This function returns the number of workers (ie. processing units executing
			
 
				  * StarPU tasks). The returned value should be at most STARPU_NMAXWORKERS. */
			
 
				-unsigned starpu_get_worker_count(void);
			
 
				+unsigned starpu_worker_get_count(void);
			
 
				 
			
 
				-unsigned starpu_get_cpu_worker_count(void);
			
 
				-unsigned starpu_get_cuda_worker_count(void);
			
 
				-unsigned starpu_get_spu_worker_count(void);
			
 
				-unsigned starpu_get_opencl_worker_count(void);
			
 
				+unsigned starpu_cpu_worker_get_count(void);
			
 
				+unsigned starpu_cuda_worker_get_count(void);
			
 
				+unsigned starpu_spu_worker_get_count(void);
			
 
				+unsigned starpu_opencl_worker_get_count(void);
			
 
				 
			
 
				 /* Return the identifier of the thread in case this is associated to a worker.
			
 
				  * This will return -1 if this function is called directly from the application
			
 
				  * or if it is some SPU worker where a single thread controls different SPUs. */
			
 
				-int starpu_get_worker_id(void);
			
 
				+int starpu_worker_get_id(void);
			
 
				 
			
 
				 enum starpu_archtype {
			
 
				 	STARPU_CPU_WORKER, /* CPU core */
			
@@ -95,11 +95,11 @@ enum starpu_archtype {
 
				 };
			
 
				 
			
 
				 /* This function returns the type of worker associated to an identifier (as
			
 
				- * returned by the starpu_get_worker_id function). The returned value indicates
			
 
				+ * returned by the starpu_worker_get_id function). The returned value indicates
			
 
				  * the architecture of the worker: STARPU_CPU_WORKER for a CPU core,
			
 
				  * STARPU_CUDA_WORKER for a CUDA device, and STARPU_GORDON_WORKER for a Cell
			
 
				  * SPU. The value returned for an invalid identifier is unspecified.  */
			
 
				-enum starpu_archtype starpu_get_worker_type(int id);
			
 
				+enum starpu_archtype starpu_worker_get_type(int id);
			
 
				 
			
 
				 /* StarPU associates a unique human readable string to each processing unit.
			
 
				  * This function copies at most the "maxlen" first bytes of the unique
			
@@ -108,12 +108,12 @@ enum starpu_archtype starpu_get_worker_type(int id);
 
				  * "dst" is a valid pointer to a buffer of "maxlen" bytes at least.
			
 
				  * Calling this function on an invalid identifier results in an unspecified
			
 
				  * behaviour. */
			
 
				-void starpu_get_worker_name(int id, char *dst, size_t maxlen);
			
 
				+void starpu_worker_get_name(int id, char *dst, size_t maxlen);
			
 
				 
			
 
				 /* This functions returns the device id of the worker associated to an
			
 
				- *  identifier (as returned by the starpu_get_worker_id() function)
			
 
				+ *  identifier (as returned by the starpu_worker_get_id() function)
			
 
				  */
			
 
				-int starpu_get_worker_devid(int id);
			
 
				+int starpu_worker_get_devid(int id);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -42,24 +42,24 @@ typedef struct starpu_buffer_descr_t {
 
				 
			
 
				 struct starpu_data_interface_ops_t;
			
 
				 
			
 
				-void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
			
 
				-void starpu_delete_data(starpu_data_handle state);
			
 
				+void starpu_data_unpartition(starpu_data_handle root_data, uint32_t gathering_node);
			
 
				+void starpu_data_unregister(starpu_data_handle state);
			
 
				 
			
 
				-void starpu_advise_if_data_is_important(starpu_data_handle state, unsigned is_important);
			
 
				+void starpu_data_advise_as_important(starpu_data_handle state, unsigned is_important);
			
 
				 
			
 
				-int starpu_sync_data_with_mem(starpu_data_handle state, starpu_access_mode mode);
			
 
				-int starpu_sync_data_with_mem_non_blocking(starpu_data_handle handle,
			
 
				+int starpu_data_sync_with_mem(starpu_data_handle state, starpu_access_mode mode);
			
 
				+int starpu_data_sync_with_mem_non_blocking(starpu_data_handle handle,
			
 
				 			starpu_access_mode mode, void (*callback)(void *), void *arg);
			
 
				-void starpu_release_data_from_mem(starpu_data_handle state);
			
 
				+void starpu_data_release_from_mem(starpu_data_handle state);
			
 
				 
			
 
				-int starpu_malloc_pinned_if_possible(void **A, size_t dim);
			
 
				-int starpu_free_pinned_if_possible(void *A);
			
 
				+int starpu_data_malloc_pinned_if_possible(void **A, size_t dim);
			
 
				+int starpu_data_free_pinned_if_possible(void *A);
			
 
				 
			
 
				-int starpu_request_data_allocation(starpu_data_handle state, uint32_t node);
			
 
				+int starpu_data_request_allocation(starpu_data_handle state, uint32_t node);
			
 
				 
			
 
				-int starpu_prefetch_data_on_node(starpu_data_handle state, unsigned node, unsigned async);
			
 
				+int starpu_data_prefetch_on_node(starpu_data_handle state, unsigned node, unsigned async);
			
 
				 
			
 
				-unsigned starpu_get_worker_memory_node(unsigned workerid);
			
 
				+unsigned starpu_worker_get_memory_node(unsigned workerid);
			
 
				 
			
 
				 /* It is possible to associate a mask to a piece of data (and its children) so
			
 
				  * that when it is modified, it is automatically transfered into those memory
			
@@ -69,7 +69,7 @@ void starpu_data_set_wb_mask(starpu_data_handle state, uint32_t wb_mask);
 
				 
			
 
				 void starpu_data_set_sequential_consistency_flag(starpu_data_handle handle, unsigned flag);
			
 
				 
			
 
				-unsigned starpu_test_if_data_is_allocated_on_node(starpu_data_handle handle, uint32_t memory_node);
			
 
				+unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle handle, uint32_t memory_node);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/include/starpu_data_filters.h
+++ b/include/starpu_data_filters.h
@@ -30,8 +30,8 @@ typedef struct starpu_filter_t {
 
				 	void *filter_arg_ptr;
			
 
				 } starpu_filter;
			
 
				 
			
 
				-void starpu_partition_data(starpu_data_handle initial_data, starpu_filter *f); 
			
 
				-void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
			
 
				+void starpu_data_partition(starpu_data_handle initial_data, starpu_filter *f); 
			
 
				+void starpu_data_unpartition(starpu_data_handle root_data, uint32_t gathering_node);
			
 
				 
			
 
				 struct starpu_data_interface_ops_t;
			
 
				 void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren,
			
@@ -40,7 +40,7 @@ void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren,
 
				 starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i);
			
 
				 
			
 
				 /* unsigned list */
			
 
				-starpu_data_handle starpu_get_sub_data(starpu_data_handle root_data, unsigned depth, ... );
			
 
				+starpu_data_handle starpu_data_get_sub_data(starpu_data_handle root_data, unsigned depth, ... );
			
 
				 
			
 
				 /* starpu_filter * list */
			
 
				 void starpu_map_filters(starpu_data_handle root_data, unsigned nfilters, ...);
			
@@ -56,8 +56,8 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 
				 
			
 
				 /* for vector */
			
 
				 void starpu_block_filter_func_vector(starpu_filter *f, starpu_data_handle root_data);
			
 
				-void starpu_list_filter_func_vector(starpu_filter *f, starpu_data_handle root_data);
			
 
				-void starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle root_data);
			
 
				+void starpu_vector_list_filter_func(starpu_filter *f, starpu_data_handle root_data);
			
 
				+void starpu_vector_divide_in_2_filter_func(starpu_filter *f, starpu_data_handle root_data);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -44,14 +44,14 @@ typedef struct starpu_matrix_interface_s {
 
				 	size_t elemsize;
			
 
				 } starpu_matrix_interface_t;
			
 
				 
			
 
				-void starpu_register_matrix_data(starpu_data_handle *handle, uint32_t home_node,
			
 
				+void starpu_matrix_data_register(starpu_data_handle *handle, uint32_t home_node,
			
 
				                         uintptr_t ptr, uint32_t ld, uint32_t nx,
			
 
				                         uint32_t ny, size_t elemsize);
			
 
				-uint32_t starpu_get_matrix_nx(starpu_data_handle handle);
			
 
				-uint32_t starpu_get_matrix_ny(starpu_data_handle handle);
			
 
				-uint32_t starpu_get_matrix_local_ld(starpu_data_handle handle);
			
 
				-uintptr_t starpu_get_matrix_local_ptr(starpu_data_handle handle);
			
 
				-size_t starpu_get_matrix_elemsize(starpu_data_handle handle);
			
 
				+uint32_t starpu_matrix_get_nx(starpu_data_handle handle);
			
 
				+uint32_t starpu_matrix_get_ny(starpu_data_handle handle);
			
 
				+uint32_t starpu_matrix_get_local_ld(starpu_data_handle handle);
			
 
				+uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle handle);
			
 
				+size_t starpu_matrix_get_elemsize(starpu_data_handle handle);
			
 
				 
			
 
				 /* helper methods */
			
 
				 #define STARPU_GET_MATRIX_PTR(interface)	(((starpu_matrix_interface_t *)(interface))->ptr)
			
@@ -74,16 +74,16 @@ typedef struct starpu_block_interface_s {
 
				 	size_t elemsize;
			
 
				 } starpu_block_interface_t;
			
 
				 
			
 
				-void starpu_register_block_data(starpu_data_handle *handle, uint32_t home_node,
			
 
				+void starpu_block_data_register(starpu_data_handle *handle, uint32_t home_node,
			
 
				                         uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
			
 
				                         uint32_t ny, uint32_t nz, size_t elemsize);
			
 
				-uint32_t starpu_get_block_nx(starpu_data_handle handle);
			
 
				-uint32_t starpu_get_block_ny(starpu_data_handle handle);
			
 
				-uint32_t starpu_get_block_nz(starpu_data_handle handle);
			
 
				-uint32_t starpu_get_block_local_ldy(starpu_data_handle handle);
			
 
				-uint32_t starpu_get_block_local_ldz(starpu_data_handle handle);
			
 
				-uintptr_t starpu_get_block_local_ptr(starpu_data_handle handle);
			
 
				-size_t starpu_get_block_elemsize(starpu_data_handle handle);
			
 
				+uint32_t starpu_block_get_nx(starpu_data_handle handle);
			
 
				+uint32_t starpu_block_get_ny(starpu_data_handle handle);
			
 
				+uint32_t starpu_block_get_nz(starpu_data_handle handle);
			
 
				+uint32_t starpu_block_get_local_ldy(starpu_data_handle handle);
			
 
				+uint32_t starpu_block_get_local_ldz(starpu_data_handle handle);
			
 
				+uintptr_t starpu_block_get_local_ptr(starpu_data_handle handle);
			
 
				+size_t starpu_block_get_elemsize(starpu_data_handle handle);
			
 
				 
			
 
				 /* helper methods */
			
 
				 #define STARPU_GET_BLOCK_PTR(interface)	(((starpu_block_interface_t *)(interface))->ptr)
			
@@ -103,11 +103,11 @@ typedef struct starpu_vector_interface_s {
 
				 	size_t elemsize;
			
 
				 } starpu_vector_interface_t;
			
 
				 
			
 
				-void starpu_register_vector_data(starpu_data_handle *handle, uint32_t home_node,
			
 
				+void starpu_vector_data_register(starpu_data_handle *handle, uint32_t home_node,
			
 
				                         uintptr_t ptr, uint32_t nx, size_t elemsize);
			
 
				-uint32_t starpu_get_vector_nx(starpu_data_handle handle);
			
 
				-size_t starpu_get_vector_elemsize(starpu_data_handle handle);
			
 
				-uintptr_t starpu_get_vector_local_ptr(starpu_data_handle handle);
			
 
				+uint32_t starpu_vector_get_nx(starpu_data_handle handle);
			
 
				+size_t starpu_vector_get_elemsize(starpu_data_handle handle);
			
 
				+uintptr_t starpu_vector_get_local_ptr(starpu_data_handle handle);
			
 
				 
			
 
				 /* helper methods */
			
 
				 #define STARPU_GET_VECTOR_PTR(interface)	(((starpu_vector_interface_t *)(interface))->ptr)
			
@@ -144,15 +144,15 @@ typedef struct starpu_csr_interface_s {
 
				 	size_t elemsize;
			
 
				 } starpu_csr_interface_t;
			
 
				 
			
 
				-void starpu_register_csr_data(starpu_data_handle *handle, uint32_t home_node, uint32_t nnz, uint32_t nrow,
			
 
				+void starpu_csr_data_register(starpu_data_handle *handle, uint32_t home_node, uint32_t nnz, uint32_t nrow,
			
 
				 		uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize);
			
 
				-uint32_t starpu_get_csr_nnz(starpu_data_handle handle);
			
 
				-uint32_t starpu_get_csr_nrow(starpu_data_handle handle);
			
 
				-uint32_t starpu_get_csr_firstentry(starpu_data_handle handle);
			
 
				-uintptr_t starpu_get_csr_local_nzval(starpu_data_handle handle);
			
 
				-uint32_t *starpu_get_csr_local_colind(starpu_data_handle handle);
			
 
				-uint32_t *starpu_get_csr_local_rowptr(starpu_data_handle handle);
			
 
				-size_t starpu_get_csr_elemsize(starpu_data_handle handle);
			
 
				+uint32_t starpu_csr_get_nnz(starpu_data_handle handle);
			
 
				+uint32_t starpu_csr_get_nrow(starpu_data_handle handle);
			
 
				+uint32_t starpu_csr_get_firstentry(starpu_data_handle handle);
			
 
				+uintptr_t starpu_csr_get_local_nzval(starpu_data_handle handle);
			
 
				+uint32_t *starpu_csr_get_local_colind(starpu_data_handle handle);
			
 
				+uint32_t *starpu_csr_get_local_rowptr(starpu_data_handle handle);
			
 
				+size_t starpu_csr_get_elemsize(starpu_data_handle handle);
			
 
				 
			
 
				 #define STARPU_GET_CSR_NNZ(interface)	(((starpu_csr_interface_t *)(interface))->nnz)
			
 
				 #define STARPU_GET_CSR_NROW(interface)	(((starpu_csr_interface_t *)(interface))->nrow)
			
@@ -197,19 +197,19 @@ typedef struct starpu_bcsr_interface_s {
 
				 	size_t elemsize;
			
 
				 } starpu_bcsr_interface_t;
			
 
				 
			
 
				-void starpu_register_bcsr_data(starpu_data_handle *handle, uint32_t home_node, uint32_t nnz, uint32_t nrow,
			
 
				+void starpu_bcsr_data_register(starpu_data_handle *handle, uint32_t home_node, uint32_t nnz, uint32_t nrow,
			
 
				 		uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t r, uint32_t c, size_t elemsize);
			
 
				 
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_nnz(starpu_data_handle);
			
 
				-uint32_t starpu_get_bcsr_nrow(starpu_data_handle);
			
 
				-uint32_t starpu_get_bcsr_firstentry(starpu_data_handle);
			
 
				-uintptr_t starpu_get_bcsr_local_nzval(starpu_data_handle);
			
 
				-uint32_t *starpu_get_bcsr_local_colind(starpu_data_handle);
			
 
				-uint32_t *starpu_get_bcsr_local_rowptr(starpu_data_handle);
			
 
				-uint32_t starpu_get_bcsr_r(starpu_data_handle);
			
 
				-uint32_t starpu_get_bcsr_c(starpu_data_handle);
			
 
				-size_t starpu_get_bcsr_elemsize(starpu_data_handle);
			
 
				+uint32_t starpu_bcsr_get_nnz(starpu_data_handle);
			
 
				+uint32_t starpu_bcsr_get_nrow(starpu_data_handle);
			
 
				+uint32_t starpu_bcsr_get_firstentry(starpu_data_handle);
			
 
				+uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle);
			
 
				+uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle);
			
 
				+uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle);
			
 
				+uint32_t starpu_bcsr_get_r(starpu_data_handle);
			
 
				+uint32_t starpu_bcsr_get_c(starpu_data_handle);
			
 
				+size_t starpu_bcsr_get_elemsize(starpu_data_handle);
			
 
				 
			
 
				 #define STARPU_MATRIX_INTERFACE_ID	0
			
 
				 #define STARPU_BLOCK_INTERFACE_ID	1
			
--- a/include/starpu_expert.h
+++ b/include/starpu_expert.h
@@ -26,8 +26,8 @@ extern "C" {
 
				 
			
 
				 void starpu_wake_all_blocked_workers(void);
			
 
				 
			
 
				-int starpu_register_progression_hook(unsigned (*func)(void *arg), void *arg);
			
 
				-void starpu_deregister_progression_hook(int hook_id);
			
 
				+int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg);
			
 
				+void starpu_progression_hook_deregister(int hook_id);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -212,7 +212,7 @@ int starpu_task_wait(struct starpu_task *task);
 
				 
			
 
				 /* This function waits until all the tasks that were already submitted have
			
 
				  * been executed. */
			
 
				-int starpu_wait_all_tasks(void);
			
 
				+int starpu_task_wait_for_all(void);
			
 
				 
			
 
				 void starpu_display_codelet_stats(struct starpu_codelet_t *cl);
			
 
				 
			
--- a/include/starpu_util.h
+++ b/include/starpu_util.h
@@ -196,8 +196,8 @@ static inline int starpu_get_env_number(const char *str)
 
				 void starpu_trace_user_event(unsigned code);
			
 
				 
			
 
				 /* Some helper functions for application using CUBLAS kernels */
			
 
				-void starpu_helper_init_cublas(void);
			
 
				-void starpu_helper_shutdown_cublas(void);
			
 
				+void starpu_helper_cublas_init(void);
			
 
				+void starpu_helper_cublas_shutdown(void);
			
 
				 
			
 
				 /* Call func(arg) on every worker matching the "where" mask (eg.
			
 
				  * STARPU_CUDA|STARPU_CPU to execute the function on every CPU and every CUDA
			
@@ -212,7 +212,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 
				 				void (*callback)(void *), void *callback_arg);
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-cudaStream_t *starpu_get_local_cuda_stream(void);
			
 
				+cudaStream_t *starpu_cuda_get_local_stream(void);
			
 
				 #endif
			
 
				 
			
 
				 /* If FILE is currently on a comment line, eat it.  */
			
--- a/mpi/examples/mpi_lu/plu_example.c
+++ b/mpi/examples/mpi_lu/plu_example.c
@@ -220,7 +220,7 @@ static void init_matrix(int rank)
 
				 			{
			
 
				 				/* This blocks should be treated by the current MPI process */
			
 
				 				/* Allocate and fill it */
			
 
				-				starpu_malloc_pinned_if_possible((void **)blockptr, blocksize);
			
 
				+				starpu_data_malloc_pinned_if_possible((void **)blockptr, blocksize);
			
 
				 				allocated_memory += blocksize;
			
 
				 
			
 
				 				//fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j);
			
@@ -236,7 +236,7 @@ static void init_matrix(int rank)
 
				 				}
			
 
				 
			
 
				 				/* Register it to StarPU */
			
 
				-				starpu_register_matrix_data(handleptr, 0,
			
 
				+				starpu_matrix_data_register(handleptr, 0,
			
 
				 					(uintptr_t)*blockptr, size/nblocks,
			
 
				 					size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 			}
			
@@ -253,9 +253,9 @@ static void init_matrix(int rank)
 
				 
			
 
				 	/* tmp buffer 11 */
			
 
				 #ifdef SINGLE_TMP11
			
 
				-	starpu_malloc_pinned_if_possible((void **)&tmp_11_block, blocksize);
			
 
				+	starpu_data_malloc_pinned_if_possible((void **)&tmp_11_block, blocksize);
			
 
				 	allocated_memory_extra += blocksize;
			
 
				-	starpu_register_matrix_data(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
			
 
				+	starpu_matrix_data_register(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
			
 
				 			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 #else
			
 
				 	tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle));
			
@@ -266,11 +266,11 @@ static void init_matrix(int rank)
 
				 	{
			
 
				 		if (tmp_11_block_is_needed(rank, nblocks, k))
			
 
				 		{
			
 
				-			starpu_malloc_pinned_if_possible((void **)&tmp_11_block[k], blocksize);
			
 
				+			starpu_data_malloc_pinned_if_possible((void **)&tmp_11_block[k], blocksize);
			
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_11_block[k]);
			
 
				 
			
 
				-			starpu_register_matrix_data(&tmp_11_block_handles[k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_11_block_handles[k], 0,
			
 
				 				(uintptr_t)tmp_11_block[k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
@@ -301,22 +301,22 @@ static void init_matrix(int rank)
 
				 #ifdef SINGLE_TMP1221
			
 
				 		if (tmp_12_block_is_needed(rank, nblocks, k))
			
 
				 		{
			
 
				-			starpu_malloc_pinned_if_possible((void **)&tmp_12_block[k], blocksize);
			
 
				+			starpu_data_malloc_pinned_if_possible((void **)&tmp_12_block[k], blocksize);
			
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_12_block[k]);
			
 
				 
			
 
				-			starpu_register_matrix_data(&tmp_12_block_handles[k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_12_block_handles[k], 0,
			
 
				 				(uintptr_t)tmp_12_block[k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
 
				 
			
 
				 		if (tmp_21_block_is_needed(rank, nblocks, k))
			
 
				 		{
			
 
				-			starpu_malloc_pinned_if_possible((void **)&tmp_21_block[k], blocksize);
			
 
				+			starpu_data_malloc_pinned_if_possible((void **)&tmp_21_block[k], blocksize);
			
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_21_block[k]);
			
 
				 
			
 
				-			starpu_register_matrix_data(&tmp_21_block_handles[k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_21_block_handles[k], 0,
			
 
				 				(uintptr_t)tmp_21_block[k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
@@ -324,22 +324,22 @@ static void init_matrix(int rank)
 
				 	for (i = 0; i < 2; i++) {
			
 
				 		if (tmp_12_block_is_needed(rank, nblocks, k))
			
 
				 		{
			
 
				-			starpu_malloc_pinned_if_possible((void **)&tmp_12_block[i][k], blocksize);
			
 
				+			starpu_data_malloc_pinned_if_possible((void **)&tmp_12_block[i][k], blocksize);
			
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_12_block[i][k]);
			
 
				 	
			
 
				-			starpu_register_matrix_data(&tmp_12_block_handles[i][k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_12_block_handles[i][k], 0,
			
 
				 				(uintptr_t)tmp_12_block[i][k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
 
				 
			
 
				 		if (tmp_21_block_is_needed(rank, nblocks, k))
			
 
				 		{
			
 
				-			starpu_malloc_pinned_if_possible((void **)&tmp_21_block[i][k], blocksize);
			
 
				+			starpu_data_malloc_pinned_if_possible((void **)&tmp_21_block[i][k], blocksize);
			
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_21_block[i][k]);
			
 
				 	
			
 
				-			starpu_register_matrix_data(&tmp_21_block_handles[i][k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_21_block_handles[i][k], 0,
			
 
				 				(uintptr_t)tmp_21_block[i][k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
@@ -420,7 +420,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	starpu_init(NULL);
			
 
				 	starpu_mpi_initialize();
			
 
				-	starpu_helper_init_cublas();
			
 
				+	starpu_helper_cublas_init();
			
 
				 
			
 
				 	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
			
 
				 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
			
@@ -553,7 +553,7 @@ int main(int argc, char **argv)
 
				 	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
			
 
				 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
			
 
				 
			
 
				-	starpu_helper_shutdown_cublas();
			
 
				+	starpu_helper_cublas_shutdown();
			
 
				 	starpu_mpi_shutdown();
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/mpi/examples/mpi_lu/pxlu.c
+++ b/mpi/examples/mpi_lu/pxlu.c
@@ -774,9 +774,9 @@ static void wait_tag_and_fetch_handle(starpu_tag_t tag, starpu_data_handle handl
 
				 	starpu_tag_wait(tag);
			
 
				 //	fprintf(stderr, "Rank %d : tag %lx is done\n", rank, tag);
			
 
				 
			
 
				-	starpu_sync_data_with_mem(handle, STARPU_R);
			
 
				+	starpu_data_sync_with_mem(handle, STARPU_R);
			
 
				 
			
 
				-//	starpu_delete_data(handle);
			
 
				+//	starpu_data_unregister(handle);
			
 
				 }
			
 
				 
			
 
				 static void wait_termination(void)
			
--- a/mpi/starpu_mpi.c
+++ b/mpi/starpu_mpi.c
@@ -97,7 +97,7 @@ int starpu_mpi_isend(starpu_data_handle data_handle, starpu_mpi_req *public_req,
 
				 	/* Asynchronously request StarPU to fetch the data in main memory: when
			
 
				 	 * it is available in main memory, submit_mpi_req(req) is called and
			
 
				 	 * the request is actually submitted  */
			
 
				-	starpu_sync_data_with_mem_non_blocking(data_handle, STARPU_R,
			
 
				+	starpu_data_sync_with_mem_non_blocking(data_handle, STARPU_R,
			
 
				 			submit_mpi_req, (void *)req);
			
 
				 
			
 
				 	return 0;
			
@@ -134,7 +134,7 @@ int starpu_mpi_isend_detached(starpu_data_handle data_handle,
 
				 	/* Asynchronously request StarPU to fetch the data in main memory: when
			
 
				 	 * it is available in main memory, submit_mpi_req(req) is called and
			
 
				 	 * the request is actually submitted  */
			
 
				-	starpu_sync_data_with_mem_non_blocking(data_handle, STARPU_R,
			
 
				+	starpu_data_sync_with_mem_non_blocking(data_handle, STARPU_R,
			
 
				 			submit_mpi_req, (void *)req);
			
 
				 
			
 
				 	return 0;
			
@@ -197,7 +197,7 @@ int starpu_mpi_irecv(starpu_data_handle data_handle, starpu_mpi_req *public_req,
 
				 	/* Asynchronously request StarPU to fetch the data in main memory: when
			
 
				 	 * it is available in main memory, submit_mpi_req(req) is called and
			
 
				 	 * the request is actually submitted  */
			
 
				-	starpu_sync_data_with_mem_non_blocking(data_handle, STARPU_W,
			
 
				+	starpu_data_sync_with_mem_non_blocking(data_handle, STARPU_W,
			
 
				 			submit_mpi_req, (void *)req);
			
 
				 
			
 
				 	return 0;
			
@@ -233,7 +233,7 @@ int starpu_mpi_irecv_detached(starpu_data_handle data_handle, int source, int mp
 
				 	/* Asynchronously request StarPU to fetch the data in main memory: when
			
 
				 	 * it is available in main memory, submit_mpi_req(req) is called and
			
 
				 	 * the request is actually submitted  */
			
 
				-	starpu_sync_data_with_mem_non_blocking(data_handle, STARPU_W,
			
 
				+	starpu_data_sync_with_mem_non_blocking(data_handle, STARPU_W,
			
 
				 			submit_mpi_req, (void *)req);
			
 
				 
			
 
				 	return 0;
			
@@ -410,7 +410,7 @@ int starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 
				 static void handle_request_termination(struct starpu_mpi_req_s *req)
			
 
				 {
			
 
				 	MPI_Type_free(&req->datatype);
			
 
				-	starpu_release_data_from_mem(req->data_handle);
			
 
				+	starpu_data_release_from_mem(req->data_handle);
			
 
				 
			
 
				 #ifdef VERBOSE_STARPU_MPI
			
 
				 	int rank;
			
@@ -660,7 +660,7 @@ int starpu_mpi_initialize(void)
 
				 	PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 
			
 
				 #ifdef USE_STARPU_ACTIVITY
			
 
				-	hookid = starpu_register_progression_hook(progression_hook_func, NULL);
			
 
				+	hookid = starpu_progression_hook_register(progression_hook_func, NULL);
			
 
				 	STARPU_ASSERT(hookid >= 0);
			
 
				 #endif
			
 
				 
			
@@ -682,7 +682,7 @@ int starpu_mpi_shutdown(void)
 
				 	pthread_join(progress_thread, &value);
			
 
				 
			
 
				 #ifdef USE_STARPU_ACTIVITY
			
 
				-	starpu_deregister_progression_hook(hookid);
			
 
				+	starpu_progression_hook_deregister(hookid);
			
 
				 #endif 
			
 
				 
			
 
				 	/* liberate the request queues */
			
--- a/mpi/starpu_mpi_datatype.c
+++ b/mpi/starpu_mpi_datatype.c
@@ -32,10 +32,10 @@ static int handle_to_datatype_matrix(starpu_data_handle data_handle, MPI_Datatyp
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	unsigned nx = starpu_get_matrix_nx(data_handle);
			
 
				-	unsigned ny = starpu_get_matrix_ny(data_handle);
			
 
				-	unsigned ld = starpu_get_matrix_local_ld(data_handle);
			
 
				-	size_t elemsize = starpu_get_matrix_elemsize(data_handle);
			
 
				+	unsigned nx = starpu_matrix_get_nx(data_handle);
			
 
				+	unsigned ny = starpu_matrix_get_ny(data_handle);
			
 
				+	unsigned ld = starpu_matrix_get_local_ld(data_handle);
			
 
				+	size_t elemsize = starpu_matrix_get_elemsize(data_handle);
			
 
				 
			
 
				 	ret = MPI_Type_vector(ny, nx*elemsize, ld*elemsize, MPI_BYTE, datatype);
			
 
				 	STARPU_ASSERT(ret == MPI_SUCCESS);
			
@@ -48,7 +48,7 @@ static int handle_to_datatype_matrix(starpu_data_handle data_handle, MPI_Datatyp
 
				 
			
 
				 static void *handle_to_ptr_matrix(starpu_data_handle data_handle)
			
 
				 {
			
 
				-	return (void *)starpu_get_matrix_local_ptr(data_handle);
			
 
				+	return (void *)starpu_matrix_get_local_ptr(data_handle);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -59,12 +59,12 @@ static int handle_to_datatype_block(starpu_data_handle data_handle, MPI_Datatype
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	unsigned nx = starpu_get_block_nx(data_handle);
			
 
				-	unsigned ny = starpu_get_block_ny(data_handle);
			
 
				-	unsigned nz = starpu_get_block_nz(data_handle);
			
 
				-	unsigned ldy = starpu_get_block_local_ldy(data_handle);
			
 
				-	unsigned ldz = starpu_get_block_local_ldz(data_handle);
			
 
				-	size_t elemsize = starpu_get_block_elemsize(data_handle);
			
 
				+	unsigned nx = starpu_block_get_nx(data_handle);
			
 
				+	unsigned ny = starpu_block_get_ny(data_handle);
			
 
				+	unsigned nz = starpu_block_get_nz(data_handle);
			
 
				+	unsigned ldy = starpu_block_get_local_ldy(data_handle);
			
 
				+	unsigned ldz = starpu_block_get_local_ldz(data_handle);
			
 
				+	size_t elemsize = starpu_block_get_elemsize(data_handle);
			
 
				 
			
 
				 	MPI_Datatype datatype_2dlayer;
			
 
				 	ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_2dlayer);
			
@@ -84,7 +84,7 @@ static int handle_to_datatype_block(starpu_data_handle data_handle, MPI_Datatype
 
				 
			
 
				 static void *handle_to_ptr_block(starpu_data_handle data_handle)
			
 
				 {
			
 
				-	return (void *)starpu_get_block_local_ptr(data_handle);
			
 
				+	return (void *)starpu_block_get_local_ptr(data_handle);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -95,8 +95,8 @@ static int handle_to_datatype_vector(starpu_data_handle data_handle, MPI_Datatyp
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	unsigned nx = starpu_get_vector_nx(data_handle);
			
 
				-	size_t elemsize = starpu_get_vector_elemsize(data_handle);
			
 
				+	unsigned nx = starpu_vector_get_nx(data_handle);
			
 
				+	size_t elemsize = starpu_vector_get_elemsize(data_handle);
			
 
				 
			
 
				 	ret = MPI_Type_contiguous(nx*elemsize, MPI_BYTE, datatype);
			
 
				 	STARPU_ASSERT(ret == MPI_SUCCESS);
			
@@ -109,7 +109,7 @@ static int handle_to_datatype_vector(starpu_data_handle data_handle, MPI_Datatyp
 
				 
			
 
				 static void *handle_to_ptr_vector(starpu_data_handle data_handle)
			
 
				 {
			
 
				-	return (void *)starpu_get_vector_local_ptr(data_handle);
			
 
				+	return (void *)starpu_vector_get_local_ptr(data_handle);
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/mpi/tests/block_interface.c
+++ b/mpi/tests/block_interface.c
@@ -72,7 +72,7 @@ int main(int argc, char **argv)
 
				 			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
			
 
				 		}
			
 
				 
			
 
				-		starpu_register_block_data(&block_handle, 0,
			
 
				+		starpu_block_data_register(&block_handle, 0,
			
 
				 			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
			
 
				 			SIZE, SIZE, SIZE, sizeof(float));
			
 
				 	}
			
@@ -81,7 +81,7 @@ int main(int argc, char **argv)
 
				 		block = calloc(SIZE*SIZE*SIZE, sizeof(float));
			
 
				 		assert(block);
			
 
				 
			
 
				-		starpu_register_block_data(&block_handle, 0,
			
 
				+		starpu_block_data_register(&block_handle, 0,
			
 
				 			(uintptr_t)block, SIZE, SIZE*SIZE,
			
 
				 			SIZE, SIZE, SIZE, sizeof(float));
			
 
				 	}
			
@@ -94,7 +94,7 @@ int main(int argc, char **argv)
 
				 		starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status);
			
 
				 
			
 
				 		/* check the content of the block */
			
 
				-		starpu_sync_data_with_mem(block_handle, STARPU_R);
			
 
				+		starpu_data_sync_with_mem(block_handle, STARPU_R);
			
 
				 		unsigned i, j, k;
			
 
				 		for (k = 0; k < SIZE; k++)
			
 
				 		for (j = 0; j < SIZE; j++)
			
@@ -102,7 +102,7 @@ int main(int argc, char **argv)
 
				 		{
			
 
				 			assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f);
			
 
				 		}
			
 
				-		starpu_release_data_from_mem(block_handle);
			
 
				+		starpu_data_release_from_mem(block_handle);
			
 
				 		
			
 
				 	}
			
 
				 	else /* rank == 1 */
			
@@ -111,7 +111,7 @@ int main(int argc, char **argv)
 
				 		starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status);
			
 
				 
			
 
				 		/* check the content of the block and modify it */
			
 
				-		starpu_sync_data_with_mem(block_handle, STARPU_RW);
			
 
				+		starpu_data_sync_with_mem(block_handle, STARPU_RW);
			
 
				 		unsigned i, j, k;
			
 
				 		for (k = 0; k < SIZE; k++)
			
 
				 		for (j = 0; j < SIZE; j++)
			
@@ -120,7 +120,7 @@ int main(int argc, char **argv)
 
				 			assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f);
			
 
				 			block[i + j*SIZE + k*SIZE*SIZE] = 33.0f;
			
 
				 		}
			
 
				-		starpu_release_data_from_mem(block_handle);
			
 
				+		starpu_data_release_from_mem(block_handle);
			
 
				 
			
 
				 		starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD);
			
 
				 	}
			
--- a/mpi/tests/block_interface_pinned.c
+++ b/mpi/tests/block_interface_pinned.c
@@ -60,7 +60,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	if (rank == 0)
			
 
				 	{
			
 
				-		starpu_malloc_pinned_if_possible((void **)&block,
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&block,
			
 
				 				BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float));
			
 
				 		memset(block, 0, BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float));
			
 
				 
			
@@ -73,17 +73,17 @@ int main(int argc, char **argv)
 
				 			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
			
 
				 		}
			
 
				 
			
 
				-		starpu_register_block_data(&block_handle, 0,
			
 
				+		starpu_block_data_register(&block_handle, 0,
			
 
				 			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
			
 
				 			SIZE, SIZE, SIZE, sizeof(float));
			
 
				 	}
			
 
				 	else /* rank == 1 */
			
 
				 	{
			
 
				-		starpu_malloc_pinned_if_possible((void **)&block,
			
 
				+		starpu_data_malloc_pinned_if_possible((void **)&block,
			
 
				 			SIZE*SIZE*SIZE*sizeof(float));
			
 
				 		memset(block, 0, SIZE*SIZE*SIZE*sizeof(float));
			
 
				 
			
 
				-		starpu_register_block_data(&block_handle, 0,
			
 
				+		starpu_block_data_register(&block_handle, 0,
			
 
				 			(uintptr_t)block, SIZE, SIZE*SIZE,
			
 
				 			SIZE, SIZE, SIZE, sizeof(float));
			
 
				 	}
			
@@ -96,7 +96,7 @@ int main(int argc, char **argv)
 
				 		starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status);
			
 
				 
			
 
				 		/* check the content of the block */
			
 
				-		starpu_sync_data_with_mem(block_handle, STARPU_R);
			
 
				+		starpu_data_sync_with_mem(block_handle, STARPU_R);
			
 
				 		unsigned i, j, k;
			
 
				 		for (k = 0; k < SIZE; k++)
			
 
				 		for (j = 0; j < SIZE; j++)
			
@@ -104,7 +104,7 @@ int main(int argc, char **argv)
 
				 		{
			
 
				 			assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f);
			
 
				 		}
			
 
				-		starpu_release_data_from_mem(block_handle);
			
 
				+		starpu_data_release_from_mem(block_handle);
			
 
				 		
			
 
				 	}
			
 
				 	else /* rank == 1 */
			
@@ -113,7 +113,7 @@ int main(int argc, char **argv)
 
				 		starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status);
			
 
				 
			
 
				 		/* check the content of the block and modify it */
			
 
				-		starpu_sync_data_with_mem(block_handle, STARPU_RW);
			
 
				+		starpu_data_sync_with_mem(block_handle, STARPU_RW);
			
 
				 		unsigned i, j, k;
			
 
				 		for (k = 0; k < SIZE; k++)
			
 
				 		for (j = 0; j < SIZE; j++)
			
@@ -122,7 +122,7 @@ int main(int argc, char **argv)
 
				 			assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f);
			
 
				 			block[i + j*SIZE + k*SIZE*SIZE] = 33.0f;
			
 
				 		}
			
 
				-		starpu_release_data_from_mem(block_handle);
			
 
				+		starpu_data_release_from_mem(block_handle);
			
 
				 
			
 
				 		starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD);
			
 
				 	}
			
--- a/mpi/tests/mpi_detached_tag.c
+++ b/mpi/tests/mpi_detached_tag.c
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	tab = malloc(SIZE*sizeof(float));
			
 
				 
			
 
				-	starpu_register_vector_data(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				+	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
--- a/mpi/tests/mpi_irecv.c
+++ b/mpi/tests/mpi_irecv.c
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	tab = malloc(SIZE*sizeof(float));
			
 
				 
			
 
				-	starpu_register_vector_data(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				+	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
--- a/mpi/tests/mpi_irecv_detached.c
+++ b/mpi/tests/mpi_irecv_detached.c
@@ -59,7 +59,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	tab = malloc(SIZE*sizeof(float));
			
 
				 
			
 
				-	starpu_register_vector_data(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				+	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
--- a/mpi/tests/mpi_isend.c
+++ b/mpi/tests/mpi_isend.c
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	tab = malloc(SIZE*sizeof(float));
			
 
				 
			
 
				-	starpu_register_vector_data(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				+	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
--- a/mpi/tests/mpi_isend_detached.c
+++ b/mpi/tests/mpi_isend_detached.c
@@ -59,7 +59,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	tab = malloc(SIZE*sizeof(float));
			
 
				 
			
 
				-	starpu_register_vector_data(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				+	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
--- a/mpi/tests/mpi_test.c
+++ b/mpi/tests/mpi_test.c
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	tab = malloc(SIZE*sizeof(float));
			
 
				 
			
 
				-	starpu_register_vector_data(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				+	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
--- a/mpi/tests/pingpong.c
+++ b/mpi/tests/pingpong.c
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	tab = malloc(SIZE*sizeof(float));
			
 
				 
			
 
				-	starpu_register_vector_data(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				+	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
--- a/mpi/tests/ring.c
+++ b/mpi/tests/ring.c
@@ -75,7 +75,7 @@ int main(int argc, char **argv)
 
				 	starpu_init(NULL);
			
 
				 	starpu_mpi_initialize();
			
 
				 
			
 
				-	starpu_register_vector_data(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
			
 
				+	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
@@ -106,9 +106,9 @@ int main(int argc, char **argv)
 
				 		}
			
 
				 		else {
			
 
				 
			
 
				-			starpu_sync_data_with_mem(token_handle, STARPU_R);
			
 
				+			starpu_data_sync_with_mem(token_handle, STARPU_R);
			
 
				 			fprintf(stdout, "Finished : token value %d\n", token);
			
 
				-			starpu_release_data_from_mem(token_handle);
			
 
				+			starpu_data_release_from_mem(token_handle);
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/mpi/tests/ring_async.c
+++ b/mpi/tests/ring_async.c
@@ -75,7 +75,7 @@ int main(int argc, char **argv)
 
				 	starpu_init(NULL);
			
 
				 	starpu_mpi_initialize();
			
 
				 
			
 
				-	starpu_register_vector_data(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
			
 
				+	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
			
 
				 
			
 
				 	unsigned nloops = NITER;
			
 
				 	unsigned loop;
			
@@ -111,9 +111,9 @@ int main(int argc, char **argv)
 
				 		}
			
 
				 		else {
			
 
				 
			
 
				-			starpu_sync_data_with_mem(token_handle, STARPU_R);
			
 
				+			starpu_data_sync_with_mem(token_handle, STARPU_R);
			
 
				 			fprintf(stdout, "Finished : token value %d\n", token);
			
 
				-			starpu_release_data_from_mem(token_handle);
			
 
				+			starpu_data_release_from_mem(token_handle);
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/src/core/policies/sched_policy.c
+++ b/src/core/policies/sched_policy.c
@@ -223,7 +223,7 @@ int _starpu_push_task(starpu_job_t j)
 
				 		
			
 
				 		if (use_prefetch)
			
 
				 		{
			
 
				-			uint32_t memory_node = starpu_get_worker_memory_node(workerid); 
			
 
				+			uint32_t memory_node = starpu_worker_get_memory_node(workerid); 
			
 
				 			_starpu_prefetch_task_input_on_node(task, memory_node);
			
 
				 		}
			
 
				 
			
--- a/src/core/progress_hook.c
+++ b/src/core/progress_hook.c
@@ -30,7 +30,7 @@ struct progression_hook {
 
				 static pthread_mutex_t progression_hook_mutex = PTHREAD_MUTEX_INITIALIZER;
			
 
				 static struct progression_hook hooks[NMAXHOOKS] = {{NULL, NULL, 0}};
			
 
				 
			
 
				-int starpu_register_progression_hook(unsigned (*func)(void *arg), void *arg)
			
 
				+int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg)
			
 
				 {
			
 
				 	int hook;
			
 
				 	PTHREAD_MUTEX_LOCK(&progression_hook_mutex);
			
@@ -57,7 +57,7 @@ int starpu_register_progression_hook(unsigned (*func)(void *arg), void *arg)
 
				 	return -1;
			
 
				 }
			
 
				 
			
 
				-void starpu_deregister_progression_hook(int hook_id)
			
 
				+void starpu_progression_hook_deregister(int hook_id)
			
 
				 {
			
 
				 	PTHREAD_MUTEX_LOCK(&progression_hook_mutex);
			
 
				 	hooks[hook_id].active = 0;
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -217,7 +217,7 @@ int starpu_task_submit(struct starpu_task *task)
 
				 void starpu_display_codelet_stats(struct starpu_codelet_t *cl)
			
 
				 {
			
 
				 	unsigned worker;
			
 
				-	unsigned nworkers = starpu_get_worker_count();
			
 
				+	unsigned nworkers = starpu_worker_get_count();
			
 
				 
			
 
				 	if (cl->model && cl->model->symbol)
			
 
				 		fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol);
			
@@ -230,7 +230,7 @@ void starpu_display_codelet_stats(struct starpu_codelet_t *cl)
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
 
				 		char name[32];
			
 
				-		starpu_get_worker_name(worker, name, 32);
			
 
				+		starpu_worker_get_name(worker, name, 32);
			
 
				 
			
 
				 		fprintf(stderr, "\t%s -> %ld / %ld (%2.2f \%%)\n", name, cl->per_worker_stats[worker], total, (100.0f*cl->per_worker_stats[worker])/total);
			
 
				 	}
			
@@ -241,7 +241,7 @@ void starpu_display_codelet_stats(struct starpu_codelet_t *cl)
 
				  * regenerable is not considered finished until it was explicitely set as
			
 
				  * non-regenerale anymore (eg. from a callback).
			
 
				  */
			
 
				-int starpu_wait_all_tasks(void)
			
 
				+int starpu_task_wait_for_all(void)
			
 
				 {
			
 
				 	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
			
 
				 		return -EDEADLK;
			
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -20,7 +20,7 @@
 
				 #include <starpu.h>
			
 
				 #include <common/config.h>
			
 
				 
			
 
				-/* In order to implement starpu_wait_all_tasks, we keep track of the number of
			
 
				+/* In order to implement starpu_task_wait_for_all, we keep track of the number of
			
 
				  * task currently submitted */
			
 
				 void _starpu_decrement_nsubmitted_tasks(void);
			
 
				 
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -526,27 +526,27 @@ void starpu_shutdown(void)
 
				 	PTHREAD_MUTEX_UNLOCK(&init_mutex);
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_get_worker_count(void)
			
 
				+unsigned starpu_worker_get_count(void)
			
 
				 {
			
 
				 	return config.nworkers;
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_get_cpu_worker_count(void)
			
 
				+unsigned starpu_cpu_worker_get_count(void)
			
 
				 {
			
 
				 	return config.ncpus;
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_get_cuda_worker_count(void)
			
 
				+unsigned starpu_cuda_worker_get_count(void)
			
 
				 {
			
 
				 	return config.ncudagpus;
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_get_opencl_worker_count(void)
			
 
				+unsigned starpu_opencl_worker_get_count(void)
			
 
				 {
			
 
				 	return config.nopenclgpus;
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_get_spu_worker_count(void)
			
 
				+unsigned starpu_spu_worker_get_count(void)
			
 
				 {
			
 
				 	return config.ngordon_spus;
			
 
				 }
			
@@ -555,8 +555,8 @@ unsigned starpu_get_spu_worker_count(void)
 
				  * that actually performed the task. This function returns the id of the
			
 
				  * processing unit actually executing it, therefore it makes no sense to use it
			
 
				  * within the callbacks of SPU functions for instance. If called by some thread
			
 
				- * that is not controlled by StarPU, starpu_get_worker_id returns -1. */
			
 
				-int starpu_get_worker_id(void)
			
 
				+ * that is not controlled by StarPU, starpu_worker_get_id returns -1. */
			
 
				+int starpu_worker_get_id(void)
			
 
				 {
			
 
				 	struct starpu_worker_s * worker;
			
 
				 
			
@@ -572,7 +572,7 @@ int starpu_get_worker_id(void)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-int starpu_get_worker_devid(int id)
			
 
				+int starpu_worker_get_devid(int id)
			
 
				 {
			
 
				 	return config.workers[id].devid;
			
 
				 }
			
@@ -582,12 +582,12 @@ struct starpu_worker_s *_starpu_get_worker_struct(unsigned id)
 
				 	return &config.workers[id];
			
 
				 }
			
 
				 
			
 
				-enum starpu_archtype starpu_get_worker_type(int id)
			
 
				+enum starpu_archtype starpu_worker_get_type(int id)
			
 
				 {
			
 
				 	return config.workers[id].arch;
			
 
				 }
			
 
				 
			
 
				-void starpu_get_worker_name(int id, char *dst, size_t maxlen)
			
 
				+void starpu_worker_get_name(int id, char *dst, size_t maxlen)
			
 
				 {
			
 
				 	char *name = config.workers[id].name;
			
 
				 
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -482,7 +482,7 @@ inline void _starpu_set_data_requested_flag_if_needed(starpu_data_handle handle,
 
				 //	pthread_spin_unlock(&handle->header_lock);
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_test_if_data_is_allocated_on_node(starpu_data_handle handle, uint32_t memory_node)
			
 
				+unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle handle, uint32_t memory_node)
			
 
				 {
			
 
				 	return handle->per_node[memory_node].allocated;
			
 
				 } 
			
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -121,7 +121,7 @@ cudaStream_t *stream;
 
				 						cures = cudaEventCreate(&req->async_channel.cuda_event);
			
 
				 						STARPU_ASSERT(cures == cudaSuccess);
			
 
				 
			
 
				-						stream = starpu_get_local_cuda_stream();
			
 
				+						stream = starpu_cuda_get_local_stream();
			
 
				 						ret = copy_methods->cuda_to_ram_async(handle, src_node, dst_node, stream);
			
 
				 
			
 
				 						cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
			
@@ -184,7 +184,7 @@ cudaStream_t *stream;
 
				 					cures = cudaEventCreate(&req->async_channel.cuda_event);
			
 
				 					STARPU_ASSERT(cures == cudaSuccess);
			
 
				 
			
 
				-					stream = starpu_get_local_cuda_stream();
			
 
				+					stream = starpu_cuda_get_local_stream();
			
 
				 					ret = copy_methods->ram_to_cuda_async(handle, src_node, dst_node, stream);
			
 
				 
			
 
				 					cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
			
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -25,7 +25,7 @@ static void map_filter(starpu_data_handle root_handle, starpu_filter *f)
 
				 	if (root_handle->nchildren == 0)
			
 
				 	{
			
 
				 		/* this is a leaf */
			
 
				-		starpu_partition_data(root_handle, f);
			
 
				+		starpu_data_partition(root_handle, f);
			
 
				 	}
			
 
				 	else {
			
 
				 		/* try to apply the starpu_filter recursively */
			
@@ -55,7 +55,7 @@ void starpu_map_filters(starpu_data_handle root_handle, unsigned nfilters, ...)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * example starpu_get_sub_data(starpu_data_handle root_handle, 3, 42, 0, 1);
			
 
				+ * example starpu_data_get_sub_data(starpu_data_handle root_handle, 3, 42, 0, 1);
			
 
				  */
			
 
				 starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i)
			
 
				 {
			
@@ -64,7 +64,7 @@ starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i)
 
				 	return &handle->children[i];
			
 
				 }
			
 
				 
			
 
				-starpu_data_handle starpu_get_sub_data(starpu_data_handle root_handle, unsigned depth, ... )
			
 
				+starpu_data_handle starpu_data_get_sub_data(starpu_data_handle root_handle, unsigned depth, ... )
			
 
				 {
			
 
				 	STARPU_ASSERT(root_handle);
			
 
				 	starpu_data_handle current_handle = root_handle;
			
@@ -91,7 +91,7 @@ starpu_data_handle starpu_get_sub_data(starpu_data_handle root_handle, unsigned
 
				  * For now, we assume that partitionned_data is already properly allocated;
			
 
				  * at least by the starpu_filter function !
			
 
				  */
			
 
				-void starpu_partition_data(starpu_data_handle initial_handle, starpu_filter *f)
			
 
				+void starpu_data_partition(starpu_data_handle initial_handle, starpu_filter *f)
			
 
				 {
			
 
				 	int nparts;
			
 
				 	int i;
			
@@ -148,21 +148,21 @@ void starpu_partition_data(starpu_data_handle initial_handle, starpu_filter *f)
 
				 	_starpu_spin_unlock(&initial_handle->header_lock);
			
 
				 }
			
 
				 
			
 
				-void starpu_unpartition_data(starpu_data_handle root_handle, uint32_t gathering_node)
			
 
				+void starpu_data_unpartition(starpu_data_handle root_handle, uint32_t gathering_node)
			
 
				 {
			
 
				 	unsigned child;
			
 
				 	unsigned node;
			
 
				 
			
 
				 	_starpu_spin_lock(&root_handle->header_lock);
			
 
				 
			
 
				-#warning starpu_unpartition_data is not supported with NO_DATA_RW_LOCK yet ...
			
 
				+#warning starpu_data_unpartition is not supported with NO_DATA_RW_LOCK yet ...
			
 
				 
			
 
				 	/* first take all the children lock (in order !) */
			
 
				 	for (child = 0; child < root_handle->nchildren; child++)
			
 
				 	{
			
 
				 		/* make sure the intermediate children is unpartitionned as well */
			
 
				 		if (root_handle->children[child].nchildren > 0)
			
 
				-			starpu_unpartition_data(&root_handle->children[child], gathering_node);
			
 
				+			starpu_data_unpartition(&root_handle->children[child], gathering_node);
			
 
				 
			
 
				 		int ret;
			
 
				 		ret = _starpu_fetch_data_on_node(&root_handle->children[child], gathering_node, 1, 0, 0);
			
--- a/src/datawizard/interfaces/bcsr_filters.c
+++ b/src/datawizard/interfaces/bcsr_filters.c
@@ -42,7 +42,7 @@ void starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused))
 
				 
			
 
				 	/* actually create all the chunks */
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(root_handle, 0));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(root_handle, 0));
			
 
				 
			
 
				 	/* each chunk becomes a small dense matrix */
			
 
				 	unsigned chunk;
			
@@ -62,7 +62,7 @@ void starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused))
 
				 			local->ld = c;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
			
 
				+			if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
			
 
				 				struct starpu_bcsr_interface_s *node_interface =
			
 
				 					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 				uint8_t *nzval = (uint8_t *)(node_interface->nzval);
			
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -106,7 +106,7 @@ static void register_bcsr_handle(starpu_data_handle handle, uint32_t home_node,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void starpu_register_bcsr_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				+void starpu_bcsr_data_register(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind,
			
 
				 		uint32_t *rowptr, uint32_t firstentry,
			
 
				 		uint32_t r, uint32_t c, size_t elemsize)
			
@@ -130,15 +130,15 @@ static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle handle)
 
				 {
			
 
				 	uint32_t hash;
			
 
				 
			
 
				-	hash = _starpu_crc32_be(starpu_get_bcsr_nnz(handle), 0);
			
 
				-	hash = _starpu_crc32_be(starpu_get_bcsr_c(handle), hash);
			
 
				-	hash = _starpu_crc32_be(starpu_get_bcsr_r(handle), hash);
			
 
				+	hash = _starpu_crc32_be(starpu_bcsr_get_nnz(handle), 0);
			
 
				+	hash = _starpu_crc32_be(starpu_bcsr_get_c(handle), hash);
			
 
				+	hash = _starpu_crc32_be(starpu_bcsr_get_r(handle), hash);
			
 
				 
			
 
				 	return hash;
			
 
				 }
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_bcsr_nnz(starpu_data_handle handle)
			
 
				+uint32_t starpu_bcsr_get_nnz(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -146,7 +146,7 @@ uint32_t starpu_get_bcsr_nnz(starpu_data_handle handle)
 
				 	return interface->nnz;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_nrow(starpu_data_handle handle)
			
 
				+uint32_t starpu_bcsr_get_nrow(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -154,7 +154,7 @@ uint32_t starpu_get_bcsr_nrow(starpu_data_handle handle)
 
				 	return interface->nrow;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_firstentry(starpu_data_handle handle)
			
 
				+uint32_t starpu_bcsr_get_firstentry(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -162,7 +162,7 @@ uint32_t starpu_get_bcsr_firstentry(starpu_data_handle handle)
 
				 	return interface->firstentry;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_r(starpu_data_handle handle)
			
 
				+uint32_t starpu_bcsr_get_r(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -170,7 +170,7 @@ uint32_t starpu_get_bcsr_r(starpu_data_handle handle)
 
				 	return interface->r;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_c(starpu_data_handle handle)
			
 
				+uint32_t starpu_bcsr_get_c(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -178,7 +178,7 @@ uint32_t starpu_get_bcsr_c(starpu_data_handle handle)
 
				 	return interface->c;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_bcsr_elemsize(starpu_data_handle handle)
			
 
				+size_t starpu_bcsr_get_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -186,12 +186,12 @@ size_t starpu_get_bcsr_elemsize(starpu_data_handle handle)
 
				 	return interface->elemsize;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_bcsr_local_nzval(starpu_data_handle handle)
			
 
				+uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -199,7 +199,7 @@ uintptr_t starpu_get_bcsr_local_nzval(starpu_data_handle handle)
 
				 	return interface->nzval;
			
 
				 }
			
 
				 
			
 
				-uint32_t *starpu_get_bcsr_local_colind(starpu_data_handle handle)
			
 
				+uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle handle)
			
 
				 {
			
 
				 	/* XXX 0 */
			
 
				 	starpu_bcsr_interface_t *interface =
			
@@ -208,7 +208,7 @@ uint32_t *starpu_get_bcsr_local_colind(starpu_data_handle handle)
 
				 	return interface->colind;
			
 
				 }
			
 
				 
			
 
				-uint32_t *starpu_get_bcsr_local_rowptr(starpu_data_handle handle)
			
 
				+uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	/* XXX 0 */
			
 
				 	starpu_bcsr_interface_t *interface =
			
@@ -222,11 +222,11 @@ static size_t bcsr_interface_get_size(starpu_data_handle handle)
 
				 {
			
 
				 	size_t size;
			
 
				 
			
 
				-	uint32_t nnz = starpu_get_bcsr_nnz(handle);
			
 
				-	uint32_t nrow = starpu_get_bcsr_nrow(handle);
			
 
				-	uint32_t r = starpu_get_bcsr_r(handle);
			
 
				-	uint32_t c = starpu_get_bcsr_c(handle);
			
 
				-	size_t elemsize = starpu_get_bcsr_elemsize(handle);
			
 
				+	uint32_t nnz = starpu_bcsr_get_nnz(handle);
			
 
				+	uint32_t nrow = starpu_bcsr_get_nrow(handle);
			
 
				+	uint32_t r = starpu_bcsr_get_r(handle);
			
 
				+	uint32_t c = starpu_bcsr_get_c(handle);
			
 
				+	size_t elemsize = starpu_bcsr_get_elemsize(handle);
			
 
				 
			
 
				 	size = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); 
			
 
				 
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -132,7 +132,7 @@ static void register_block_handle(starpu_data_handle handle, uint32_t home_node,
 
				 }
			
 
				 
			
 
				 /* declare a new data with the BLAS interface */
			
 
				-void starpu_register_block_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				+void starpu_block_data_register(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				 			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
			
 
				 			uint32_t ny, uint32_t nz, size_t elemsize)
			
 
				 {
			
@@ -155,9 +155,9 @@ static uint32_t footprint_block_interface_crc32(starpu_data_handle handle)
 
				 {
			
 
				 	uint32_t hash;
			
 
				 
			
 
				-	hash = _starpu_crc32_be(starpu_get_block_nx(handle), 0);
			
 
				-	hash = _starpu_crc32_be(starpu_get_block_ny(handle), hash);
			
 
				-	hash = _starpu_crc32_be(starpu_get_block_nz(handle), hash);
			
 
				+	hash = _starpu_crc32_be(starpu_block_get_nx(handle), 0);
			
 
				+	hash = _starpu_crc32_be(starpu_block_get_ny(handle), hash);
			
 
				+	hash = _starpu_crc32_be(starpu_block_get_nz(handle), hash);
			
 
				 
			
 
				 	return hash;
			
 
				 }
			
@@ -184,7 +184,7 @@ static size_t block_interface_get_size(starpu_data_handle handle)
 
				 }
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_block_nx(starpu_data_handle handle)
			
 
				+uint32_t starpu_block_get_nx(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -192,7 +192,7 @@ uint32_t starpu_get_block_nx(starpu_data_handle handle)
 
				 	return interface->nx;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_block_ny(starpu_data_handle handle)
			
 
				+uint32_t starpu_block_get_ny(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -200,7 +200,7 @@ uint32_t starpu_get_block_ny(starpu_data_handle handle)
 
				 	return interface->ny;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_block_nz(starpu_data_handle handle)
			
 
				+uint32_t starpu_block_get_nz(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -208,12 +208,12 @@ uint32_t starpu_get_block_nz(starpu_data_handle handle)
 
				 	return interface->nz;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_block_local_ldy(starpu_data_handle handle)
			
 
				+uint32_t starpu_block_get_local_ldy(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 	
			
 
				 	starpu_block_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -221,12 +221,12 @@ uint32_t starpu_get_block_local_ldy(starpu_data_handle handle)
 
				 	return interface->ldy;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_block_local_ldz(starpu_data_handle handle)
			
 
				+uint32_t starpu_block_get_local_ldz(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_block_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -234,12 +234,12 @@ uint32_t starpu_get_block_local_ldz(starpu_data_handle handle)
 
				 	return interface->ldz;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_block_local_ptr(starpu_data_handle handle)
			
 
				+uintptr_t starpu_block_get_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_block_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -247,7 +247,7 @@ uintptr_t starpu_get_block_local_ptr(starpu_data_handle handle)
 
				 	return interface->ptr;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_block_elemsize(starpu_data_handle handle)
			
 
				+size_t starpu_block_get_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
--- a/src/datawizard/interfaces/csr_filters.c
+++ b/src/datawizard/interfaces/csr_filters.c
@@ -39,7 +39,7 @@ void starpu_vertical_block_filter_func_csr(starpu_filter *f, starpu_data_handle
 
				 	/* actually create all the chunks */
			
 
				 	uint32_t chunk_size = (nrow + nchunks - 1)/nchunks;
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(root_handle, 0));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(root_handle, 0));
			
 
				 	uint32_t *rowptr = root_interface->rowptr;
			
 
				 
			
 
				 	unsigned chunk;
			
@@ -70,7 +70,7 @@ void starpu_vertical_block_filter_func_csr(starpu_filter *f, starpu_data_handle
 
				 			local->firstentry = local_firstentry;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
			
 
				+			if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
			
 
				 				local->rowptr = &root_local->rowptr[first_index];
			
 
				 				local->colind = &root_local->colind[local_firstentry];
			
 
				 				local->nzval = root_local->nzval + local_firstentry * elemsize;
			
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -102,7 +102,7 @@ static void register_csr_handle(starpu_data_handle handle, uint32_t home_node, v
 
				 }
			
 
				 
			
 
				 /* declare a new data with the BLAS interface */
			
 
				-void starpu_register_csr_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				+void starpu_csr_data_register(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize)
			
 
				 {
			
 
				 	starpu_csr_interface_t interface = {
			
@@ -120,11 +120,11 @@ void starpu_register_csr_data(starpu_data_handle *handleptr, uint32_t home_node,
 
				 
			
 
				 static uint32_t footprint_csr_interface_crc32(starpu_data_handle handle)
			
 
				 {
			
 
				-	return _starpu_crc32_be(starpu_get_csr_nnz(handle), 0);
			
 
				+	return _starpu_crc32_be(starpu_csr_get_nnz(handle), 0);
			
 
				 }
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_csr_nnz(starpu_data_handle handle)
			
 
				+uint32_t starpu_csr_get_nnz(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_csr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -132,7 +132,7 @@ uint32_t starpu_get_csr_nnz(starpu_data_handle handle)
 
				 	return interface->nnz;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_csr_nrow(starpu_data_handle handle)
			
 
				+uint32_t starpu_csr_get_nrow(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_csr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -140,7 +140,7 @@ uint32_t starpu_get_csr_nrow(starpu_data_handle handle)
 
				 	return interface->nrow;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_csr_firstentry(starpu_data_handle handle)
			
 
				+uint32_t starpu_csr_get_firstentry(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_csr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -148,7 +148,7 @@ uint32_t starpu_get_csr_firstentry(starpu_data_handle handle)
 
				 	return interface->firstentry;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_csr_elemsize(starpu_data_handle handle)
			
 
				+size_t starpu_csr_get_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_csr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -156,12 +156,12 @@ size_t starpu_get_csr_elemsize(starpu_data_handle handle)
 
				 	return interface->elemsize;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_csr_local_nzval(starpu_data_handle handle)
			
 
				+uintptr_t starpu_csr_get_local_nzval(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_csr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -169,12 +169,12 @@ uintptr_t starpu_get_csr_local_nzval(starpu_data_handle handle)
 
				 	return interface->nzval;
			
 
				 }
			
 
				 
			
 
				-uint32_t *starpu_get_csr_local_colind(starpu_data_handle handle)
			
 
				+uint32_t *starpu_csr_get_local_colind(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_csr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -182,12 +182,12 @@ uint32_t *starpu_get_csr_local_colind(starpu_data_handle handle)
 
				 	return interface->colind;
			
 
				 }
			
 
				 
			
 
				-uint32_t *starpu_get_csr_local_rowptr(starpu_data_handle handle)
			
 
				+uint32_t *starpu_csr_get_local_rowptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_csr_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -199,9 +199,9 @@ static size_t csr_interface_get_size(starpu_data_handle handle)
 
				 {
			
 
				 	size_t size;
			
 
				 
			
 
				-	uint32_t nnz = starpu_get_csr_nnz(handle);
			
 
				-	uint32_t nrow = starpu_get_csr_nrow(handle);
			
 
				-	size_t elemsize = starpu_get_csr_elemsize(handle);
			
 
				+	uint32_t nnz = starpu_csr_get_nnz(handle);
			
 
				+	uint32_t nrow = starpu_csr_get_nrow(handle);
			
 
				+	size_t elemsize = starpu_csr_get_elemsize(handle);
			
 
				 
			
 
				 	size = nnz*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t);
			
 
				 
			
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -129,7 +129,7 @@ void starpu_data_liberate_interfaces(starpu_data_handle handle)
 
				 		free(handle->interface[node]);
			
 
				 }
			
 
				 
			
 
				-void starpu_delete_data(starpu_data_handle handle)
			
 
				+void starpu_data_unregister(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 
			
--- a/src/datawizard/interfaces/matrix_filters.c
+++ b/src/datawizard/interfaces/matrix_filters.c
@@ -63,7 +63,7 @@ void starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
 
				 			local->ny = ny;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
			
 
				+			if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
			
 
				 				starpu_matrix_interface_t *local_root =
			
 
				 					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
@@ -116,7 +116,7 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 
				 			local->ny = child_ny;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
			
 
				+			if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
			
 
				 				starpu_matrix_interface_t *local_root =
			
 
				 					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
--- a/src/datawizard/interfaces/matrix_interface.c
+++ b/src/datawizard/interfaces/matrix_interface.c
@@ -140,7 +140,7 @@ static void register_matrix_handle(starpu_data_handle handle, uint32_t home_node
 
				 }
			
 
				 
			
 
				 /* declare a new data with the BLAS interface */
			
 
				-void starpu_register_matrix_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				+void starpu_matrix_data_register(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				 			uintptr_t ptr, uint32_t ld, uint32_t nx,
			
 
				 			uint32_t ny, size_t elemsize)
			
 
				 {
			
@@ -159,7 +159,7 @@ void starpu_register_matrix_data(starpu_data_handle *handleptr, uint32_t home_no
 
				 
			
 
				 static uint32_t footprint_matrix_interface_crc32(starpu_data_handle handle)
			
 
				 {
			
 
				-	return _starpu_crc32_be(starpu_get_matrix_nx(handle), starpu_get_matrix_ny(handle));
			
 
				+	return _starpu_crc32_be(starpu_matrix_get_nx(handle), starpu_matrix_get_ny(handle));
			
 
				 }
			
 
				 
			
 
				 static void display_matrix_interface(starpu_data_handle handle, FILE *f)
			
@@ -182,7 +182,7 @@ static size_t matrix_interface_get_size(starpu_data_handle handle)
 
				 }
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_matrix_nx(starpu_data_handle handle)
			
 
				+uint32_t starpu_matrix_get_nx(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_matrix_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -190,7 +190,7 @@ uint32_t starpu_get_matrix_nx(starpu_data_handle handle)
 
				 	return interface->nx;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_matrix_ny(starpu_data_handle handle)
			
 
				+uint32_t starpu_matrix_get_ny(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_matrix_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -198,12 +198,12 @@ uint32_t starpu_get_matrix_ny(starpu_data_handle handle)
 
				 	return interface->ny;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_matrix_local_ld(starpu_data_handle handle)
			
 
				+uint32_t starpu_matrix_get_local_ld(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_matrix_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -211,12 +211,12 @@ uint32_t starpu_get_matrix_local_ld(starpu_data_handle handle)
 
				 	return interface->ld;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_matrix_local_ptr(starpu_data_handle handle)
			
 
				+uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_matrix_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -224,7 +224,7 @@ uintptr_t starpu_get_matrix_local_ptr(starpu_data_handle handle)
 
				 	return interface->ptr;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_matrix_elemsize(starpu_data_handle handle)
			
 
				+size_t starpu_matrix_get_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_matrix_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
--- a/src/datawizard/interfaces/variable_interface.c
+++ b/src/datawizard/interfaces/variable_interface.c
@@ -160,7 +160,7 @@ uintptr_t starpu_get_variable_local_ptr(starpu_data_handle handle)
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	return STARPU_GET_VARIABLE_PTR(starpu_data_get_interface_on_node(handle, node));
			
 
				 }
			
--- a/src/datawizard/interfaces/vector_filters.c
+++ b/src/datawizard/interfaces/vector_filters.c
@@ -57,7 +57,7 @@ void starpu_block_filter_func_vector(starpu_filter *f, starpu_data_handle root_h
 
				 			local->nx = child_nx;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
			
 
				+			if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
			
 
				 				starpu_vector_interface_t *local_root =
			
 
				 					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
@@ -70,7 +70,7 @@ void starpu_block_filter_func_vector(starpu_filter *f, starpu_data_handle root_h
 
				 }
			
 
				 
			
 
				 
			
 
				-void starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle root_handle)
			
 
				+void starpu_vector_divide_in_2_filter_func(starpu_filter *f, starpu_data_handle root_handle)
			
 
				 {
			
 
				 	uint32_t length_first = f->filter_arg;
			
 
				 
			
@@ -97,7 +97,7 @@ void starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle
 
				 		local->nx = length_first;
			
 
				 		local->elemsize = elemsize;
			
 
				 
			
 
				-		if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
			
 
				+		if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
			
 
				 			starpu_vector_interface_t *local_root =
			
 
				 				starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
@@ -118,7 +118,7 @@ void starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle
 
				 		local->nx = nx - length_first;
			
 
				 		local->elemsize = elemsize;
			
 
				 
			
 
				-		if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
			
 
				+		if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
			
 
				 			starpu_vector_interface_t *local_root =
			
 
				 				starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
@@ -129,7 +129,7 @@ void starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void starpu_list_filter_func_vector(starpu_filter *f, starpu_data_handle root_handle)
			
 
				+void starpu_vector_list_filter_func(starpu_filter *f, starpu_data_handle root_handle)
			
 
				 {
			
 
				 	uint32_t nchunks = f->filter_arg;
			
 
				 	uint32_t *length_tab = f->filter_arg_ptr;
			
@@ -162,7 +162,7 @@ void starpu_list_filter_func_vector(starpu_filter *f, starpu_data_handle root_ha
 
				 			local->nx = chunk_size;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
			
 
				+			if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
			
 
				 				starpu_vector_interface_t *local_root =
			
 
				 					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -130,7 +130,7 @@ int convert_vector_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t
 
				 #endif
			
 
				 
			
 
				 /* declare a new data with the vector interface */
			
 
				-void starpu_register_vector_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				+void starpu_vector_data_register(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				                         uintptr_t ptr, uint32_t nx, size_t elemsize)
			
 
				 {
			
 
				 	starpu_vector_interface_t vector = {
			
@@ -147,7 +147,7 @@ void starpu_register_vector_data(starpu_data_handle *handleptr, uint32_t home_no
 
				 
			
 
				 static uint32_t footprint_vector_interface_crc32(starpu_data_handle handle)
			
 
				 {
			
 
				-	return _starpu_crc32_be(starpu_get_vector_nx(handle), 0);
			
 
				+	return _starpu_crc32_be(starpu_vector_get_nx(handle), 0);
			
 
				 }
			
 
				 
			
 
				 static void display_vector_interface(starpu_data_handle handle, FILE *f)
			
@@ -170,7 +170,7 @@ static size_t vector_interface_get_size(starpu_data_handle handle)
 
				 }
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_vector_nx(starpu_data_handle handle)
			
 
				+uint32_t starpu_vector_get_nx(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_vector_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
@@ -178,12 +178,12 @@ uint32_t starpu_get_vector_nx(starpu_data_handle handle)
 
				 	return interface->nx;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_vector_local_ptr(starpu_data_handle handle)
			
 
				+uintptr_t starpu_vector_get_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				 
			
 
				 	starpu_vector_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
@@ -191,7 +191,7 @@ uintptr_t starpu_get_vector_local_ptr(starpu_data_handle handle)
 
				 	return interface->ptr;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_vector_elemsize(starpu_data_handle handle)
			
 
				+size_t starpu_vector_get_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_vector_interface_t *interface =
			
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
--- a/src/datawizard/memory_nodes.c
+++ b/src/datawizard/memory_nodes.c
@@ -144,7 +144,7 @@ void _starpu_memory_node_attach_queue(struct starpu_jobq_s *q, unsigned nodeid)
 
				 	pthread_rwlock_unlock(&descr.attached_queues_rwlock);
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_get_worker_memory_node(unsigned workerid)
			
 
				+unsigned starpu_worker_get_memory_node(unsigned workerid)
			
 
				 {
			
 
				 	struct starpu_worker_s *worker = _starpu_get_worker_struct(workerid);
			
 
				 
			
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -21,7 +21,7 @@
 
				 #include <datawizard/write_back.h>
			
 
				 #include <core/dependencies/data_concurrency.h>
			
 
				 
			
 
				-int starpu_request_data_allocation(starpu_data_handle handle, uint32_t node)
			
 
				+int starpu_data_request_allocation(starpu_data_handle handle, uint32_t node)
			
 
				 {
			
 
				 	starpu_data_request_t r;
			
 
				 
			
@@ -68,7 +68,7 @@ static inline void _starpu_sync_data_with_mem_continuation(void *arg)
 
				 	
			
 
				 	if (statenode->non_blocking)
			
 
				 	{
			
 
				-		/* continuation of starpu_sync_data_with_mem_non_blocking: we
			
 
				+		/* continuation of starpu_data_sync_with_mem_non_blocking: we
			
 
				 		 * execute the callback if any  */
			
 
				 		if (statenode->callback)
			
 
				 			statenode->callback(statenode->callback_arg);
			
@@ -76,7 +76,7 @@ static inline void _starpu_sync_data_with_mem_continuation(void *arg)
 
				 		free(statenode);
			
 
				 	}
			
 
				 	else {
			
 
				-		/* continuation of starpu_sync_data_with_mem */
			
 
				+		/* continuation of starpu_data_sync_with_mem */
			
 
				 		PTHREAD_MUTEX_LOCK(&statenode->lock);
			
 
				 		statenode->finished = 1;
			
 
				 		PTHREAD_COND_SIGNAL(&statenode->cond);
			
@@ -84,8 +84,8 @@ static inline void _starpu_sync_data_with_mem_continuation(void *arg)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-/* The data must be released by calling starpu_release_data_from_mem later on */
			
 
				-int starpu_sync_data_with_mem(starpu_data_handle handle, starpu_access_mode mode)
			
 
				+/* The data must be released by calling starpu_data_release_from_mem later on */
			
 
				+int starpu_data_sync_with_mem(starpu_data_handle handle, starpu_access_mode mode)
			
 
				 {
			
 
				 	STARPU_ASSERT(handle);
			
 
				 
			
@@ -123,8 +123,8 @@ int starpu_sync_data_with_mem(starpu_data_handle handle, starpu_access_mode mode
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-/* The data must be released by calling starpu_release_data_from_mem later on */
			
 
				-int starpu_sync_data_with_mem_non_blocking(starpu_data_handle handle,
			
 
				+/* The data must be released by calling starpu_data_release_from_mem later on */
			
 
				+int starpu_data_sync_with_mem_non_blocking(starpu_data_handle handle,
			
 
				 		starpu_access_mode mode, void (*callback)(void *), void *arg)
			
 
				 {
			
 
				 	STARPU_ASSERT(handle);
			
@@ -154,9 +154,9 @@ int starpu_sync_data_with_mem_non_blocking(starpu_data_handle handle,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-/* This function must be called after starpu_sync_data_with_mem so that the
			
 
				+/* This function must be called after starpu_data_sync_with_mem so that the
			
 
				  * application release the data */
			
 
				-void starpu_release_data_from_mem(starpu_data_handle handle)
			
 
				+void starpu_data_release_from_mem(starpu_data_handle handle)
			
 
				 {
			
 
				 	STARPU_ASSERT(handle);
			
 
				 
			
@@ -229,7 +229,7 @@ int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle handle, unsigned
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-int starpu_prefetch_data_on_node(starpu_data_handle handle, unsigned node, unsigned async)
			
 
				+int starpu_data_prefetch_on_node(starpu_data_handle handle, unsigned node, unsigned async)
			
 
				 {
			
 
				 	return _starpu_prefetch_data_on_node_with_mode(handle, node, async, STARPU_R);
			
 
				 }
			
@@ -238,7 +238,7 @@ int starpu_prefetch_data_on_node(starpu_data_handle handle, unsigned node, unsig
 
				  *	It is possible to specify that a piece of data can be discarded without
			
 
				  *	impacting the application.
			
 
				  */
			
 
				-void starpu_advise_if_data_is_important(starpu_data_handle handle, unsigned is_important)
			
 
				+void starpu_data_advise_as_important(starpu_data_handle handle, unsigned is_important)
			
 
				 {
			
 
				 	_starpu_spin_lock(&handle->header_lock);
			
 
				 
			
@@ -249,7 +249,7 @@ void starpu_advise_if_data_is_important(starpu_data_handle handle, unsigned is_i
 
				 		/* make sure the intermediate children is advised as well */
			
 
				 		struct starpu_data_state_t *child_handle = &handle->children[child];
			
 
				 		if (child_handle->nchildren > 0)
			
 
				-			starpu_advise_if_data_is_important(child_handle, is_important);
			
 
				+			starpu_data_advise_as_important(child_handle, is_important);
			
 
				 	}
			
 
				 
			
 
				 	handle->is_not_important = !is_important;
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -25,9 +25,9 @@ static int ncudagpus;
 
				 
			
 
				 static cudaStream_t streams[STARPU_NMAXWORKERS];
			
 
				 
			
 
				-cudaStream_t *starpu_get_local_cuda_stream(void)
			
 
				+cudaStream_t *starpu_cuda_get_local_stream(void)
			
 
				 {
			
 
				-	int worker = starpu_get_worker_id();
			
 
				+	int worker = starpu_worker_get_id();
			
 
				 
			
 
				 	return &streams[worker];
			
 
				 }
			
@@ -43,7 +43,7 @@ static void init_context(int devid)
 
				 	/* force CUDA to initialize the context for real */
			
 
				 	cudaFree(0);
			
 
				 
			
 
				-	cures = cudaStreamCreate(starpu_get_local_cuda_stream());
			
 
				+	cures = cudaStreamCreate(starpu_cuda_get_local_stream());
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				 		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 }
			
--- a/src/util/execute_on_all.c
+++ b/src/util/execute_on_all.c
@@ -32,7 +32,7 @@ static void wrapper_func(void *buffers[] __attribute__ ((unused)), void *_args)
 
				 void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where)
			
 
				 {
			
 
				 	unsigned worker;
			
 
				-	unsigned nworkers = starpu_get_worker_count();
			
 
				+	unsigned nworkers = starpu_worker_get_count();
			
 
				 
			
 
				 	/* create a wrapper codelet */
			
 
				 	struct starpu_codelet_t wrapper_cl = {
			
--- a/src/util/malloc.c
+++ b/src/util/malloc.c
@@ -69,7 +69,7 @@ static starpu_codelet malloc_pinned_cl = {
 
				 };
			
 
				 #endif
			
 
				 
			
 
				-int starpu_malloc_pinned_if_possible(void **A, size_t dim)
			
 
				+int starpu_data_malloc_pinned_if_possible(void **A, size_t dim)
			
 
				 {
			
 
				 	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
			
 
				 		return -EDEADLK;
			
@@ -161,7 +161,7 @@ static starpu_codelet free_pinned_cl = {
 
				 };
			
 
				 #endif
			
 
				 
			
 
				-int starpu_free_pinned_if_possible(void *A)
			
 
				+int starpu_data_free_pinned_if_possible(void *A)
			
 
				 {
			
 
				 	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
			
 
				 		return -EDEADLK;
			
--- a/src/util/starpu_cublas.c
+++ b/src/util/starpu_cublas.c
@@ -31,14 +31,14 @@ static void shutdown_cublas_func(void *args __attribute__((unused)))
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-void starpu_helper_init_cublas(void)
			
 
				+void starpu_helper_cublas_init(void)
			
 
				 {
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	starpu_execute_on_each_worker(init_cublas_func, NULL, STARPU_CUDA);
			
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-void starpu_helper_shutdown_cublas(void)
			
 
				+void starpu_helper_cublas_shutdown(void)
			
 
				 {
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	starpu_execute_on_each_worker(shutdown_cublas_func, NULL, STARPU_CUDA);
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -74,7 +74,7 @@ check_PROGRAMS += 				\
 
				 	core/restart				\
			
 
				 	core/execute_on_a_specific_worker	\
			
 
				 	core/multithreaded			\
			
 
				-	core/starpu_wait_all_tasks		\
			
 
				+	core/starpu_task_wait_for_all		\
			
 
				 	core/starpu_task_wait			\
			
 
				 	core/static_restartable			\
			
 
				 	core/static_restartable_using_initializer\
			
@@ -124,8 +124,8 @@ core_execute_on_a_specific_worker_SOURCES = 	\
 
				 core_multithreaded_SOURCES =			\
			
 
				 	core/multithreaded.c
			
 
				 
			
 
				-core_starpu_wait_all_tasks_SOURCES =		\
			
 
				-	core/starpu_wait_all_tasks.c
			
 
				+core_starpu_task_wait_for_all_SOURCES =		\
			
 
				+	core/starpu_task_wait_for_all.c
			
 
				 
			
 
				 core_starpu_task_wait_SOURCES =			\
			
 
				 	core/starpu_task_wait.c
			
--- a/tests/core/declare_deps_after_submission.c
+++ b/tests/core/declare_deps_after_submission.c
@@ -83,7 +83,7 @@ int main(int argc, char **argv)
 
				 		STARPU_ASSERT(!ret);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/tests/core/declare_deps_after_submission_synchronous.c
+++ b/tests/core/declare_deps_after_submission_synchronous.c
@@ -78,7 +78,7 @@ int main(int argc, char **argv)
 
				 		starpu_task_destroy(taskA);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/tests/core/declare_deps_in_callback.c
+++ b/tests/core/declare_deps_in_callback.c
@@ -76,7 +76,7 @@ int main(int argc, char **argv)
 
				 		starpu_task_submit(taskA);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/tests/core/execute_on_a_specific_worker.c
+++ b/tests/core/execute_on_a_specific_worker.c
@@ -51,7 +51,7 @@ static void callback(void *arg)
 
				 
			
 
				 static void codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
			
 
				 {
			
 
				-//	int id = starpu_get_worker_id();
			
 
				+//	int id = starpu_worker_get_id();
			
 
				 //	fprintf(stderr, "worker #%d\n", id);
			
 
				 }
			
 
				 
			
@@ -83,10 +83,10 @@ int main(int argc, char **argv)
 
				 {
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	starpu_malloc_pinned_if_possible((void **)&v, VECTORSIZE*sizeof(unsigned));
			
 
				-	starpu_register_vector_data(&v_handle, 0, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
			
 
				+	starpu_data_malloc_pinned_if_possible((void **)&v, VECTORSIZE*sizeof(unsigned));
			
 
				+	starpu_vector_data_register(&v_handle, 0, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
			
 
				 
			
 
				-	unsigned nworker = starpu_get_worker_count();
			
 
				+	unsigned nworker = starpu_worker_get_count();
			
 
				 
			
 
				 	cnt = nworker*N;
			
 
				 
			
--- a/tests/core/get_current_task.c
+++ b/tests/core/get_current_task.c
@@ -69,7 +69,7 @@ int main(int argc, char **argv)
 
				 		STARPU_ASSERT(!ret);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 	
			
 
				 	fprintf(stderr, "#empty tasks : %d\n", ntasks);
			
 
				 
			
@@ -89,7 +89,7 @@ int main(int argc, char **argv)
 
				 		STARPU_ASSERT(!ret);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/tests/core/multithreaded.c
+++ b/tests/core/multithreaded.c
@@ -57,7 +57,7 @@ void *thread_func(void *arg __attribute__((unused)))
 
				 		STARPU_ASSERT(!ret);
			
 
				 	}
			
 
				 
			
 
				-	starpu_wait_all_tasks();
			
 
				+	starpu_task_wait_for_all();
			
 
				 
			
 
				 	return NULL;
			
 
				 }
			
--- a/tests/core/starpu_wait_all_tasks.c
+++ b/tests/core/starpu_wait_all_tasks.c