12 anos atrás · 922885d4da
--- a/AUTHORS
+++ b/AUTHORS
@@ -15,6 +15,7 @@ Damien Pasqualinotto <dam.pasqualinotto@wanadoo.fr>
 
				 Nguyen Quôc-Dinh <nguyen.quocdinh@gmail.com>
			
 
				 Cyril Roelandt <cyril.roelandt@inria.fr>
			
 
				 Anthony Roy <theanthony33@gmail.com>
			
 
				+Corentin Salingue <corentin.salingue@gmail.com>
			
 
				 Ludovic Stordeur <ludovic.stordeur@inria.fr>
			
 
				 François Tessier <francois.tessier@inria.fr>
			
 
				 Samuel Thibault <samuel.thibault@labri.fr>
			
--- a/ChangeLog
+++ b/ChangeLog
@@ -32,6 +32,14 @@ New features:
 
				 	  the envelope.
			
 
				   * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to
			
 
				     let starpu commute write accesses.
			
 
				+  * Out-of-core support, through registration of disk areas as additional memory
			
 
				+    nodes.
			
 
				+  * StarPU-MPI: new function
			
 
				+    starpu_mpi_irecv_detached_sequential_consistency which allows to
			
 
				+    enable or disable the sequential consistency for the given data
			
 
				+    handle (sequential consistency will be enabled or disabled based
			
 
				+    on the value of the function parameter and the value of the
			
 
				+    sequential consistency defined for the given data)
			
 
				 
			
 
				 Small features:
			
 
				   * Add cl_arg_free field to enable automatic free(cl_arg) on task
			
@@ -39,7 +47,15 @@ Small features:
 
				   * New functions starpu_data_acquire_cb_sequential_consistency() and
			
 
				     starpu_data_acquire_on_node_cb_sequential_consistency() which allows
			
 
				     to enable or disable sequential consistency
			
 
				+  * New configure option --enable-fxt-lock which enables additional
			
 
				+    trace events focused on locks behaviour during the execution
			
 
				 
			
 
				+Changes:
			
 
				+  * Fix of the livelock issue discovered while executing applications
			
 
				+    on a CPU+GPU cluster of machines by adding a maximum trylock 
			
 
				+    threshold before a blocking lock.
			
 
				+  * Data interfaces (variable, vector, matrix and block) now define
			
 
				+    pack und unpack functions
			
 
				 
			
 
				 StarPU 1.1.0 (svn revision xxxx)
			
 
				 ==============================================
			
--- a/Makefile.am
+++ b/Makefile.am
@@ -79,6 +79,7 @@ versinclude_HEADERS = 				\
 
				 	include/starpu_deprecated_api.h         \
			
 
				 	include/starpu_hash.h			\
			
 
				 	include/starpu_rand.h			\
			
 
				+	include/starpu_disk.h			\
			
 
				 	include/starpu_cublas.h			\
			
 
				 	include/starpu_driver.h			\
			
 
				 	include/starpu_stdlib.h			\
			
--- a/configure.ac
+++ b/configure.ac
@@ -134,8 +134,13 @@ case "$target" in
 
				   libext=a
			
 
				   AC_DEFINE(STARPU_HAVE_WINDOWS, [], [Define this on windows.])
			
 
				   ;;
			
 
				+*-*-linux*)
			
 
				+  starpu_linux=yes
			
 
				+  AC_DEFINE(STARPU_LINUX_SYS, 1, [Define to 1 on Linux])
			
 
				+  ;;
			
 
				 esac
			
 
				 AM_CONDITIONAL([STARPU_HAVE_WINDOWS], [test "x$starpu_windows" = "xyes"])
			
 
				+AM_CONDITIONAL([STARPU_LINUX_SYS], [test "x$starpu_linux" = "xyes"])
			
 
				 
			
 
				 # on Darwin, GCC targets i386 by default, so we don't have atomic ops
			
 
				 AC_CHECK_SIZEOF([void *])
			
@@ -959,13 +964,13 @@ AC_MSG_RESULT($nmaxmicdev)
 
				 AC_DEFINE_UNQUOTED(STARPU_MAXMICDEVS, [$nmaxmicdev],
			
 
				 	[maximum number of MIC devices])
			
 
				 
			
 
				-AC_MSG_CHECKING(maximum number of MIC cores)
			
 
				-AC_ARG_ENABLE(maxmicdev, [AS_HELP_STRING([--enable-maxmiccore=<number>],
			
 
				-			[maximum number of MIC cores])],
			
 
				-			nmaxmiccore=$enableval, nmaxmiccore=128)
			
 
				-AC_MSG_RESULT($nmaxmiccore)
			
 
				+AC_MSG_CHECKING(maximum number of MIC threads)
			
 
				+AC_ARG_ENABLE(maxmicthreads, [AS_HELP_STRING([--enable-maxmicthreads=<number>],
			
 
				+			[maximum number of MIC threads])],
			
 
				+			nmaxmicthreads=$enableval, nmaxmicthreads=128)
			
 
				+AC_MSG_RESULT($nmaxmicthread)
			
 
				 
			
 
				-AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmiccore],
			
 
				+AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmicthreads],
			
 
				 	[maximum number of MIC cores])
			
 
				 
			
 
				 AC_ARG_WITH(coi-dir,
			
@@ -1379,38 +1384,48 @@ AC_MSG_RESULT($nmaxbuffers)
 
				 AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
			
 
				 		[how many buffers can be manipulated per task])
			
 
				 
			
 
				-# TODO: add option to choose maxnodes
			
 
				-if test x$enable_simgrid = xyes ; then
			
 
				-	# We still need the room for the virtual CUDA/OpenCL devices
			
 
				-	maxnodes=16
			
 
				-else
			
 
				-	# We have one memory node shared by all CPU workers, one node per GPU
			
 
				-	# and per MIC device
			
 
				-	nodes=1
			
 
				-	if test x$enable_cuda = xyes ; then
			
 
				-		# we could have used nmaxcudadev + 1, but this would certainly give an
			
 
				-		# odd number.
			
 
				-		nodes=`expr $nodes + $nmaxcudadev`
			
 
				-	fi
			
 
				-	if test x$enable_opencl = xyes ; then
			
 
				-		# we could have used nmaxcudadev + 1, but this would certainly give an
			
 
				-		# odd number.
			
 
				-		nodes=`expr $nodes + $nmaxopencldev`
			
 
				-	fi
			
 
				-	if test x$enable_mic = xyes ; then
			
 
				-		nodes=`expr $nodes + $nmaxmicdev`
			
 
				-	fi
			
 
				-	if test x$enable_rcce = xyes ; then
			
 
				-		# Only 1 memory node for the shared memory.
			
 
				-		nodes=`expr $nodes + 1`
			
 
				-	fi
			
 
				+AC_MSG_CHECKING(maximum number of nodes to use)
			
 
				+AC_ARG_ENABLE(maxnodes, [AS_HELP_STRING([--enable-maxnodes=<nnodes>],
			
 
				+			[maximum number of nodes])],
			
 
				+			maxnodes=$enableval, maxnodes=0)
			
 
				 
			
 
				-	# set maxnodes to the next power of 2 greater than nodes
			
 
				-	maxnodes=1
			
 
				-	while test "$maxnodes" -lt "$nodes"
			
 
				-	do
			
 
				-		maxnodes=`expr $maxnodes \* 2`
			
 
				-	done
			
 
				+if test x$maxnodes = x0 ; then
			
 
				+	if test x$enable_simgrid = xyes ; then
			
 
				+		# We still need the room for the virtual CUDA/OpenCL devices
			
 
				+		maxnodes=16
			
 
				+	else
			
 
				+		# We have one memory node shared by all CPU workers, one node per GPU
			
 
				+		# and per MIC device
			
 
				+		# we add nodes to use 4 memory disks
			
 
				+		nodes=5
			
 
				+		if test x$enable_cuda = xyes ; then
			
 
				+			# we could have used nmaxcudadev + 1, but this would certainly give an
			
 
				+			# odd number.
			
 
				+			nodes=`expr $nodes + $nmaxcudadev`
			
 
				+		fi
			
 
				+		if test x$enable_opencl = xyes ; then
			
 
				+			# we could have used nmaxcudadev + 1, but this would certainly give an
			
 
				+			# odd number.
			
 
				+			nodes=`expr $nodes + $nmaxopencldev`
			
 
				+		fi
			
 
				+		if test x$enable_mic = xyes ; then
			
 
				+			nodes=`expr $nodes + $nmaxmicdev`
			
 
				+		fi
			
 
				+		if test x$enable_rcce = xyes ; then
			
 
				+			# Only 1 memory node for the shared memory.
			
 
				+			nodes=`expr $nodes + 1`
			
 
				+		fi
			
 
				+
			
 
				+		# set maxnodes to the next power of 2 greater than nodes
			
 
				+		maxnodes=1
			
 
				+		while test "$maxnodes" -lt "$nodes"
			
 
				+		do
			
 
				+			maxnodes=`expr $maxnodes \* 2`
			
 
				+		done
			
 
				+ 	fi
			
 
				+fi
			
 
				+if test $maxnodes -gt 32 ; then
			
 
				+	AC_MSG_ERROR([selected number of nodes ($maxnodes) can not be greater than 32])
			
 
				 fi
			
 
				 
			
 
				 AC_MSG_CHECKING(maximum number of memory nodes)
			
@@ -1456,7 +1471,7 @@ AC_CHECK_FUNCS([clock_gettime])
 
				 
			
 
				 # Compute the maximum number of workers (we round it to 16 for alignment
			
 
				 # purposes).
			
 
				-nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + $nmaxmiccore + $nmaxsccdev + 15 \) / 16 \) `
			
 
				+nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + $nmaxmicthreads + $nmaxsccdev + 15 \) / 16 \) `
			
 
				 AC_MSG_CHECKING(Maximum number of workers)
			
 
				 AC_MSG_RESULT($nmaxworkers)
			
 
				 AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
			
@@ -2273,6 +2288,7 @@ AC_MSG_NOTICE([
 
				 	Maximum number of CPUs:           $maxcpus
			
 
				 	Maximum number of CUDA devices:   $nmaxcudadev
			
 
				 	Maximum number of OpenCL devices: $nmaxopencldev
			
 
				+	Maximum number of MIC threads:    $nmaxmicthreads
			
 
				 	Maximum number of memory nodes:   $maxnodes
			
 
				 	Maximum number of task buffers:   $nmaxbuffers
			
 
				 
			
--- a/doc/doxygen/Makefile.am
+++ b/doc/doxygen/Makefile.am
@@ -36,6 +36,7 @@ chapters =	\
 
				 	chapters/scheduling_context_hypervisor.doxy \
			
 
				 	chapters/scheduling_contexts.doxy \
			
 
				 	chapters/modularized_scheduler.doxy \
			
 
				+	chapters/out_of_core.doxy \
			
 
				 	chapters/socl_opencl_extensions.doxy \
			
 
				 	chapters/tips_and_tricks.doxy \
			
 
				 	chapters/environment_variables.doxy \
			
@@ -59,11 +60,14 @@ chapters =	\
 
				 	chapters/code/vector_scal_cuda.cu \
			
 
				 	chapters/code/vector_scal_opencl.c \
			
 
				 	chapters/code/vector_scal_opencl_codelet.cl \
			
 
				+	chapters/code/disk_copy.c \
			
 
				+	chapters/code/disk_compute.c \
			
 
				 	chapters/api/codelet_and_tasks.doxy \
			
 
				 	chapters/api/cuda_extensions.doxy \
			
 
				 	chapters/api/data_interfaces.doxy \
			
 
				 	chapters/api/data_management.doxy \
			
 
				 	chapters/api/data_partition.doxy \
			
 
				+	chapters/api/data_out_of_core.doxy \
			
 
				 	chapters/api/expert_mode.doxy \
			
 
				 	chapters/api/explicit_dependencies.doxy \
			
 
				 	chapters/api/fft_support.doxy \
			
@@ -106,7 +110,7 @@ chapters/version.sty: $(chapters)
 
				 	@if test -s timestamp_updated ; then \
			
 
				 		echo "\newcommand{\STARPUUPDATED}{"`cat timestamp_updated`"}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
			
 
				 	else \
			
 
				-		echo "\newcommand{\STARPUUPDATED}{unknown_date}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
			
 
				+		echo "\newcommand{\STARPUUPDATED}{unknown date}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
			
 
				 	fi
			
 
				 	@echo "\newcommand{\STARPUVERSION}{$(VERSION)}" >> $(top_srcdir)/doc/doxygen/chapters/version.sty
			
 
				 	@-for f in timestamp timestamp_updated timestamp_updated_month ; do \
			
@@ -146,6 +150,7 @@ dox_inputs = $(DOX_CONFIG) 				\
 
				 	$(top_srcdir)/include/starpu.h			\
			
 
				 	$(top_srcdir)/include/starpu_data_filters.h	\
			
 
				 	$(top_srcdir)/include/starpu_data_interfaces.h	\
			
 
				+	$(top_srcdir)/include/starpu_disk.h		\
			
 
				 	$(top_srcdir)/include/starpu_worker.h		\
			
 
				 	$(top_srcdir)/include/starpu_task.h		\
			
 
				 	$(top_srcdir)/include/starpu_task_bundle.h	\
			
--- a/doc/doxygen/chapters/advanced_examples.doxy
+++ b/doc/doxygen/chapters/advanced_examples.doxy
@@ -201,7 +201,7 @@ int vector[NX];
 
				 starpu_data_handle_t handle;
			
 
				 
			
 
				 /* Declare data to StarPU */
			
 
				-starpu_vector_data_register(&handle, 0, (uintptr_t)vector,
			
 
				+starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector,
			
 
				                             NX, sizeof(vector[0]));
			
 
				 
			
 
				 /* Partition the vector in PARTS sub-vectors */
			
@@ -394,6 +394,19 @@ there is some hidden parameter such as the number of iterations, etc.
 
				 The example in the directory <c>examples/pi</c> uses this to include
			
 
				 the number of iterations in the base.
			
 
				 
			
 
				+StarPU will automatically determine when the performance model is calibrated,
			
 
				+or rather, it will assume the performance model is calibrated until the
			
 
				+application submits a task for which the performance can not be predicted. For
			
 
				+::STARPU_HISTORY_BASED, StarPU will require 10 (::_STARPU_CALIBRATION_MINIMUM)
			
 
				+measurements for a given size before estimating that an average can be taken as
			
 
				+estimation for further executions with the same size. For
			
 
				+::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED, StarPU will require
			
 
				+10 (::_STARPU_CALIBRATION_MINIMUM) measurements, and that the minimum measured
			
 
				+data size is smaller than 90% of the maximum measured data size (i.e. the
			
 
				+measurement interval is large enough for a regression to have a meaning).
			
 
				+Calibration can also be forced by setting the \ref STARPU_CALIBRATE environment
			
 
				+variable to <c>1</c>, or even reset by setting it to <c>2</c>.
			
 
				+
			
 
				 How to use schedulers which can benefit from such performance model is explained
			
 
				 in \ref TaskSchedulingPolicy.
			
 
				 
			
@@ -1106,8 +1119,7 @@ Complex data interfaces can then be registered to StarPU.
 
				 
			
 
				 \code{.c}
			
 
				 double real = 45.0;
			
 
				-double imaginary = 12.0;
			
 
				-starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
			
 
				+double imaginary = 12.0;starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
			
 
				 starpu_insert_task(&cl_display, STARPU_R, handle1, 0);
			
 
				 \endcode
			
 
				 
			
--- a/doc/doxygen/chapters/api/codelet_and_tasks.doxy
+++ b/doc/doxygen/chapters/api/codelet_and_tasks.doxy
@@ -82,6 +82,10 @@ specify the codelet may be executed on a MIC processing unit.
 
				 This macro is used when setting the field starpu_codelet::where to
			
 
				 specify the codelet may be executed on an SCC processing unit.
			
 
				 
			
 
				+\def STARPU_MAIN_RAM
			
 
				+\ingroup API_Codelet_And_Tasks
			
 
				+This macro is used when the RAM memory node is specified.
			
 
				+
			
 
				 \def STARPU_MULTIPLE_CPU_IMPLEMENTATIONS
			
 
				 \deprecated
			
 
				 \ingroup API_Codelet_And_Tasks
			
--- a/doc/doxygen/chapters/api/data_interfaces.doxy
+++ b/doc/doxygen/chapters/api/data_interfaces.doxy
@@ -212,7 +212,7 @@ Here an example of how to use the function.
 
				 \code{.c}
			
 
				 float var;
			
 
				 starpu_data_handle_t var_handle;
			
 
				-starpu_variable_data_register(&var_handle, 0, (uintptr_t)&var, sizeof(var));
			
 
				+starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
			
 
				 \endcode
			
 
				 
			
 
				 \fn void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize)
			
@@ -223,7 +223,7 @@ Here an example of how to use the function.
 
				 \code{.c}
			
 
				 float vector[NX];
			
 
				 starpu_data_handle_t vector_handle;
			
 
				-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				 \endcode
			
 
				 
			
 
				 \fn void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize)
			
@@ -238,7 +238,7 @@ Here an example of how to use the function.
 
				 float *matrix;
			
 
				 starpu_data_handle_t matrix_handle;
			
 
				 matrix = (float*)malloc(width * height * sizeof(float));
			
 
				-starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix, width, width, height, sizeof(float));
			
 
				+starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float));
			
 
				 \endcode
			
 
				 
			
 
				 \fn void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize)
			
@@ -252,7 +252,7 @@ Here an example of how to use the function.
 
				 float *block;
			
 
				 starpu_data_handle_t block_handle;
			
 
				 block = (float*)malloc(nx*ny*nz*sizeof(float));
			
 
				-starpu_block_data_register(&block_handle, 0, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
			
 
				+starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
			
 
				 \endcode
			
 
				 
			
 
				 \fn void starpu_bcsr_data_register(starpu_data_handle_t *handle, unsigned home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t r, uint32_t c, size_t elemsize)
			
--- a/doc/doxygen/chapters/api/data_out_of_core.doxy
+++ b/doc/doxygen/chapters/api/data_out_of_core.doxy
@@ -0,0 +1,48 @@
 
				+/*
			
 
				+ * This file is part of the StarPU Handbook.
			
 
				+ * Copyright (C) 2013 Corentin Salingue
			
 
				+ * See the file version.doxy for copying conditions.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+/*! \defgroup API_Out_Of_Core Out Of Core
			
 
				+
			
 
				+
			
 
				+
			
 
				+\struct starpu_disk_ops
			
 
				+\ingroup API_Out_Of_Core
			
 
				+This is a set of functions to manipulate datas on disk.
			
 
				+
			
 
				+\fn int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, size_t size) 
			
 
				+\ingroup API_Out_Of_Core
			
 
				+Register a disk memory node with a set of functions to manipulate datas. <br />
			
 
				+SUCCESS: return the disk node. <br />
			
 
				+FAIL: return an error code. <br />
			
 
				+The \p size must be at least 1 MB !
			
 
				+
			
 
				+\fn void * starpu_disk_open (unsigned node, void *pos, size_t size)
			
 
				+\ingroup API_Out_Of_Core
			
 
				+Add an existing file memory in a disk node. The \p pos is defined in the starpu_disk_ops. \p size: this is a size of your file.
			
 
				+\p pos is the name of the file.
			
 
				+
			
 
				+\fn void starpu_disk_close (unsigned node, void *obj, size_t size)
			
 
				+\ingroup API_Out_Of_Core
			
 
				+Close an existing file memory opened with starpu_disk_open.
			
 
				+
			
 
				+\var starpu_disk_stdio_ops
			
 
				+\ingroup API_Out_Of_Core
			
 
				+This set uses the stdio library (fwrite, fread...) to read/write on disk. <br />
			
 
				+<strong>Warning: It creates one file per allocation !</strong>  <br />
			
 
				+
			
 
				+\var starpu_disk_unistd_ops
			
 
				+\ingroup API_Out_Of_Core
			
 
				+This set uses the unistd library (write, read...) to read/write on disk. <br />
			
 
				+<strong>Warning: It creates one file per allocation !</strong>  <br />
			
 
				+
			
 
				+\var starpu_disk_unistd_o_direct_ops
			
 
				+\ingroup API_Out_Of_Core
			
 
				+This set uses the unistd library (write, read...) to read/write on disk with the O_DIRECT flag. <br />
			
 
				+<strong>Warning: It creates one file per allocation !</strong>  <br />
			
 
				+Only available on Linux.
			
 
				+
			
 
				+*/
			
--- a/doc/doxygen/chapters/api/data_partition.doxy
+++ b/doc/doxygen/chapters/api/data_partition.doxy
@@ -54,12 +54,12 @@ starpu_data_partition(A_handle, &f);
 
				 \ingroup API_Data_Partition
			
 
				 This unapplies one filter, thus unpartitioning the data. The
			
 
				 pieces of data are collected back into one big piece in the
			
 
				-\p gathering_node (usually 0). Tasks working on the partitioned data must
			
 
				+\p gathering_node (usually STARPU_MAIN_RAM). Tasks working on the partitioned data must
			
 
				 be already finished when calling starpu_data_unpartition().
			
 
				 
			
 
				 Here an example of how to use the function.
			
 
				 \code{.c}
			
 
				-starpu_data_unpartition(A_handle, 0);
			
 
				+starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				 \endcode
			
 
				 
			
 
				 \fn int starpu_data_get_nb_children(starpu_data_handle_t handle)
			
--- a/doc/doxygen/chapters/api/mpi.doxy
+++ b/doc/doxygen/chapters/api/mpi.doxy
@@ -98,6 +98,22 @@ communication completes, its resources are automatically released back
 
				 to the system, there is no need to test or to wait for the completion
			
 
				 of the request.
			
 
				 
			
 
				+\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				+\ingroup API_MPI_Support
			
 
				+Posts a nonblocking receive in \p data_handle from the node \p source
			
 
				+using the message tag \p mpi_tag within the communicator \p comm. On
			
 
				+completion, the \p callback function is called with the argument \p
			
 
				+arg.
			
 
				+The parameter \p sequential_consistency allows to enable or disable
			
 
				+the sequential consistency for \p data handle (sequential consistency
			
 
				+will be enabled or disabled based on the value of the parameter \p
			
 
				+sequential_consistency and the value of the sequential consistency
			
 
				+defined for \p data_handle).
			
 
				+Similarly to the pthread detached functionality, when a detached
			
 
				+communication completes, its resources are automatically released back
			
 
				+to the system, there is no need to test or to wait for the completion
			
 
				+of the request.
			
 
				+
			
 
				 \fn int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status)
			
 
				 \ingroup API_MPI_Support
			
 
				 Returns when the operation identified by request \p req is complete.
			
--- a/doc/doxygen/chapters/basic_examples.doxy
+++ b/doc/doxygen/chapters/basic_examples.doxy
@@ -529,14 +529,14 @@ The following lines show how to declare an array of <c>NX</c> elements of type
 
				 float vector[NX];
			
 
				 
			
 
				 starpu_data_handle_t vector_handle;
			
 
				-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
			
 
				+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX,
			
 
				                             sizeof(vector[0]));
			
 
				 \endcode
			
 
				 
			
 
				 The first argument, called the <b>data handle</b>, is an opaque pointer which
			
 
				 designates the array in StarPU. This is also the structure which is used to
			
 
				 describe which data is used by a task. The second argument is the node number
			
 
				-where the data originally resides. Here it is 0 since the array <c>vector</c> is in
			
 
				+where the data originally resides. Here it is STARPU_MAIN_RAM since the array <c>vector</c> is in
			
 
				 the main memory. Then comes the pointer <c>vector</c> where the data can be found in main memory,
			
 
				 the number of elements in the vector and the size of each element.
			
 
				 The following shows how to construct a StarPU task that will manipulate the
			
--- a/doc/doxygen/chapters/code/disk_compute.c
+++ b/doc/doxygen/chapters/code/disk_compute.c
@@ -0,0 +1,179 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2013 Corentin Salingue
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+//! [To be included]
			
 
				+/* Try to write into disk memory
			
 
				+ * Use mechanism to push datas from main ram to disk ram
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <stdio.h>
			
 
				+#include <sys/types.h>
			
 
				+#include <unistd.h>
			
 
				+#include <math.h>
			
 
				+
			
 
				+#define NX (100)
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	/* Initialize StarPU with default configuration */
			
 
				+	int ret = starpu_init(NULL);
			
 
				+
			
 
				+	if (ret == -ENODEV) goto enodev;
			
 
				+
			
 
				+	/* Initialize path and name */
			
 
				+	char pid_str[16];
			
 
				+	int pid = getpid();
			
 
				+	snprintf(pid_str, 16, "%d", pid);
			
 
				+
			
 
				+	char * base = "/tmp/";
			
 
				+
			
 
				+	char * name_file_start = malloc(128*sizeof(char));
			
 
				+	strcpy(name_file_start, "STARPU_DISK_COMPUTE_DATA_");
			
 
				+	strcat(name_file_start, pid_str);
			
 
				+
			
 
				+	char * name_file_end = malloc(128*sizeof(char));
			
 
				+	strcpy(name_file_end, "STARPU_DISK_COMPUTE_DATA_RESULT_");
			
 
				+	strcat(name_file_end, pid_str);
			
 
				+
			
 
				+	char * path_file_start = malloc(128*sizeof(char));
			
 
				+	strcpy(path_file_start, base);
			
 
				+	strcat(path_file_start, name_file_start);
			
 
				+
			
 
				+	char * path_file_end = malloc(128*sizeof(char));
			
 
				+	strcpy(path_file_end, base);
			
 
				+	strcat(path_file_end, name_file_end);
			
 
				+
			
 
				+
			
 
				+	/* register a disk */
			
 
				+	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) base, 1024*1024*1);
			
 
				+	/* can't write on /tmp/ */
			
 
				+	if (new_dd == -ENOENT) goto enoent;
			
 
				+	
			
 
				+	unsigned dd = (unsigned) new_dd;
			
 
				+
			
 
				+	printf("TEST DISK MEMORY \n");
			
 
				+
			
 
				+	/* Imagine, you want to compute datas */
			
 
				+	int *A;
			
 
				+	int *C;
			
 
				+
			
 
				+	starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT);
			
 
				+	starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT);
			
 
				+ 
			
 
				+	unsigned int j;
			
 
				+	/* you register them in a vector */
			
 
				+	for(j = 0; j < NX; ++j)
			
 
				+	{
			
 
				+		A[j] = j;
			
 
				+		C[j] = 0;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+	/* you create a file to store the vector ON the disk */
			
 
				+	FILE * f = fopen(path_file_start, "wb+");
			
 
				+	if (f == NULL)
			
 
				+		goto enoent;
			
 
				+
			
 
				+	/* store it in the file */
			
 
				+	fwrite(A, sizeof(int), NX, f);
			
 
				+
			
 
				+	/* close the file */
			
 
				+	fclose(f);
			
 
				+
			
 
				+
			
 
				+	/* create a file to store result */
			
 
				+	f = fopen(path_file_end, "wb+");
			
 
				+	if (f == NULL)
			
 
				+		goto enoent;
			
 
				+
			
 
				+	/* replace all datas by 0 */
			
 
				+	fwrite(C, sizeof(int), NX, f);
			
 
				+
			
 
				+	/* close the file */
			
 
				+	fclose(f);
			
 
				+
			
 
				+	/* And now, you want to use your datas in StarPU */
			
 
				+	/* Open the file ON the disk */
			
 
				+	void * data = starpu_disk_open(dd, (void *) name_file_start, NX*sizeof(int));
			
 
				+	void * data_result = starpu_disk_open(dd, (void *) name_file_end, NX*sizeof(int));
			
 
				+
			
 
				+
			
 
				+	starpu_data_handle_t vector_handleA, vector_handleC;
			
 
				+
			
 
				+	/* register vector in starpu */
			
 
				+	starpu_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int));
			
 
				+
			
 
				+	/* and do what you want with it, here we copy it into an other vector */ 
			
 
				+	starpu_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int));	
			
 
				+
			
 
				+	starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL);
			
 
				+
			
 
				+	/* free them */
			
 
				+	starpu_data_unregister(vector_handleA);
			
 
				+	starpu_data_unregister(vector_handleC);
			
 
				+
			
 
				+	/* close them in StarPU */
			
 
				+	starpu_disk_close(dd, data, NX*sizeof(int));
			
 
				+	starpu_disk_close(dd, data_result, NX*sizeof(int));
			
 
				+
			
 
				+	/* check results */	
			
 
				+	f = fopen(path_file_end, "rb+");
			
 
				+	if (f == NULL)
			
 
				+		goto enoent;
			
 
				+	/* take datas */
			
 
				+	int size = fread(C, sizeof(int), NX, f);
			
 
				+
			
 
				+	/* close the file */
			
 
				+	fclose(f);
			
 
				+
			
 
				+	int try = 1;
			
 
				+	for (j = 0; j < NX; ++j)
			
 
				+		if (A[j] != C[j])
			
 
				+		{
			
 
				+			printf("Fail A %d != C %d \n", A[j], C[j]);
			
 
				+			try = 0;
			
 
				+		}
			
 
				+
			
 
				+	starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT);
			
 
				+	starpu_free_flags(C, NX*sizeof(double), STARPU_MALLOC_COUNT);
			
 
				+
			
 
				+	unlink(path_file_start);
			
 
				+	unlink(path_file_end);
			
 
				+
			
 
				+	free(name_file_start);
			
 
				+	free(name_file_end);
			
 
				+	free(path_file_start);
			
 
				+	free(path_file_end);
			
 
				+
			
 
				+	/* terminate StarPU, no task can be submitted after */
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	if(try)
			
 
				+		printf("TEST SUCCESS\n");
			
 
				+	else
			
 
				+		printf("TEST FAIL\n");
			
 
				+	return (try ? EXIT_SUCCESS : EXIT_FAILURE);
			
 
				+
			
 
				+enodev:
			
 
				+	return 77;
			
 
				+enoent:
			
 
				+	return 77;
			
 
				+}
			
 
				+//! [To be included]
			
 
				+
			
--- a/doc/doxygen/chapters/code/disk_copy.c
+++ b/doc/doxygen/chapters/code/disk_copy.c
@@ -0,0 +1,122 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2013 Corentin Salingue
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+//! [To be included]
			
 
				+
			
 
				+/* Try to write into disk memory
			
 
				+ * Use mechanism to push datas from main ram to disk ram
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <stdio.h>
			
 
				+#include <math.h>
			
 
				+
			
 
				+/* size of one vector */
			
 
				+#define	NX	(30*1000000/sizeof(double))
			
 
				+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
 
				+
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	double * A,*B,*C,*D,*E,*F;
			
 
				+
			
 
				+	/* limit main ram to force to push in disk */
			
 
				+	putenv("STARPU_LIMIT_CPU_MEM=160");
			
 
				+
			
 
				+	/* Initialize StarPU with default configuration */
			
 
				+	int ret = starpu_init(NULL);
			
 
				+
			
 
				+	if (ret == -ENODEV) goto enodev;
			
 
				+
			
 
				+	/* register a disk */
			
 
				+	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) "/tmp/", 1024*1024*200);
			
 
				+	/* can't write on /tmp/ */
			
 
				+	if (new_dd == -ENOENT) goto enoent;
			
 
				+	
			
 
				+	unsigned dd = (unsigned) new_dd;
			
 
				+
			
 
				+	/* allocate two memory spaces */
			
 
				+	starpu_malloc_flags((void **)&A, NX*sizeof(double), STARPU_MALLOC_COUNT);
			
 
				+	starpu_malloc_flags((void **)&F, NX*sizeof(double), STARPU_MALLOC_COUNT);
			
 
				+
			
 
				+	FPRINTF(stderr, "TEST DISK MEMORY \n");
			
 
				+
			
 
				+	unsigned int j;
			
 
				+	/* initialization with bad values */
			
 
				+	for(j = 0; j < NX; ++j)
			
 
				+	{
			
 
				+		A[j] = j;
			
 
				+		F[j] = -j;
			
 
				+	}
			
 
				+
			
 
				+	starpu_data_handle_t vector_handleA, vector_handleB, vector_handleC, vector_handleD, vector_handleE, vector_handleF;
			
 
				+
			
 
				+	/* register vector in starpu */
			
 
				+	starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(double));
			
 
				+	starpu_vector_data_register(&vector_handleB, -1, (uintptr_t) NULL, NX, sizeof(double));	
			
 
				+	starpu_vector_data_register(&vector_handleC, -1, (uintptr_t) NULL, NX, sizeof(double));
			
 
				+	starpu_vector_data_register(&vector_handleD, -1, (uintptr_t) NULL, NX, sizeof(double));
			
 
				+	starpu_vector_data_register(&vector_handleE, -1, (uintptr_t) NULL, NX, sizeof(double));
			
 
				+	starpu_vector_data_register(&vector_handleF, STARPU_MAIN_RAM, (uintptr_t)F, NX, sizeof(double));
			
 
				+
			
 
				+	/* copy vector A->B, B->C... */
			
 
				+	starpu_data_cpy(vector_handleB, vector_handleA, 0, NULL, NULL);
			
 
				+	starpu_data_cpy(vector_handleC, vector_handleB, 0, NULL, NULL);
			
 
				+	starpu_data_cpy(vector_handleD, vector_handleC, 0, NULL, NULL);
			
 
				+	starpu_data_cpy(vector_handleE, vector_handleD, 0, NULL, NULL);
			
 
				+	starpu_data_cpy(vector_handleF, vector_handleE, 0, NULL, NULL);
			
 
				+
			
 
				+	/* StarPU does not need to manipulate the array anymore so we can stop
			
 
				+ 	 * monitoring it */
			
 
				+
			
 
				+	/* free them */
			
 
				+	starpu_data_unregister(vector_handleA);
			
 
				+	starpu_data_unregister(vector_handleB);
			
 
				+	starpu_data_unregister(vector_handleC);
			
 
				+	starpu_data_unregister(vector_handleD);
			
 
				+	starpu_data_unregister(vector_handleE);
			
 
				+	starpu_data_unregister(vector_handleF);
			
 
				+
			
 
				+	/* check if computation is correct */
			
 
				+	int try = 1;
			
 
				+	for (j = 0; j < NX; ++j)
			
 
				+		if (A[j] != F[j])
			
 
				+		{
			
 
				+			printf("Fail A %f != F %f \n", A[j], F[j]);
			
 
				+			try = 0;
			
 
				+		}
			
 
				+
			
 
				+	/* free last vectors */
			
 
				+	starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT);
			
 
				+	starpu_free_flags(F, NX*sizeof(double), STARPU_MALLOC_COUNT);
			
 
				+
			
 
				+	/* terminate StarPU, no task can be submitted after */
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	if(try)
			
 
				+		FPRINTF(stderr, "TEST SUCCESS\n");
			
 
				+	else
			
 
				+		FPRINTF(stderr, "TEST FAIL\n");
			
 
				+	return (try ? EXIT_SUCCESS : EXIT_FAILURE);
			
 
				+
			
 
				+enodev:
			
 
				+	return 77;
			
 
				+enoent:
			
 
				+	return 77;
			
 
				+}
			
 
				+
			
 
				+//! [To be included]
			
--- a/doc/doxygen/chapters/code/multiformat.c
+++ b/doc/doxygen/chapters/code/multiformat.c
@@ -57,5 +57,5 @@ struct starpu_multiformat_data_interface_ops format_ops = {
 
				     ...
			
 
				 };
			
 
				 
			
 
				-starpu_multiformat_data_register(handle, 0, &array_of_structs, NX, &format_ops);
			
 
				+starpu_multiformat_data_register(handle, STARPU_MAIN_RAM, &array_of_structs, NX, &format_ops);
			
 
				 //! [To be included]
			
--- a/doc/doxygen/chapters/code/vector_scal_c.c
+++ b/doc/doxygen/chapters/code/vector_scal_c.c
@@ -79,14 +79,14 @@ int main(int argc, char **argv)
 
				      *  - the first argument of the registration method is a pointer to the
			
 
				      *    handle that should describe the data
			
 
				      *  - the second argument is the memory node where the data (ie. "vector")
			
 
				-     *    resides initially: 0 stands for an address in main memory, as
			
 
				+     *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
			
 
				      *    opposed to an adress on a GPU for instance.
			
 
				      *  - the third argument is the adress of the vector in RAM
			
 
				      *  - the fourth argument is the number of elements in the vector
			
 
				      *  - the fifth argument is the size of each element.
			
 
				      */
			
 
				     starpu_data_handle_t vector_handle;
			
 
				-    starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
			
 
				+    starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
			
 
				                                 NX, sizeof(vector[0]));
			
 
				 
			
 
				     float factor = 3.14;
			
--- a/doc/doxygen/chapters/configure_options.doxy
+++ b/doc/doxygen/chapters/configure_options.doxy
@@ -22,13 +22,6 @@ the following configure options.
 
				 Enable debugging messages.
			
 
				 </dd>
			
 
				 
			
 
				-<dt>--enable-debug</dt>
			
 
				-<dd>
			
 
				-\anchor enable-debug
			
 
				-\addindex __configure__--enable-debug
			
 
				-Enable debugging messages.
			
 
				-</dd>
			
 
				-
			
 
				 <dt>--enable-fast</dt>
			
 
				 <dd>
			
 
				 \anchor enable-fast
			
@@ -363,6 +356,13 @@ Enable performance debugging through gprof.
 
				 Enable performance model debugging.
			
 
				 </dd>
			
 
				 
			
 
				+<dt>--enable-fxt-lock</dt>
			
 
				+<dd>
			
 
				+\anchor enable-fxt-lock
			
 
				+\addindex __configure__--enable-fxt-lock
			
 
				+Enable additional trace events which describes locks behaviour.
			
 
				+</dd>
			
 
				+
			
 
				 <dt>--enable-stats</dt>
			
 
				 <dd>
			
 
				 \anchor enable-stats
			
--- a/doc/doxygen/chapters/environment_variables.doxy
+++ b/doc/doxygen/chapters/environment_variables.doxy
@@ -217,12 +217,12 @@ it is therefore necessary to disable asynchronous data transfers.
 
				 Disable asynchronous copies between CPU and MIC devices.
			
 
				 </dd>
			
 
				 
			
 
				-<dt>STARPU_DISABLE_CUDA_GPU_GPU_DIRECT</dt>
			
 
				+<dt>STARPU_ENABLE_CUDA_GPU_GPU_DIRECT</dt>
			
 
				 <dd>
			
 
				-\anchor STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
			
 
				-\addindex __env__STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
			
 
				-Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
			
 
				-instead. This permits to test the performance effect of GPU-Direct.
			
 
				+\anchor STARPU_ENABLE_CUDA_GPU_GPU_DIRECT
			
 
				+\addindex __env__STARPU_ENABLE_CUDA_GPU_GPU_DIRECT
			
 
				+Enable direct CUDA transfers from GPU to GPU, without copying through RAM.
			
 
				+This permits to test the performance effect of GPU-Direct.
			
 
				 </dd>
			
 
				 
			
 
				 </dl>
			
--- a/doc/doxygen/chapters/files.doxy
+++ b/doc/doxygen/chapters/files.doxy
@@ -12,6 +12,7 @@
 
				 \file starpu.h
			
 
				 \file starpu_data_filters.h
			
 
				 \file starpu_data_interfaces.h
			
 
				+\file starpu_disk.h
			
 
				 \file starpu_worker.h
			
 
				 \file starpu_task.h
			
 
				 \file starpu_task_bundle.h
			
--- a/doc/doxygen/chapters/introduction.doxy
+++ b/doc/doxygen/chapters/introduction.doxy
@@ -145,7 +145,7 @@ simply replace calling the function with submitting a task.
 
				 A \b codelet records pointers to various implementations of the same
			
 
				 theoretical function.
			
 
				 
			
 
				-A <b>memory node</b> can be either the main RAM or GPU-embedded memory.
			
 
				+A <b>memory node</b> can be either the main RAM, GPU-embedded memory or a disk memory.
			
 
				 
			
 
				 A \b bus is a link between memory nodes.
			
 
				 
			
@@ -213,6 +213,7 @@ The documentation chapters include
 
				 <li> \ref HowToOptimizePerformanceWithStarPU
			
 
				 <li> \ref PerformanceFeedback
			
 
				 <li> \ref TipsAndTricksToKnowAbout
			
 
				+<li> \ref OutOfCore
			
 
				 <li> \ref MPISupport
			
 
				 <li> \ref FFTSupport
			
 
				 <li> \ref MICSCCSupport
			
--- a/doc/doxygen/chapters/mpi_support.doxy
+++ b/doc/doxygen/chapters/mpi_support.doxy
@@ -52,7 +52,7 @@ int main(int argc, char **argv)
 
				     starpu_init(NULL);
			
 
				     starpu_mpi_initialize_extended(&rank, &size);
			
 
				 
			
 
				-    starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
			
 
				+    starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(unsigned));
			
 
				 
			
 
				     unsigned nloops = NITER;
			
 
				     unsigned loop;
			
@@ -273,7 +273,7 @@ data which will be needed by the tasks that we will execute.
 
				             int mpi_rank = my_distrib(x, y, size);
			
 
				              if (mpi_rank == my_rank)
			
 
				                 /* Owning data */
			
 
				-                starpu_variable_data_register(&data_handles[x][y], 0,
			
 
				+                starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM,
			
 
				                                               (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
			
 
				             else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
			
 
				                   || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
			
@@ -339,7 +339,7 @@ for(x = 0; x < nblocks ;  x++)
 
				 {
			
 
				     int mpi_rank = my_distrib(x, nodes);
			
 
				     if (rank == root) {
			
 
				-        starpu_vector_data_register(&data_handles[x], 0, (uintptr_t)vector[x],
			
 
				+        starpu_vector_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)vector[x],
			
 
				                                     blocks_size, sizeof(float));
			
 
				     }
			
 
				     else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1))) {
			
--- a/doc/doxygen/chapters/out_of_core.doxy
+++ b/doc/doxygen/chapters/out_of_core.doxy
@@ -0,0 +1,56 @@
 
				+/*
			
 
				+ * This file is part of the StarPU Handbook.
			
 
				+ * Copyright (C) 2013 Corentin Salingue
			
 
				+ * See the file version.doxy for copying conditions.
			
 
				+ */
			
 
				+
			
 
				+/*! \page OutOfCore Out Of Core
			
 
				+
			
 
				+\section Introduction Introduction
			
 
				+
			
 
				+When using StarPU, one may need to store more data than what the main memory
			
 
				+(RAM) can store. This part describes the method to add a new memory node on a
			
 
				+disk and to use it.
			
 
				+
			
 
				+\section UseANewDiskMemory Use a new disk memory
			
 
				+
			
 
				+To use a disk memory node, you have to register it with this function:
			
 
				+
			
 
				+\code{.c}
			
 
				+	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) "/tmp/", 1024*1024*200);
			
 
				+\endcode
			
 
				+
			
 
				+Here, we use the stdio library to realize the read/write operations, i.e.
			
 
				+fread/fwrite. This structure must have a path where to store files, as well as
			
 
				+the maximum size the software can afford storing on the disk.
			
 
				+
			
 
				+Don't forget to check if the result is correct!
			
 
				+
			
 
				+When the register function is called, StarPU will benchmark the disk. This can
			
 
				+take some time.
			
 
				+
			
 
				+<strong>Warning: the size thus has to be at least 1 MB!</strong> 
			
 
				+
			
 
				+StarPU will automatically try to evict unused data to this new disk. One can
			
 
				+also use the standard StarPU node API, see the \ref API_Standard_Memory_Library
			
 
				+and the \ref API_Data_Interfaces .
			
 
				+
			
 
				+The disk is unregistered during the starpu_shutdown().
			
 
				+
			
 
				+\section DiskFunctions Disk functions
			
 
				+
			
 
				+There are various ways to operate a disk memory node, described by the structure
			
 
				+starpu_disk_ops. For instance, the variable #starpu_disk_stdio_ops
			
 
				+uses fread/fwrite functions.
			
 
				+
			
 
				+All structures are in \ref API_Out_Of_Core .
			
 
				+
			
 
				+\section ExampleDiskCopy Examples: disk_copy
			
 
				+
			
 
				+\snippet disk_copy.c To be included
			
 
				+
			
 
				+\section ExampleDiskCompute Examples: disk_compute
			
 
				+
			
 
				+\snippet disk_compute.c To be included
			
 
				+
			
 
				+*/
			
--- a/doc/doxygen/chapters/performance_feedback.doxy
+++ b/doc/doxygen/chapters/performance_feedback.doxy
@@ -253,6 +253,10 @@ starpu_shutdown(). The trace is a binary file whose name has the form
 
				 <c>/tmp/</c> directory by default, or by the directory specified by
			
 
				 the environment variable \ref STARPU_FXT_PREFIX.
			
 
				 
			
 
				+The additional configure option \ref enable-fxt-lock "--enable-fxt-lock" can 
			
 
				+be used to generate trace events which describes the locks behaviour during 
			
 
				+the execution.
			
 
				+
			
 
				 \subsection CreatingAGanttDiagram Creating a Gantt Diagram
			
 
				 
			
 
				 When the FxT trace file <c>filename</c> has been generated, it is possible to
			
--- a/doc/doxygen/doxygen-config.cfg.in
+++ b/doc/doxygen/doxygen-config.cfg.in
@@ -25,6 +25,7 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 
				 			 @top_srcdir@/include/starpu_data_filters.h \
			
 
				 			 @top_srcdir@/include/starpu_data.h \
			
 
				 			 @top_srcdir@/include/starpu_data_interfaces.h \
			
 
				+			 @top_srcdir@/include/starpu_disk.h \
			
 
				 			 @top_srcdir@/include/starpu_deprecated_api.h \
			
 
				 			 @top_srcdir@/include/starpu_driver.h \
			
 
				 			 @top_srcdir@/include/starpu_expert.h \
			
--- a/doc/doxygen/refman.tex
+++ b/doc/doxygen/refman.tex
@@ -129,6 +129,13 @@ Documentation License”.
 
				 \hypertarget{TipsAndTricksToKnowAbout}{}
			
 
				 \input{TipsAndTricksToKnowAbout}
			
 
				 
			
 
				+\chapter{Out Of Core}
			
 
				+\label{OutOfCore}
			
 
				+\hypertarget{OutOfCore}{}
			
 
				+\input{OutOfCore}
			
 
				+
			
 
				+
			
 
				+
			
 
				 \chapter{MPI Support}
			
 
				 \label{MPISupport}
			
 
				 \hypertarget{MPISupport}{}
			
@@ -195,6 +202,7 @@ Documentation License”.
 
				 \input{group__API__Data__Management}
			
 
				 \input{group__API__Data__Interfaces}
			
 
				 \input{group__API__Data__Partition}
			
 
				+\input{group__API__Out__Of__Core}
			
 
				 \input{group__API__Multiformat__Data__Interface}
			
 
				 \input{group__API__Codelet__And__Tasks}
			
 
				 \input{group__API__Insert__Task}
			
@@ -238,6 +246,7 @@ Documentation License”.
 
				 \input{starpu__data__filters_8h}
			
 
				 \input{starpu__data__interfaces_8h}
			
 
				 \input{starpu__deprecated__api_8h}
			
 
				+\input{starpu__disk_8h}
			
 
				 \input{starpu__driver_8h}
			
 
				 \input{starpu__expert_8h}
			
 
				 \input{starpu__fxt_8h}
			
--- a/doc/texinfo/chapters/advanced-examples.texi
+++ b/doc/texinfo/chapters/advanced-examples.texi
@@ -235,7 +235,7 @@ int vector[NX];
 
				 starpu_data_handle_t handle;
			
 
				 
			
 
				 /* Declare data to StarPU */
			
 
				-starpu_vector_data_register(&handle, 0, (uintptr_t)vector,
			
 
				+starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector,
			
 
				                             NX, sizeof(vector[0]));
			
 
				 
			
 
				 /* Partition the vector in PARTS sub-vectors */
			
@@ -1002,7 +1002,7 @@ struct starpu_multiformat_data_interface_ops format_ops = @{
 
				     .cpu_elemsize = 2 * sizeof(float),
			
 
				     ...
			
 
				 @};
			
 
				-starpu_multiformat_data_register(handle, 0, &array_of_structs, NX, &format_ops);
			
 
				+starpu_multiformat_data_register(handle, STARPU_MAIN_RAM, &array_of_structs, NX, &format_ops);
			
 
				 @end smallexample
			
 
				 @end cartouche
			
 
				 
			
@@ -1248,7 +1248,7 @@ Complex data interfaces can then be registered to StarPU.
 
				 @smallexample
			
 
				 double real = 45.0;
			
 
				 double imaginary = 12.0;
			
 
				-starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
			
 
				+starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
			
 
				 starpu_insert_task(&cl_display, STARPU_R, handle1, 0);
			
 
				 @end smallexample
			
 
				 @end cartouche
			
--- a/doc/texinfo/chapters/api.texi
+++ b/doc/texinfo/chapters/api.texi
@@ -783,7 +783,7 @@ item.
 
				 @smallexample
			
 
				 float var;
			
 
				 starpu_data_handle_t var_handle;
			
 
				-starpu_variable_data_register(&var_handle, 0, (uintptr_t)&var, sizeof(var));
			
 
				+starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
			
 
				 @end smallexample
			
 
				 @end cartouche
			
 
				 @end deftypefun
			
@@ -796,7 +796,7 @@ Register the @var{nx} @var{elemsize}-byte elements pointed to by
 
				 @smallexample
			
 
				 float vector[NX];
			
 
				 starpu_data_handle_t vector_handle;
			
 
				-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
			
 
				+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX,
			
 
				                             sizeof(vector[0]));
			
 
				 @end smallexample
			
 
				 @end cartouche
			
@@ -814,7 +814,7 @@ alignment purposes.
 
				 float *matrix;
			
 
				 starpu_data_handle_t matrix_handle;
			
 
				 matrix = (float*)malloc(width * height * sizeof(float));
			
 
				-starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix,
			
 
				+starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix,
			
 
				                             width, width, height, sizeof(float));
			
 
				 @end smallexample
			
 
				 @end cartouche
			
@@ -831,7 +831,7 @@ between rows and between z planes.
 
				 float *block;
			
 
				 starpu_data_handle_t block_handle;
			
 
				 block = (float*)malloc(nx*ny*nz*sizeof(float));
			
 
				-starpu_block_data_register(&block_handle, 0, (uintptr_t)block,
			
 
				+starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block,
			
 
				                            nx, nx*ny, nx, ny, nz, sizeof(float));
			
 
				 @end smallexample
			
 
				 @end cartouche
			
@@ -1584,11 +1584,11 @@ starpu_data_partition(A_handle, &f);
 
				 
			
 
				 @deftypefun void starpu_data_unpartition (starpu_data_handle_t @var{root_data}, unsigned @var{gathering_node})
			
 
				 This unapplies one filter, thus unpartitioning the data. The pieces of data are
			
 
				-collected back into one big piece in the @var{gathering_node} (usually 0). Tasks
			
 
				+collected back into one big piece in the @var{gathering_node} (usually STARPU_MAIN_RAM). Tasks
			
 
				 working on the partitioned data must be already finished when calling @code{starpu_data_unpartition}.
			
 
				 @cartouche
			
 
				 @smallexample
			
 
				-starpu_data_unpartition(A_handle, 0);
			
 
				+starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				 @end smallexample
			
 
				 @end cartouche
			
 
				 @end deftypefun
			
--- a/doc/texinfo/chapters/basic-examples.texi
+++ b/doc/texinfo/chapters/basic-examples.texi
@@ -584,7 +584,7 @@ The following lines show how to declare an array of @code{NX} elements of type
 
				 float vector[NX];
			
 
				 
			
 
				 starpu_data_handle_t vector_handle;
			
 
				-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
			
 
				+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX,
			
 
				                             sizeof(vector[0]));
			
 
				 @end smallexample
			
 
				 @end cartouche
			
@@ -860,7 +860,7 @@ int main(int argc, char **argv)
 
				 @cartouche
			
 
				 @smallexample
			
 
				     /* @b{Registering data within StarPU} */
			
 
				-    starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
			
 
				+    starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
			
 
				                                 NX, sizeof(vector[0]));
			
 
				 
			
 
				     /* @b{Definition of the task} */
			
--- a/doc/texinfo/chapters/configuration.texi
+++ b/doc/texinfo/chapters/configuration.texi
@@ -477,9 +477,9 @@ it is therefore necessary to disable asynchronous data transfers.
 
				 Disable asynchronous copies between CPU and MIC devices.
			
 
				 @end defvr
			
 
				 
			
 
				-@defvr {Environment variable} STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
			
 
				-Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
			
 
				-instead. This permits to test the performance effect of GPU-Direct.
			
 
				+@defvr {Environment variable} STARPU_ENABLE_CUDA_GPU_GPU_DIRECT
			
 
				+Enable direct CUDA transfers from GPU to GPU, without copying through RAM.
			
 
				+This permits to test the performance effect of GPU-Direct.
			
 
				 @end defvr
			
 
				 
			
 
				 @node Scheduling
			
--- a/doc/texinfo/chapters/mpi-support.texi
+++ b/doc/texinfo/chapters/mpi-support.texi
@@ -64,7 +64,7 @@ int main(int argc, char **argv)
 
				     starpu_init(NULL);
			
 
				     starpu_mpi_initialize_extended(&rank, &size);
			
 
				 
			
 
				-    starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
			
 
				+    starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(unsigned));
			
 
				 
			
 
				     unsigned nloops = NITER;
			
 
				     unsigned loop;
			
@@ -310,7 +310,7 @@ data which will be needed by the tasks that we will execute.
 
				             int mpi_rank = my_distrib(x, y, size);
			
 
				              if (mpi_rank == my_rank)
			
 
				                 /* Owning data */
			
 
				-                starpu_variable_data_register(&data_handles[x][y], 0,
			
 
				+                starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM,
			
 
				                                               (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
			
 
				             else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
			
 
				                   || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
			
@@ -381,7 +381,7 @@ for(x = 0; x < nblocks ;  x++)
 
				 @{
			
 
				     int mpi_rank = my_distrib(x, nodes);
			
 
				     if (rank == root) @{
			
 
				-        starpu_vector_data_register(&data_handles[x], 0, (uintptr_t)vector[x],
			
 
				+        starpu_vector_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)vector[x],
			
 
				                                     blocks_size, sizeof(float));
			
 
				     @}
			
 
				     else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1))) @{
			
--- a/doc/texinfo/chapters/vector_scal_c.texi
+++ b/doc/texinfo/chapters/vector_scal_c.texi
@@ -2,7 +2,7 @@
 
				 
			
 
				 @c This file is part of the StarPU Handbook.
			
 
				 @c Copyright (C) 2009-2011, 2013  Université de Bordeaux 1
			
 
				-@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				+@c Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				 @c See the file starpu.texi for copying conditions.
			
 
				 
			
 
				 @smallexample
			
@@ -69,14 +69,14 @@ int main(int argc, char **argv)
 
				      *  - the first argument of the registration method is a pointer to the
			
 
				      *    handle that should describe the data
			
 
				      *  - the second argument is the memory node where the data (ie. "vector")
			
 
				-     *    resides initially: 0 stands for an address in main memory, as
			
 
				+     *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
			
 
				      *    opposed to an adress on a GPU for instance.
			
 
				      *  - the third argument is the adress of the vector in RAM
			
 
				      *  - the fourth argument is the number of elements in the vector
			
 
				      *  - the fifth argument is the size of each element.
			
 
				      */
			
 
				     starpu_data_handle_t vector_handle;
			
 
				-    starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
			
 
				+    starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
			
 
				                                 NX, sizeof(vector[0]));
			
 
				 
			
 
				     float factor = 3.14;
			
--- a/doc/tutorial/vector_scal.c
+++ b/doc/tutorial/vector_scal.c
@@ -75,14 +75,14 @@ int main(int argc, char **argv)
 
				 	 *  - the first argument of the registration method is a pointer to the
			
 
				 	 *    handle that should describe the data
			
 
				 	 *  - the second argument is the memory node where the data (ie. "vector")
			
 
				-	 *    resides initially: 0 stands for an address in main memory, as
			
 
				+	 *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
			
 
				 	 *    opposed to an adress on a GPU for instance.
			
 
				 	 *  - the third argument is the adress of the vector in RAM
			
 
				 	 *  - the fourth argument is the number of elements in the vector
			
 
				 	 *  - the fifth argument is the size of each element.
			
 
				 	 */
			
 
				 	starpu_data_handle_t vector_handle;
			
 
				-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
			
 
				+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
			
 
				 				    NX, sizeof(vector[0]));
			
 
				 
			
 
				 	float factor = 3.14;
			
--- a/examples/audio/starpu_audio_processing.c
+++ b/examples/audio/starpu_audio_processing.c
@@ -413,7 +413,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	starpu_cublas_init();
			
 
				 
			
 
				-	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));
			
 
				+	starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, niter*nsamples, sizeof(float));
			
 
				 
			
 
				 	struct starpu_data_filter f =
			
 
				 	{
			
@@ -458,7 +458,7 @@ int main(int argc, char **argv)
 
				 		fprintf(stderr, "Writing output data\n");
			
 
				 
			
 
				 	/* make sure that the output is in RAM before quitting StarPU */
			
 
				-	starpu_data_unpartition(A_handle, 0);
			
 
				+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(A_handle);
			
 
				 
			
 
				 	starpu_cublas_shutdown();
			
--- a/examples/axpy/axpy.c
+++ b/examples/axpy/axpy.c
@@ -152,8 +152,8 @@ int main(int argc, char **argv)
 
				 	FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]);
			
 
				 
			
 
				 	/* Declare the data to StarPU */
			
 
				-	starpu_vector_data_register(&_handle_x, 0, (uintptr_t)_vec_x, N, sizeof(TYPE));
			
 
				-	starpu_vector_data_register(&_handle_y, 0, (uintptr_t)_vec_y, N, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE));
			
 
				 
			
 
				 	/* Divide the vector into blocks */
			
 
				 	struct starpu_data_filter block_filter =
			
@@ -194,8 +194,8 @@ int main(int argc, char **argv)
 
				 	starpu_task_wait_for_all();
			
 
				 
			
 
				 enodev:
			
 
				-	starpu_data_unpartition(_handle_x, 0);
			
 
				-	starpu_data_unpartition(_handle_y, 0);
			
 
				+	starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(_handle_x);
			
 
				 	starpu_data_unregister(_handle_y);
			
 
				 
			
--- a/examples/basic_examples/block.c
+++ b/examples/basic_examples/block.c
@@ -37,7 +37,7 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 
				 	starpu_data_handle_t block_handle;
			
 
				         int i;
			
 
				 
			
 
				-	starpu_block_data_register(&block_handle, 0, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float));
			
 
				+	starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float));
			
 
				 
			
 
				 	starpu_codelet_init(&cl);
			
 
				 	cl.where = where;
			
--- a/examples/basic_examples/dynamic_handles.c
+++ b/examples/basic_examples/dynamic_handles.c
@@ -83,7 +83,7 @@ int main(int argc, char **argv)
 
				 	for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
			
 
				 	     dummy_big_cl.dyn_modes[i] = STARPU_RW;
			
 
				 
			
 
				-	starpu_variable_data_register(&handle, 0, (uintptr_t)&val, sizeof(int));
			
 
				+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&val, sizeof(int));
			
 
				 
			
 
				 	task = starpu_task_create();
			
 
				 	task->synchronous = 1;
			
--- a/examples/basic_examples/mult.c
+++ b/examples/basic_examples/mult.c
@@ -174,11 +174,11 @@ static void partition_mult_data(void)
 
				 	 * node in which resides the matrix: 0 means that the 3rd argument is
			
 
				 	 * an adress in main memory.
			
 
				 	 */
			
 
				-	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A, 
			
 
				+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, 
			
 
				 		ydim, ydim, zdim, sizeof(float));
			
 
				-	starpu_matrix_data_register(&B_handle, 0, (uintptr_t)B, 
			
 
				+	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, 
			
 
				 		zdim, zdim, xdim, sizeof(float));
			
 
				-	starpu_matrix_data_register(&C_handle, 0, (uintptr_t)C, 
			
 
				+	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, 
			
 
				 		ydim, ydim, xdim, sizeof(float));
			
 
				 
			
 
				 	/* A filter is a method to partition a data into disjoint chunks, it is
			
@@ -365,9 +365,9 @@ int main(STARPU_ATTRIBUTE_UNUSED int argc,
 
				 	 * starpu_data_map_filters is called again on C_handle.
			
 
				 	 * The second argument is the memory node where the different subsets
			
 
				 	 * should be reassembled, 0 = main memory (RAM) */
			
 
				-	starpu_data_unpartition(A_handle, 0);
			
 
				-	starpu_data_unpartition(B_handle, 0);
			
 
				-	starpu_data_unpartition(C_handle, 0);
			
 
				+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	/* stop monitoring matrix C : after this, it is not possible to pass C 
			
 
				 	 * (or any subset of C) as a codelet input/output. This also implements
			
--- a/examples/basic_examples/multiformat.c
+++ b/examples/basic_examples/multiformat.c
@@ -126,7 +126,7 @@ static void
 
				 register_data(void)
			
 
				 {
			
 
				 	starpu_multiformat_data_register(&array_of_structs_handle,
			
 
				-					 0,
			
 
				+					 STARPU_MAIN_RAM,
			
 
				 					 &array_of_structs,
			
 
				 					 N_ELEMENTS,
			
 
				 					 &format_ops);
			
--- a/examples/basic_examples/variable.c
+++ b/examples/basic_examples/variable.c
@@ -50,7 +50,7 @@ int main(int argc, char **argv)
 
				         if (argc == 2) niter = atoi(argv[1]);
			
 
				         foo = 0.0f;
			
 
				 
			
 
				-	starpu_variable_data_register(&float_array_handle, 0 /* home node */,
			
 
				+	starpu_variable_data_register(&float_array_handle, STARPU_MAIN_RAM /* home node */,
			
 
				                                       (uintptr_t)&foo, sizeof(float));
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
--- a/examples/basic_examples/vector_scal.c
+++ b/examples/basic_examples/vector_scal.c
@@ -141,14 +141,14 @@ int main(int argc, char **argv)
 
				 	 *  - the first argument of the registration method is a pointer to the
			
 
				 	 *    handle that should describe the data
			
 
				 	 *  - the second argument is the memory node where the data (ie. "vector")
			
 
				-	 *    resides initially: 0 stands for an address in main memory, as
			
 
				+	 *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
			
 
				 	 *    opposed to an adress on a GPU for instance.
			
 
				 	 *  - the third argument is the adress of the vector in RAM
			
 
				 	 *  - the fourth argument is the number of elements in the vector
			
 
				 	 *  - the fifth argument is the size of each element.
			
 
				 	 */
			
 
				 	starpu_data_handle_t vector_handle;
			
 
				-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				 
			
 
				 	float factor = 3.14;
			
 
				 
			
--- a/examples/basic_examples/vector_scal_c.c
+++ b/examples/basic_examples/vector_scal_c.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011, 2013  Université de Bordeaux 1
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -69,14 +69,14 @@ int compute_(int *F_NX, float *vector)
 
				 	 *  - the first argument of the registration method is a pointer to the
			
 
				 	 *    handle that should describe the data
			
 
				 	 *  - the second argument is the memory node where the data (ie. "vector")
			
 
				-	 *    resides initially: 0 stands for an address in main memory, as
			
 
				+	 *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
			
 
				 	 *    opposed to an adress on a GPU for instance.
			
 
				 	 *  - the third argument is the adress of the vector in RAM
			
 
				 	 *  - the fourth argument is the number of elements in the vector
			
 
				 	 *  - the fifth argument is the size of each element.
			
 
				 	 */
			
 
				 	starpu_data_handle_t vector_handle;
			
 
				-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				 
			
 
				 	float factor = 3.14;
			
 
				 
			
--- a/examples/binary/binary.c
+++ b/examples/binary/binary.c
@@ -42,7 +42,7 @@ int compute(char *file_name, int load_as_file)
 
				 	int ret = 0;
			
 
				 	unsigned niter = 500;
			
 
				 
			
 
				-	starpu_vector_data_register(&float_array_handle, 0, (uintptr_t)&float_array, 4, sizeof(float));
			
 
				+	starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&float_array, 4, sizeof(float));
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 	if (load_as_file)
			
--- a/examples/callback/callback.c
+++ b/examples/callback/callback.c
@@ -59,7 +59,7 @@ int main(int argc, char **argv)
 
				 		return 77;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				-	starpu_variable_data_register(&handle, 0, (uintptr_t)&v, sizeof(int));
			
 
				+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int));
			
 
				 
			
 
				 	struct starpu_task *task = starpu_task_create();
			
 
				 	task->cl = &cl;
			
--- a/examples/cg/cg.c
+++ b/examples/cg/cg.c
@@ -137,16 +137,16 @@ static void free_data(void)
 
				 
			
 
				 static void register_data(void)
			
 
				 {
			
 
				-	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A, n, n, n, sizeof(TYPE));
			
 
				-	starpu_vector_data_register(&b_handle, 0, (uintptr_t)b, n, sizeof(TYPE));
			
 
				-	starpu_vector_data_register(&x_handle, 0, (uintptr_t)x, n, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, n, n, n, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&b_handle, STARPU_MAIN_RAM, (uintptr_t)b, n, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)x, n, sizeof(TYPE));
			
 
				 
			
 
				-	starpu_vector_data_register(&r_handle, 0, (uintptr_t)r, n, sizeof(TYPE));
			
 
				-	starpu_vector_data_register(&d_handle, 0, (uintptr_t)d, n, sizeof(TYPE));
			
 
				-	starpu_vector_data_register(&q_handle, 0, (uintptr_t)q, n, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&r_handle, STARPU_MAIN_RAM, (uintptr_t)r, n, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&d_handle, STARPU_MAIN_RAM, (uintptr_t)d, n, sizeof(TYPE));
			
 
				+	starpu_vector_data_register(&q_handle, STARPU_MAIN_RAM, (uintptr_t)q, n, sizeof(TYPE));
			
 
				 
			
 
				-	starpu_variable_data_register(&dtq_handle, 0, (uintptr_t)&dtq, sizeof(TYPE));
			
 
				-	starpu_variable_data_register(&rtr_handle, 0, (uintptr_t)&rtr, sizeof(TYPE));
			
 
				+	starpu_variable_data_register(&dtq_handle, STARPU_MAIN_RAM, (uintptr_t)&dtq, sizeof(TYPE));
			
 
				+	starpu_variable_data_register(&rtr_handle, STARPU_MAIN_RAM, (uintptr_t)&rtr, sizeof(TYPE));
			
 
				 
			
 
				 	if (use_reduction)
			
 
				 	{
			
@@ -160,13 +160,13 @@ static void register_data(void)
 
				 
			
 
				 static void unregister_data(void)
			
 
				 {
			
 
				-	starpu_data_unpartition(A_handle, 0);
			
 
				-	starpu_data_unpartition(b_handle, 0);
			
 
				-	starpu_data_unpartition(x_handle, 0);
			
 
				+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(b_handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(x_handle, STARPU_MAIN_RAM);
			
 
				 
			
 
				-	starpu_data_unpartition(r_handle, 0);
			
 
				-	starpu_data_unpartition(d_handle, 0);
			
 
				-	starpu_data_unpartition(q_handle, 0);
			
 
				+	starpu_data_unpartition(r_handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(d_handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(q_handle, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	starpu_data_unregister(A_handle);
			
 
				 	starpu_data_unregister(b_handle);
			
--- a/examples/cholesky/cholesky_grain_tag.c
+++ b/examples/cholesky/cholesky_grain_tag.c
@@ -189,7 +189,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
 
				 
			
@@ -250,7 +250,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
				 	{
			
 
				 		/* stall the application until the end of computations */
			
 
				 		starpu_tag_wait(TAG11_AUX(nblocks-1, reclevel));
			
 
				-		starpu_data_unpartition(dataA, 0);
			
 
				+		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 		starpu_data_unregister(dataA);
			
 
				 		return 0;
			
 
				 	}
			
@@ -274,7 +274,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
				 
			
 
				 		free(tag_array);
			
 
				 
			
 
				-		starpu_data_unpartition(dataA, 0);
			
 
				+		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 		starpu_data_unregister(dataA);
			
 
				 
			
 
				 		float *newmatA = &matA[nbigblocks*(size/nblocks)*(ld+1)];
			
--- a/examples/cholesky/cholesky_implicit.c
+++ b/examples/cholesky/cholesky_implicit.c
@@ -183,7 +183,7 @@ static int cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	struct starpu_data_filter f =
			
 
				 	{
			
@@ -201,7 +201,7 @@ static int cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
				 
			
 
				 	int ret = _cholesky(dataA, nblocks);
			
 
				 
			
 
				-	starpu_data_unpartition(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(dataA);
			
 
				 
			
 
				 	return ret;
			
--- a/examples/cholesky/cholesky_tag.c
+++ b/examples/cholesky/cholesky_tag.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2013  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -234,7 +234,7 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
				 	/* stall the application until the end of computations */
			
 
				 	starpu_tag_wait(TAG11(nblocks-1));
			
 
				 
			
 
				-	starpu_data_unpartition(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	end = starpu_timing_now();
			
 
				 
			
@@ -279,7 +279,7 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
 
				 
			
--- a/examples/cholesky/cholesky_tile_tag.c
+++ b/examples/cholesky/cholesky_tile_tag.c
@@ -301,7 +301,7 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		if (x <= y)
			
 
				 		{
			
 
				-			starpu_matrix_data_register(&A_state[y][x], 0, (uintptr_t)A[y][x], 
			
 
				+			starpu_matrix_data_register(&A_state[y][x], STARPU_MAIN_RAM, (uintptr_t)A[y][x], 
			
 
				 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
			
 
				 		}
			
 
				 	}
			
--- a/examples/cpp/incrementer_cpp.cpp
+++ b/examples/cpp/incrementer_cpp.cpp
@@ -50,7 +50,7 @@ int main(int argc, char **argv)
 
				 	if (ret == -ENODEV) return 77;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				-	starpu_vector_data_register(&float_array_handle, 0, (uintptr_t)&float_array, 4, sizeof(float));
			
 
				+	starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&float_array, 4, sizeof(float));
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				         ret = starpu_opencl_load_opencl_from_file("examples/incrementer/incrementer_kernels_opencl_kernel.cl", &opencl_program, NULL);
			
--- a/examples/filters/custom_mf/custom_interface.c
+++ b/examples/filters/custom_mf/custom_interface.c
@@ -230,7 +230,7 @@ static size_t custom_interface_get_size(starpu_data_handle_t handle)
 
				 	struct custom_data_interface *data_interface;
			
 
				 
			
 
				 	data_interface = (struct custom_data_interface *)
			
 
				-				starpu_data_get_interface_on_node(handle, 0);
			
 
				+				starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
			
 
				 	size = data_interface->nx * data_interface->ops->cpu_elemsize;
			
 
				 	return size;
			
 
				 }
			
@@ -243,7 +243,7 @@ static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle)
 
				 static void display_custom_interface(starpu_data_handle_t handle, FILE *f)
			
 
				 {
			
 
				 	struct custom_data_interface *ci = (struct custom_data_interface *)
			
 
				-		starpu_data_get_interface_on_node(handle, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
			
 
				 	fprintf(f, "Custom interface of size %d", ci->nx);
			
 
				 }
			
 
				 
			
@@ -252,7 +252,7 @@ custom_get_nx(starpu_data_handle_t handle)
 
				 {
			
 
				 	struct custom_data_interface *data_interface;
			
 
				 	data_interface = (struct custom_data_interface *)
			
 
				-				starpu_data_get_interface_on_node(handle, 0);
			
 
				+				starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
			
 
				 	return data_interface->nx;
			
 
				 }
			
 
				 
			
--- a/examples/filters/custom_mf/custom_mf_filter.c
+++ b/examples/filters/custom_mf/custom_mf_filter.c
@@ -110,7 +110,7 @@ register_and_partition_data(void)
 
				 		_array_of_structs[i].x = i+1.0;
			
 
				 		_array_of_structs[i].y = 42.0;
			
 
				 	}
			
 
				-	custom_data_register(&_handle, 0, &_array_of_structs, N, &format_ops);
			
 
				+	custom_data_register(&_handle, STARPU_MAIN_RAM, &_array_of_structs, N, &format_ops);
			
 
				 
			
 
				 	struct starpu_data_filter f =
			
 
				 	{
			
@@ -125,7 +125,7 @@ register_and_partition_data(void)
 
				 static void
			
 
				 unpartition_and_unregister_data(void)
			
 
				 {
			
 
				-	starpu_data_unpartition(_handle, 0);
			
 
				+	starpu_data_unpartition(_handle, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(_handle);
			
 
				 }
			
 
				 
			
--- a/examples/filters/fblock.c
+++ b/examples/filters/fblock.c
@@ -115,7 +115,7 @@ int main(int argc, char **argv)
 
				 #endif
			
 
				 
			
 
				         /* Declare data to StarPU */
			
 
				-        starpu_block_data_register(&handle, 0, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int));
			
 
				+        starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int));
			
 
				         FPRINTF(stderr, "IN  Block\n");
			
 
				         print_data(handle);
			
 
				 
			
@@ -159,7 +159,7 @@ int main(int argc, char **argv)
 
				         }
			
 
				 
			
 
				         /* Unpartition the data, unregister it from StarPU and shutdown */
			
 
				-        starpu_data_unpartition(handle, 0);
			
 
				+        starpu_data_unpartition(handle, STARPU_MAIN_RAM);
			
 
				         print_data(handle);
			
 
				         starpu_data_unregister(handle);
			
 
				 
			
--- a/examples/filters/fmatrix.c
+++ b/examples/filters/fmatrix.c
@@ -75,7 +75,7 @@ int main(int argc, char **argv)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	/* Declare data to StarPU */
			
 
				-	starpu_matrix_data_register(&handle, 0, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0]));
			
 
				+	starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0]));
			
 
				 
			
 
				         /* Partition the matrix in PARTS sub-matrices */
			
 
				 	struct starpu_data_filter f =
			
@@ -102,7 +102,7 @@ int main(int argc, char **argv)
 
				 	}
			
 
				 
			
 
				         /* Unpartition the data, unregister it from StarPU and shutdown */
			
 
				-	starpu_data_unpartition(handle, 0);
			
 
				+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
			
 
				         starpu_data_unregister(handle);
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/examples/filters/fvector.c
+++ b/examples/filters/fvector.c
@@ -63,7 +63,7 @@ int main(int argc, char **argv)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	/* Declare data to StarPU */
			
 
				-	starpu_vector_data_register(&handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				+	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				 
			
 
				         /* Partition the vector in PARTS sub-vectors */
			
 
				 	struct starpu_data_filter f =
			
@@ -91,7 +91,7 @@ int main(int argc, char **argv)
 
				 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				 	}
			
 
				 
			
 
				-	starpu_data_unpartition(handle, 0);
			
 
				+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
			
 
				         starpu_data_unregister(handle);
			
 
				 	starpu_shutdown();
			
 
				 
			
--- a/examples/filters/shadow.c
+++ b/examples/filters/shadow.c
@@ -121,10 +121,10 @@ int main(int argc, char **argv)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	/* Declare source vector to StarPU */
			
 
				-	starpu_vector_data_register(&handle, 0, (uintptr_t)vector, NX + 2*SHADOW, sizeof(vector[0]));
			
 
				+	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX + 2*SHADOW, sizeof(vector[0]));
			
 
				 
			
 
				 	/* Declare destination vector to StarPU */
			
 
				-	starpu_vector_data_register(&handle2, 0, (uintptr_t)vector2, NX + PARTS*2*SHADOW, sizeof(vector[0]));
			
 
				+	starpu_vector_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)vector2, NX + PARTS*2*SHADOW, sizeof(vector[0]));
			
 
				 
			
 
				         /* Partition the source vector in PARTS sub-vectors with shadows */
			
 
				 	/* NOTE: the resulting handles should only be used in read-only mode,
			
@@ -163,8 +163,8 @@ int main(int argc, char **argv)
 
				 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				 	}
			
 
				 
			
 
				-	starpu_data_unpartition(handle, 0);
			
 
				-	starpu_data_unpartition(handle2, 0);
			
 
				+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(handle2, STARPU_MAIN_RAM);
			
 
				         starpu_data_unregister(handle);
			
 
				         starpu_data_unregister(handle2);
			
 
				 	starpu_shutdown();
			
--- a/examples/filters/shadow2d.c
+++ b/examples/filters/shadow2d.c
@@ -202,10 +202,10 @@ int main(int argc, char **argv)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	/* Declare source matrix to StarPU */
			
 
				-	starpu_matrix_data_register(&handle, 0, (uintptr_t)matrix, NX + 2*SHADOWX, NX + 2*SHADOWX, NY + 2*SHADOWY, sizeof(matrix[0][0]));
			
 
				+	starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX + 2*SHADOWX, NX + 2*SHADOWX, NY + 2*SHADOWY, sizeof(matrix[0][0]));
			
 
				 
			
 
				 	/* Declare destination matrix to StarPU */
			
 
				-	starpu_matrix_data_register(&handle2, 0, (uintptr_t)matrix2, NX + PARTSX*2*SHADOWX, NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, sizeof(matrix2[0][0]));
			
 
				+	starpu_matrix_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)matrix2, NX + PARTSX*2*SHADOWX, NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, sizeof(matrix2[0][0]));
			
 
				 
			
 
				         /* Partition the source matrix in PARTSY*PARTSX sub-matrices with shadows */
			
 
				 	/* NOTE: the resulting handles should only be used in read-only mode,
			
@@ -258,8 +258,8 @@ int main(int argc, char **argv)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_data_unpartition(handle, 0);
			
 
				-	starpu_data_unpartition(handle2, 0);
			
 
				+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(handle2, STARPU_MAIN_RAM);
			
 
				         starpu_data_unregister(handle);
			
 
				         starpu_data_unregister(handle2);
			
 
				 	starpu_shutdown();
			
--- a/examples/filters/shadow3d.c
+++ b/examples/filters/shadow3d.c
@@ -214,13 +214,13 @@ int main(int argc, char **argv)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	/* Declare source matrix to StarPU */
			
 
				-	starpu_block_data_register(&handle, 0, (uintptr_t)matrix,
			
 
				+	starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix,
			
 
				 			NX + 2*SHADOWX, (NX + 2*SHADOWX) * (NY + 2*SHADOWY),
			
 
				 			NX + 2*SHADOWX, NY + 2*SHADOWY, NZ + 2*SHADOWZ,
			
 
				 			sizeof(matrix[0][0][0]));
			
 
				 
			
 
				 	/* Declare destination matrix to StarPU */
			
 
				-	starpu_block_data_register(&handle2, 0, (uintptr_t)matrix2,
			
 
				+	starpu_block_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)matrix2,
			
 
				 			NX + PARTSX*2*SHADOWX, (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY),
			
 
				 			NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, NZ + PARTSZ*2*SHADOWZ,
			
 
				 			sizeof(matrix2[0][0][0]));
			
@@ -290,8 +290,8 @@ int main(int argc, char **argv)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_data_unpartition(handle, 0);
			
 
				-	starpu_data_unpartition(handle2, 0);
			
 
				+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(handle2, STARPU_MAIN_RAM);
			
 
				         starpu_data_unregister(handle);
			
 
				         starpu_data_unregister(handle2);
			
 
				 	starpu_shutdown();
			
--- a/examples/heat/dw_factolu.c
+++ b/examples/heat/dw_factolu.c
@@ -750,7 +750,7 @@ void dw_factoLU(float *matA, unsigned size,
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, 
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, 
			
 
				 			size, size, sizeof(float));
			
 
				 
			
 
				 	struct starpu_data_filter f =
			
@@ -779,7 +779,7 @@ void dw_factoLU(float *matA, unsigned size,
 
				 	}
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_data_unpartition(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	starpu_data_unregister(dataA);
			
 
				 
			
--- a/examples/heat/dw_factolu_grain.c
+++ b/examples/heat/dw_factolu_grain.c
@@ -213,7 +213,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 	 * (re)partition data
			
 
				 	 */
			
 
				 	starpu_data_handle_t dataA;
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	STARPU_ASSERT((size % blocksize) == 0);
			
 
				 	STARPU_ASSERT((inner_size % blocksize) == 0);
			
@@ -288,7 +288,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 	{
			
 
				 		/* we wait for the last task and we are done */
			
 
				 		starpu_tag_wait(TAG11(nblocks-1, tag_prefix));
			
 
				-		starpu_data_unpartition(dataA, 0);		
			
 
				+		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);		
			
 
				 		return;
			
 
				 	}
			
 
				 	else
			
@@ -312,7 +312,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 
			
 
				 		free(tag_array);
			
 
				 
			
 
				-		starpu_data_unpartition(dataA, 0);
			
 
				+		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 		starpu_data_unregister(dataA);
			
 
				 
			
 
				 		float *newmatA = &matA[inner_size*(ld+1)];
			
--- a/examples/heat/dw_factolu_tag.c
+++ b/examples/heat/dw_factolu_tag.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -297,7 +297,7 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				 	struct starpu_data_filter f =
			
 
				 	{
			
@@ -316,7 +316,7 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
				 	dw_codelet_facto_v3(dataA, nblocks);
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_data_unpartition(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	starpu_data_unregister(dataA);
			
 
				 
			
--- a/examples/heat/dw_sparse_cg.c
+++ b/examples/heat/dw_sparse_cg.c
@@ -357,10 +357,10 @@ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
 
				 	starpu_data_handle_t ds_vecr, ds_vecd, ds_vecq;
			
 
				 
			
 
				 	/* first the user-allocated data */
			
 
				-	starpu_csr_data_register(&ds_matrixA, 0, nnz, nrow,
			
 
				+	starpu_csr_data_register(&ds_matrixA, STARPU_MAIN_RAM, nnz, nrow,
			
 
				 			(uintptr_t)nzvalA, colind, rowptr, 0, sizeof(float));
			
 
				-	starpu_vector_data_register(&ds_vecx, 0, (uintptr_t)vecx, nrow, sizeof(float));
			
 
				-	starpu_vector_data_register(&ds_vecb, 0, (uintptr_t)vecb, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecx, STARPU_MAIN_RAM, (uintptr_t)vecx, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecb, STARPU_MAIN_RAM, (uintptr_t)vecb, nrow, sizeof(float));
			
 
				 
			
 
				 	/* then allocate the algorithm intern data */
			
 
				 	float *ptr_vecr, *ptr_vecd, *ptr_vecq;
			
@@ -380,9 +380,9 @@ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
 
				 	FPRINTF(stdout, "nrow = %u \n", nrow);
			
 
				 
			
 
				 	/* and register them as well */
			
 
				-	starpu_vector_data_register(&ds_vecr, 0, (uintptr_t)ptr_vecr, nrow, sizeof(float));
			
 
				-	starpu_vector_data_register(&ds_vecd, 0, (uintptr_t)ptr_vecd, nrow, sizeof(float));
			
 
				-	starpu_vector_data_register(&ds_vecq, 0, (uintptr_t)ptr_vecq, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecr, STARPU_MAIN_RAM, (uintptr_t)ptr_vecr, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecd, STARPU_MAIN_RAM, (uintptr_t)ptr_vecd, nrow, sizeof(float));
			
 
				+	starpu_vector_data_register(&ds_vecq, STARPU_MAIN_RAM, (uintptr_t)ptr_vecq, nrow, sizeof(float));
			
 
				 
			
 
				 	/* we now have the complete problem */
			
 
				 	struct cg_problem problem;
			
--- a/examples/incrementer/incrementer.c
+++ b/examples/incrementer/incrementer.c
@@ -55,7 +55,7 @@ int main(int argc, char **argv)
 
				 	float float_array[4] STARPU_ATTRIBUTE_ALIGNED(16) = { 0.0f, 0.0f, 0.0f, 0.0f};
			
 
				 
			
 
				 	starpu_data_handle_t float_array_handle;
			
 
				-	starpu_vector_data_register(&float_array_handle, 0 /* home node */,
			
 
				+	starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM /* home node */,
			
 
				 			(uintptr_t)&float_array, 4, sizeof(float));
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
--- a/examples/interface/complex.c
+++ b/examples/interface/complex.c
@@ -92,8 +92,8 @@ int main(int argc, char **argv)
 
				 						  &opencl_program, NULL);
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
			
 
				 #endif
			
 
				-	starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
			
 
				-	starpu_complex_data_register(&handle2, 0, &copy_real, &copy_imaginary, 1);
			
 
				+	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
			
 
				+	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);
			
 
				 
			
 
				 	ret = starpu_insert_task(&cl_display, STARPU_VALUE, "handle1", strlen("handle1"), STARPU_R, handle1, 0);
			
 
				 	if (ret == -ENODEV) goto end;
			
--- a/examples/interface/complex_interface.c
+++ b/examples/interface/complex_interface.c
@@ -21,7 +21,7 @@
 
				 double *starpu_complex_get_real(starpu_data_handle_t handle)
			
 
				 {
			
 
				 	struct starpu_complex_interface *complex_interface =
			
 
				-		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
			
 
				+		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	return complex_interface->real;
			
 
				 }
			
@@ -29,7 +29,7 @@ double *starpu_complex_get_real(starpu_data_handle_t handle)
 
				 double *starpu_complex_get_imaginary(starpu_data_handle_t handle)
			
 
				 {
			
 
				 	struct starpu_complex_interface *complex_interface =
			
 
				-		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
			
 
				+		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	return complex_interface->imaginary;
			
 
				 }
			
@@ -37,7 +37,7 @@ double *starpu_complex_get_imaginary(starpu_data_handle_t handle)
 
				 int starpu_complex_get_nx(starpu_data_handle_t handle)
			
 
				 {
			
 
				 	struct starpu_complex_interface *complex_interface =
			
 
				-		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
			
 
				+		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	return complex_interface->nx;
			
 
				 }
			
--- a/examples/lu/xlu.c
+++ b/examples/lu/xlu.c
@@ -249,7 +249,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				 	/* We already enforce deps by hand */
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
@@ -271,7 +271,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
				 	int ret = dw_codelet_facto_v3(dataA, nblocks);
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_data_unpartition(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(dataA);
			
 
				 
			
 
				 	return ret;
			
--- a/examples/lu/xlu_implicit.c
+++ b/examples/lu/xlu_implicit.c
@@ -152,7 +152,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				 	struct starpu_data_filter f =
			
 
				 	{
			
@@ -171,7 +171,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
				 	int ret = dw_codelet_facto_v3(dataA, nblocks);
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_data_unpartition(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(dataA);
			
 
				 	return ret;
			
 
				 }
			
--- a/examples/lu/xlu_implicit_pivot.c
+++ b/examples/lu/xlu_implicit_pivot.c
@@ -206,7 +206,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				 	struct starpu_data_filter f =
			
 
				 	{
			
@@ -246,7 +246,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
				 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_data_unpartition(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(dataA);
			
 
				 
			
 
				 	free(piv_description);
			
@@ -270,7 +270,7 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 
				 	for (bj = 0; bj < nblocks; bj++)
			
 
				 	for (bi = 0; bi < nblocks; bi++)
			
 
				 	{
			
 
				-		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
			
 
				+		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)matA[bi+nblocks*bj], size/nblocks,
			
 
				 			size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 	}
			
--- a/examples/lu/xlu_pivot.c
+++ b/examples/lu/xlu_pivot.c
@@ -338,7 +338,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
				 
			
 
				 	/* monitor and partition the A matrix into blocks :
			
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				 	/* We already enforce deps by hand */
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
@@ -390,7 +390,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
				 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				 
			
 
				 	/* gather all the data */
			
 
				-	starpu_data_unpartition(dataA, 0);
			
 
				+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
			
 
				 	free(piv_description);
			
 
				 
			
 
				 	return ret;
			
@@ -413,7 +413,7 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 
				 	for (bj = 0; bj < nblocks; bj++)
			
 
				 	for (bi = 0; bi < nblocks; bi++)
			
 
				 	{
			
 
				-		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
			
 
				+		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)matA[bi+nblocks*bj], size/nblocks,
			
 
				 			size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 
			
--- a/examples/mandelbrot/mandelbrot.c
+++ b/examples/mandelbrot/mandelbrot.c
@@ -501,7 +501,7 @@ int main(int argc, char **argv)
 
				 	for (iby = 0; iby < nblocks; iby++)
			
 
				 	{
			
 
				 		unsigned *data = &buffer[iby*block_size*width];
			
 
				-		starpu_vector_data_register(&block_handles[iby], 0,
			
 
				+		starpu_vector_data_register(&block_handles[iby], STARPU_MAIN_RAM,
			
 
				                         (uintptr_t)data, block_size*width, sizeof(unsigned));
			
 
				 	}
			
 
				 
			
--- a/examples/matvecmult/matvecmult.c
+++ b/examples/matvecmult/matvecmult.c
@@ -186,9 +186,9 @@ int main(int argc, char **argv)
 
				         fillArray(mult, height);
			
 
				         matVecMult(matrix, vector, width, height, correctResult);
			
 
				 
			
 
				-	starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix, width, width, height, sizeof(float));
			
 
				-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, width, sizeof(float));
			
 
				-	starpu_vector_data_register(&mult_handle, 0, (uintptr_t)mult, height, sizeof(float));
			
 
				+	starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, width, sizeof(float));
			
 
				+	starpu_vector_data_register(&mult_handle, STARPU_MAIN_RAM, (uintptr_t)mult, height, sizeof(float));
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				         ret = starpu_opencl_load_opencl_from_file("examples/matvecmult/matvecmult_kernel.cl", &opencl_code, NULL);
			
--- a/examples/mult/xgemm.c
+++ b/examples/mult/xgemm.c
@@ -110,11 +110,11 @@ static void init_problem_data(void)
 
				 
			
 
				 static void partition_mult_data(void)
			
 
				 {
			
 
				-	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A,
			
 
				+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A,
			
 
				 		ydim, ydim, zdim, sizeof(TYPE));
			
 
				-	starpu_matrix_data_register(&B_handle, 0, (uintptr_t)B,
			
 
				+	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B,
			
 
				 		zdim, zdim, xdim, sizeof(TYPE));
			
 
				-	starpu_matrix_data_register(&C_handle, 0, (uintptr_t)C,
			
 
				+	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C,
			
 
				 		ydim, ydim, xdim, sizeof(TYPE));
			
 
				 
			
 
				 	struct starpu_data_filter vert;
			
@@ -346,9 +346,9 @@ int main(int argc, char **argv)
 
				 	FPRINTF(stderr, "GFlop/s: %.2f\n", flops/timing/1000.0);
			
 
				 
			
 
				 enodev:
			
 
				-	starpu_data_unpartition(C_handle, 0);
			
 
				-	starpu_data_unpartition(B_handle, 0);
			
 
				-	starpu_data_unpartition(A_handle, 0);
			
 
				+	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	starpu_data_unregister(A_handle);
			
 
				 	starpu_data_unregister(B_handle);
			
--- a/examples/openmp/vector_scal_omp.c
+++ b/examples/openmp/vector_scal_omp.c
@@ -96,7 +96,7 @@ int main(int argc, char **argv)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	starpu_data_handle_t vector_handle;
			
 
				-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				 
			
 
				 	float factor = 1.001;
			
 
				 
			
--- a/examples/pi/pi.c
+++ b/examples/pi/pi.c
@@ -135,13 +135,13 @@ int main(int argc, char **argv)
 
				 
			
 
				 	/* Any worker may use that array now */
			
 
				 	starpu_data_handle_t sobol_qrng_direction_handle;
			
 
				-	starpu_vector_data_register(&sobol_qrng_direction_handle, 0,
			
 
				+	starpu_vector_data_register(&sobol_qrng_direction_handle, STARPU_MAIN_RAM,
			
 
				 		(uintptr_t)sobol_qrng_directions, n_dimensions*n_directions, sizeof(unsigned));
			
 
				 
			
 
				 	unsigned *cnt_array = malloc(ntasks*sizeof(unsigned));
			
 
				 	STARPU_ASSERT(cnt_array);
			
 
				 	starpu_data_handle_t cnt_array_handle;
			
 
				-	starpu_vector_data_register(&cnt_array_handle, 0, (uintptr_t)cnt_array, ntasks, sizeof(unsigned));
			
 
				+	starpu_vector_data_register(&cnt_array_handle, STARPU_MAIN_RAM, (uintptr_t)cnt_array, ntasks, sizeof(unsigned));
			
 
				 
			
 
				 	/* Use a write-through policy : when the data is modified on an
			
 
				 	 * accelerator, we know that it will only be modified once and be
			
@@ -179,7 +179,7 @@ int main(int argc, char **argv)
 
				 	starpu_task_wait_for_all();
			
 
				 
			
 
				 	/* Get the cnt_array back in main memory */
			
 
				-	starpu_data_unpartition(cnt_array_handle, 0);
			
 
				+	starpu_data_unpartition(cnt_array_handle, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(cnt_array_handle);
			
 
				 	starpu_data_unregister(sobol_qrng_direction_handle);
			
 
				 
			
--- a/examples/pi/pi_redux.c
+++ b/examples/pi/pi_redux.c
@@ -334,7 +334,7 @@ int main(int argc, char **argv)
 
				 	 * [-1,1]^2. */
			
 
				 	unsigned long shot_cnt = 0;
			
 
				 	starpu_data_handle_t shot_cnt_handle;
			
 
				-	starpu_variable_data_register(&shot_cnt_handle, 0,
			
 
				+	starpu_variable_data_register(&shot_cnt_handle, STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&shot_cnt, sizeof(shot_cnt));
			
 
				 
			
 
				 	starpu_data_set_reduction_methods(shot_cnt_handle,
			
--- a/examples/ppm_downscaler/yuv_downscaler.c
+++ b/examples/ppm_downscaler/yuv_downscaler.c
@@ -159,39 +159,39 @@ int main(int argc, char **argv)
 
				 	for (frame = 0; frame < nframes; frame++)
			
 
				 	{
			
 
				 		/* register Y layer */
			
 
				-		starpu_matrix_data_register(&frame_y_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&frame_y_handle[frame], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&yuv_in_buffer[frame].y,
			
 
				 			WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));
			
 
				 
			
 
				 		starpu_data_partition(frame_y_handle[frame], &filter_y);
			
 
				 
			
 
				-		starpu_matrix_data_register(&new_frame_y_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&new_frame_y_handle[frame], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&yuv_out_buffer[frame].y,
			
 
				 			NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));
			
 
				 
			
 
				 		starpu_data_partition(new_frame_y_handle[frame], &filter_y);
			
 
				 
			
 
				 		/* register U layer */
			
 
				-		starpu_matrix_data_register(&frame_u_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&frame_u_handle[frame], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&yuv_in_buffer[frame].u,
			
 
				 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
			
 
				 
			
 
				 		starpu_data_partition(frame_u_handle[frame], &filter_uv);
			
 
				 
			
 
				-		starpu_matrix_data_register(&new_frame_u_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&new_frame_u_handle[frame], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&yuv_out_buffer[frame].u,
			
 
				 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
			
 
				 
			
 
				 		starpu_data_partition(new_frame_u_handle[frame], &filter_uv);
			
 
				 
			
 
				 		/* register V layer */
			
 
				-		starpu_matrix_data_register(&frame_v_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&frame_v_handle[frame], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&yuv_in_buffer[frame].v,
			
 
				 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
			
 
				 
			
 
				 		starpu_data_partition(frame_v_handle[frame], &filter_uv);
			
 
				 
			
 
				-		starpu_matrix_data_register(&new_frame_v_handle[frame], 0,
			
 
				+		starpu_matrix_data_register(&new_frame_v_handle[frame], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&yuv_out_buffer[frame].v,
			
 
				 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
			
 
				 
			
--- a/examples/reductions/dot_product.c
+++ b/examples/reductions/dot_product.c
@@ -370,13 +370,13 @@ int main(int argc, char **argv)
 
				 	unsigned block;
			
 
				 	for (block = 0; block < _nblocks; block++)
			
 
				 	{
			
 
				-		starpu_vector_data_register(&_x_handles[block], 0,
			
 
				+		starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&_x[_entries_per_block*block], _entries_per_block, sizeof(float));
			
 
				-		starpu_vector_data_register(&_y_handles[block], 0,
			
 
				+		starpu_vector_data_register(&_y_handles[block], STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)&_y[_entries_per_block*block], _entries_per_block, sizeof(float));
			
 
				 	}
			
 
				 
			
 
				-	starpu_variable_data_register(&_dot_handle, 0, (uintptr_t)&_dot, sizeof(DOT_TYPE));
			
 
				+	starpu_variable_data_register(&_dot_handle, STARPU_MAIN_RAM, (uintptr_t)&_dot, sizeof(DOT_TYPE));
			
 
				 
			
 
				 	/*
			
 
				 	 *	Compute dot product with StarPU
			
--- a/examples/reductions/minmax_reduction.c
+++ b/examples/reductions/minmax_reduction.c
@@ -161,7 +161,7 @@ int main(int argc, char **argv)
 
				 	for (block = 0; block < _nblocks; block++)
			
 
				 	{
			
 
				 		uintptr_t block_start = (uintptr_t)&_x[_entries_per_bock*block];
			
 
				-		starpu_vector_data_register(&_x_handles[block], 0, block_start,
			
 
				+		starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM, block_start,
			
 
				 					    _entries_per_bock, sizeof(TYPE));
			
 
				 	}
			
 
				 
			
@@ -171,7 +171,7 @@ int main(int argc, char **argv)
 
				 	/* Initialize current max */
			
 
				 	_minmax[1] = TYPE_MIN;
			
 
				 
			
 
				-	starpu_variable_data_register(&_minmax_handle, 0, (uintptr_t)_minmax, 2*sizeof(TYPE));
			
 
				+	starpu_variable_data_register(&_minmax_handle, STARPU_MAIN_RAM, (uintptr_t)_minmax, 2*sizeof(TYPE));
			
 
				 
			
 
				 	/* Set the methods to define neutral elements and to perform the reduction operation */
			
 
				 	starpu_data_set_reduction_methods(_minmax_handle, &minmax_redux_codelet, &minmax_init_codelet);
			
--- a/examples/spmd/vector_scal_spmd.c
+++ b/examples/spmd/vector_scal_spmd.c
@@ -114,7 +114,7 @@ int main(int argc, char **argv)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	starpu_data_handle_t vector_handle;
			
 
				-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
			
 
				 
			
 
				 	float factor = 1.001;
			
 
				 
			
--- a/examples/spmv/dw_block_spmv.c
+++ b/examples/spmv/dw_block_spmv.c
@@ -47,7 +47,7 @@ void create_data(void)
 
				 	bcsr_matrix = mm_file_to_bcsr(inputfile, c, r);
			
 
				 
			
 
				 	/* declare the corresponding block CSR to the runtime */
			
 
				-	starpu_bcsr_data_register(&sparse_matrix, 0, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks,
			
 
				+	starpu_bcsr_data_register(&sparse_matrix, STARPU_MAIN_RAM, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks,
			
 
				 	                (uintptr_t)bcsr_matrix->val, bcsr_matrix->colind, bcsr_matrix->rowptr, 
			
 
				 			0, bcsr_matrix->r, bcsr_matrix->c, sizeof(float));
			
 
				 
			
@@ -69,16 +69,16 @@ void create_data(void)
 
				 		vector_out_ptr[ind] = 0.0f;
			
 
				 	}
			
 
				 
			
 
				-	starpu_vector_data_register(&vector_in, 0, (uintptr_t)vector_in_ptr, size, sizeof(float));
			
 
				-	starpu_vector_data_register(&vector_out, 0, (uintptr_t)vector_out_ptr, size, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));
			
 
				 }
			
 
				 
			
 
				 void unregister_data(void)
			
 
				 {
			
 
				-	starpu_data_unpartition(sparse_matrix, 0);
			
 
				+	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(sparse_matrix);
			
 
				 
			
 
				-	starpu_data_unpartition(vector_in, 0);
			
 
				+	starpu_data_unpartition(vector_in, STARPU_MAIN_RAM);
			
 
				 	starpu_data_unregister(vector_in);
			
 
				 
			
 
				 	starpu_data_unregister(vector_out);
			
@@ -98,8 +98,8 @@ void init_problem_callback(void *arg)
 
				 		printf("DONE ...\n");
			
 
				 		gettimeofday(&end, NULL);
			
 
				 
			
 
				-/*		starpu_data_unpartition(sparse_matrix, 0); */
			
 
				-		starpu_data_unpartition(vector_out, 0);
			
 
				+/*		starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM); */
			
 
				+		starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);
			
 
				 
			
 
				 		sem_post(&sem);
			
 
				 	}
			
--- a/examples/spmv/spmv.c
+++ b/examples/spmv/spmv.c
@@ -192,9 +192,9 @@ int main(int argc, char **argv)
 
				 	/*
			
 
				 	 *	Register the CSR matrix and the 2 vectors
			
 
				 	 */
			
 
				-	starpu_csr_data_register(&sparse_matrix, 0, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
			
 
				-	starpu_vector_data_register(&vector_in, 0, (uintptr_t)vector_in_ptr, size, sizeof(float));
			
 
				-	starpu_vector_data_register(&vector_out, 0, (uintptr_t)vector_out_ptr, size, sizeof(float));
			
 
				+	starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
			
 
				+	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));
			
 
				 
			
 
				 	/*
			
 
				 	 *	Partition the CSR matrix and the output vector
			
@@ -239,8 +239,8 @@ int main(int argc, char **argv)
 
				 	/*
			
 
				 	 *	Unregister the CSR matrix and the output vector
			
 
				 	 */
			
 
				-	starpu_data_unpartition(sparse_matrix, 0);
			
 
				-	starpu_data_unpartition(vector_out, 0);
			
 
				+	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
			
 
				+	starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);
			
 
				 
			
 
				 	/*
			
 
				 	 *	Unregister data
			
--- a/examples/stencil/stencil-blocks.c
+++ b/examples/stencil/stencil-blocks.c
@@ -262,7 +262,7 @@ static void allocate_block_on_node(starpu_data_handle_t *handleptr, TYPE **ptr,
 
				 	memset(*ptr, 0, block_size);
			
 
				 
			
 
				 	/* Register it to StarPU */
			
 
				-	starpu_block_data_register(handleptr, 0, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
			
 
				+	starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
			
 
				 }
			
 
				 
			
 
				 void display_memory_consumption(int rank)
			
--- a/gcc-plugin/tests/output-pointer.c
+++ b/gcc-plugin/tests/output-pointer.c
@@ -82,7 +82,7 @@ main (int argc, char *argv[])
 
				   expected_register_arguments.pointer = x;
			
 
				   expected_register_arguments.elements = 42;
			
 
				   expected_register_arguments.element_size = sizeof x[0];
			
 
				-  starpu_vector_data_register (&handle, 0, (uintptr_t) x, 42, sizeof x[0]);
			
 
				+  starpu_vector_data_register (&handle, STARPU_MAIN_RAM, (uintptr_t) x, 42, sizeof x[0]);
			
 
				 
			
 
				   struct insert_task_argument expected[] =
			
 
				     {
			
--- a/gcc-plugin/tests/pointers.c
+++ b/gcc-plugin/tests/pointers.c
@@ -85,12 +85,12 @@ main (int argc, char *argv[])
 
				   expected_register_arguments.pointer = x;
			
 
				   expected_register_arguments.elements = 1;
			
 
				   expected_register_arguments.element_size = sizeof x[0];
			
 
				-  starpu_vector_data_register (&handle, 0, (uintptr_t) x, 1, sizeof x[0]);
			
 
				+  starpu_vector_data_register (&handle, STARPU_MAIN_RAM, (uintptr_t) x, 1, sizeof x[0]);
			
 
				 
			
 
				   expected_register_arguments.pointer = y;
			
 
				   expected_register_arguments.elements = 1;
			
 
				   expected_register_arguments.element_size = sizeof *y;
			
 
				-  starpu_vector_data_register (&handle, 0, (uintptr_t) y, 1, sizeof *y);
			
 
				+  starpu_vector_data_register (&handle, STARPU_MAIN_RAM, (uintptr_t) y, 1, sizeof *y);
			
 
				 
			
 
				   struct insert_task_argument expected_pointer_task[] =
			
 
				     {
			
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -45,6 +45,7 @@ typedef UINT_PTR uintptr_t;
 
				 #include <starpu_thread_util.h>
			
 
				 #include <starpu_util.h>
			
 
				 #include <starpu_data.h>
			
 
				+#include <starpu_disk.h>
			
 
				 #include <starpu_data_interfaces.h>
			
 
				 #include <starpu_data_filters.h>
			
 
				 #include <starpu_stdlib.h>
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -90,15 +90,19 @@ int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node);
 
				 
			
 
				 int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
			
 
				 
			
 
				+#define STARPU_MAIN_RAM 0
			
 
				+
			
 
				 enum starpu_node_kind
			
 
				 {
			
 
				 	STARPU_UNUSED     = 0x00,
			
 
				 	STARPU_CPU_RAM    = 0x01,
			
 
				 	STARPU_CUDA_RAM   = 0x02,
			
 
				 	STARPU_OPENCL_RAM = 0x03,
			
 
				+	STARPU_DISK_RAM   = 0x04,
			
 
				 	STARPU_MIC_RAM    = 0x05,
			
 
				 	STARPU_SCC_RAM    = 0x06,
			
 
				 	STARPU_SCC_SHM    = 0x07
			
 
				+
			
 
				 };
			
 
				 
			
 
				 unsigned starpu_worker_get_memory_node(unsigned workerid);
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -101,24 +101,24 @@ enum starpu_data_interface_id
 
				 
			
 
				 struct starpu_data_interface_ops
			
 
				 {
			
 
				-	void (*register_data_handle)(starpu_data_handle_t handle,
			
 
				-				     unsigned home_node, void *data_interface);
			
 
				-	starpu_ssize_t (*allocate_data_on_node)(void *data_interface, unsigned node);
			
 
				-	void (*free_data_on_node)(void *data_interface, unsigned node);
			
 
				+	void		 (*register_data_handle)	(starpu_data_handle_t handle,
			
 
				+								unsigned home_node, void *data_interface);
			
 
				+	starpu_ssize_t	 (*allocate_data_on_node)	(void *data_interface, unsigned node);
			
 
				+	void 		 (*free_data_on_node)		(void *data_interface, unsigned node);
			
 
				 	const struct starpu_data_copy_methods *copy_methods;
			
 
				-	void * (*handle_to_pointer)(starpu_data_handle_t handle, unsigned node);
			
 
				-	size_t (*get_size)(starpu_data_handle_t handle);
			
 
				-	uint32_t (*footprint)(starpu_data_handle_t handle);
			
 
				-	int (*compare)(void *data_interface_a, void *data_interface_b);
			
 
				-	void (*display)(starpu_data_handle_t handle, FILE *f);
			
 
				+	void * 		 (*handle_to_pointer)		(starpu_data_handle_t handle, unsigned node);
			
 
				+	size_t 		 (*get_size)			(starpu_data_handle_t handle);
			
 
				+	uint32_t 	 (*footprint)			(starpu_data_handle_t handle);
			
 
				+	int 		 (*compare)			(void *data_interface_a, void *data_interface_b);
			
 
				+	void 		 (*display)			(starpu_data_handle_t handle, FILE *f);
			
 
				 	enum starpu_data_interface_id interfaceid;
			
 
				 	size_t interface_size;
			
 
				 
			
 
				 	int is_multiformat;
			
 
				 	struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface);
			
 
				 
			
 
				-	int (*pack_data)(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count);
			
 
				-	int (*unpack_data)(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
			
 
				+	int (*pack_data) (starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count);
			
 
				+	int (*unpack_data) (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
			
 
				 };
			
 
				 
			
 
				 int starpu_data_interface_get_next_id(void);
			
--- a/include/starpu_disk.h
+++ b/include/starpu_disk.h
@@ -0,0 +1,50 @@
 
				+
			
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2013 Corentin Salingue
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_DISK_H__
			
 
				+#define __STARPU_DISK_H__
			
 
				+
			
 
				+/* list of functions to use on disk */
			
 
				+struct starpu_disk_ops {
			
 
				+ 	 void *  (*alloc)  (void *base, size_t size);
			
 
				+	 void    (*free)   (void *base, void *obj, size_t size);
			
 
				+	 void *  (*open)   (void *base, void *pos, size_t size);     /* open an existing file */
			
 
				+	 void    (*close)  (void *base, void *obj, size_t size);
			
 
				+	ssize_t  (*read)   (void *base, void *obj, void *buf, off_t offset, size_t size);        /* ~= pread */
			
 
				+	ssize_t  (*write)  (void *base, void *obj, const void *buf, off_t offset, size_t size); 
			
 
				+	/* readv, writev, read2d, write2d, etc. */
			
 
				+	 void *  (*plug)   (void *parameter);
			
 
				+	 void    (*unplug) (void *base);
			
 
				+	  int    (*copy)   (void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size);
			
 
				+	  int    (*bandwidth) (unsigned node);
			
 
				+};
			
 
				+
			
 
				+
			
 
				+/* Posix functions to use disk memory */
			
 
				+extern struct starpu_disk_ops starpu_disk_stdio_ops;
			
 
				+extern struct starpu_disk_ops starpu_disk_unistd_ops;
			
 
				+extern struct starpu_disk_ops starpu_disk_unistd_o_direct_ops;
			
 
				+
			
 
				+/*functions to add an existing memory */
			
 
				+void starpu_disk_close(unsigned node, void *obj, size_t size);
			
 
				+
			
 
				+void * starpu_disk_open(unsigned node, void *pos, size_t size);
			
 
				+
			
 
				+/* interface to create and to free a memory disk */
			
 
				+int starpu_disk_register(struct starpu_disk_ops * func, void *parameter, size_t size);
			
 
				+
			
 
				+#endif /* __STARPU_DISK_H__ */
			
--- a/include/starpu_util.h
+++ b/include/starpu_util.h
@@ -82,10 +82,10 @@ extern "C"
 
				 #else
			
 
				 #  if defined(__CUDACC__) && defined(STARPU_HAVE_WINDOWS)
			
 
				 #    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) *(int*)NULL = 0; } while(0)
			
 
				-#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { fprintf(stderr, "[starpu][%s][assert failure] " msg "\n", __starpu_func__, ## __VA_ARGS__); *(int*)NULL = 0; }} while(0)
			
 
				+#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n", __starpu_func__, ## __VA_ARGS__); *(int*)NULL = 0; }} while(0)
			
 
				 #  else
			
 
				 #    define STARPU_ASSERT(x)		assert(x)
			
 
				-#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { fprintf(stderr, "[starpu][%s][assert failure] " msg "\n", __starpu_func__, ## __VA_ARGS__); } ; assert(x); } while(0)
			
 
				+#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n", __starpu_func__, ## __VA_ARGS__); } ; assert(x); } while(0)
			
 
				 
			
 
				 #  endif
			
 
				 #endif
			
--- a/mpi/examples/complex/mpi_complex.c
+++ b/mpi/examples/complex/mpi_complex.c
@@ -68,7 +68,7 @@ int main(int argc, char **argv)
 
				 			imaginary[1] = 0.0;
			
 
				 		}
			
 
				 
			
 
				-		starpu_complex_data_register(&handle, 0, real, imaginary, 2);
			
 
				+		starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2);
			
 
				 		starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2);
			
 
				 
			
 
				 		if (rank == 0)
			
--- a/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c
+++ b/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c
@@ -85,7 +85,7 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
 
				 			if (mpi_rank == rank)
			
 
				 			{
			
 
				 				//fprintf(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
			
 
				-				starpu_matrix_data_register(&data_handles[x][y], 0, (uintptr_t)matA[x][y],
			
 
				+				starpu_matrix_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)matA[x][y],
			
 
				 						ld, size/nblocks, size/nblocks, sizeof(float));
			
 
				 			}
			
 
				 #warning TODO: make better test to only register what is needed
			
--- a/mpi/examples/mpi_lu/plu_example.c
+++ b/mpi/examples/mpi_lu/plu_example.c
@@ -242,7 +242,7 @@ static void init_matrix(int rank)
 
				 				}
			
 
				 
			
 
				 				/* Register it to StarPU */
			
 
				-				starpu_matrix_data_register(handleptr, 0,
			
 
				+				starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM,
			
 
				 					(uintptr_t)*blockptr, size/nblocks,
			
 
				 					size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 			}
			
@@ -261,7 +261,7 @@ static void init_matrix(int rank)
 
				 #ifdef SINGLE_TMP11
			
 
				 	starpu_malloc((void **)&tmp_11_block, blocksize);
			
 
				 	allocated_memory_extra += blocksize;
			
 
				-	starpu_matrix_data_register(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
			
 
				+	starpu_matrix_data_register(&tmp_11_block_handle, STARPU_MAIN_RAM, (uintptr_t)tmp_11_block,
			
 
				 			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 #else
			
 
				 	tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
			
@@ -276,7 +276,7 @@ static void init_matrix(int rank)
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_11_block[k]);
			
 
				 
			
 
				-			starpu_matrix_data_register(&tmp_11_block_handles[k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_11_block_handles[k], STARPU_MAIN_RAM,
			
 
				 				(uintptr_t)tmp_11_block[k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
@@ -311,7 +311,7 @@ static void init_matrix(int rank)
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_12_block[k]);
			
 
				 
			
 
				-			starpu_matrix_data_register(&tmp_12_block_handles[k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_12_block_handles[k], STARPU_MAIN_RAM,
			
 
				 				(uintptr_t)tmp_12_block[k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
@@ -322,7 +322,7 @@ static void init_matrix(int rank)
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_21_block[k]);
			
 
				 
			
 
				-			starpu_matrix_data_register(&tmp_21_block_handles[k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_21_block_handles[k], STARPU_MAIN_RAM,
			
 
				 				(uintptr_t)tmp_21_block[k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
@@ -334,7 +334,7 @@ static void init_matrix(int rank)
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_12_block[i][k]);
			
 
				 
			
 
				-			starpu_matrix_data_register(&tmp_12_block_handles[i][k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_12_block_handles[i][k], STARPU_MAIN_RAM,
			
 
				 				(uintptr_t)tmp_12_block[i][k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
@@ -345,7 +345,7 @@ static void init_matrix(int rank)
 
				 			allocated_memory_extra += blocksize;
			
 
				 			STARPU_ASSERT(tmp_21_block[i][k]);
			
 
				 
			
 
				-			starpu_matrix_data_register(&tmp_21_block_handles[i][k], 0,
			
 
				+			starpu_matrix_data_register(&tmp_21_block_handles[i][k], STARPU_MAIN_RAM,
			
 
				 				(uintptr_t)tmp_21_block[i][k],
			
 
				 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
			
 
				 		}
			
--- a/mpi/examples/stencil/stencil5.c
+++ b/mpi/examples/stencil/stencil5.c
@@ -108,7 +108,7 @@ int main(int argc, char **argv)
 
				 			if (mpi_rank == my_rank)
			
 
				 			{
			
 
				 				//fprintf(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
			
 
				-				starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
			
 
				+				starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
			
 
				 			}
			
 
				 			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
			
 
				 				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
			
--- a/mpi/include/starpu_mpi.h
+++ b/mpi/include/starpu_mpi.h
@@ -40,6 +40,8 @@ int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
 
				 int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
			
 
				 int starpu_mpi_barrier(MPI_Comm comm);
			
 
				 
			
 
				+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
			
 
				+
			
 
				 int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
			
 
				 int starpu_mpi_initialize(void) STARPU_DEPRECATED;
			
 
				 int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED;
			
--- a/mpi/src/starpu_mpi.c
+++ b/mpi/src/starpu_mpi.c
@@ -32,8 +32,12 @@ static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type
 
				 #endif
			
 
				 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
			
 
				 							int dest, int mpi_tag, MPI_Comm comm,
			
 
				-							unsigned detached, void (*callback)(void *), void *arg);
			
 
				-static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg);
			
 
				+							unsigned detached, void (*callback)(void *), void *arg,
			
 
				+							int sequential_consistency);
			
 
				+static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle,
			
 
				+							int source, int mpi_tag, MPI_Comm comm,
			
 
				+							unsigned detached, void (*callback)(void *), void *arg,
			
 
				+							int sequential_consistency);
			
 
				 static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req);
			
 
				 
			
 
				 /* The list of requests that have been newly submitted by the application */
			
@@ -73,6 +77,7 @@ struct _starpu_mpi_copy_handle
 
				  /********************************************************/
			
 
				 
			
 
				 static struct _starpu_mpi_req *_starpu_mpi_req_hashmap = NULL;
			
 
				+/** stores data which have been received by MPI but have not been requested by the application */
			
 
				 static struct _starpu_mpi_copy_handle *_starpu_mpi_copy_handle_hashmap = NULL;
			
 
				 
			
 
				 static struct _starpu_mpi_req* find_req(int mpi_tag)
			
@@ -99,7 +104,7 @@ static void add_req(struct _starpu_mpi_req *req)
 
				 	{
			
 
				 		_STARPU_MPI_DEBUG(3, "Error add_req : request %p with tag %d already in the hashmap. \n", req, req->mpi_tag);
			
 
				 		int seq_const = starpu_data_get_sequential_consistency_flag(req->data_handle);
			
 
				-		if (seq_const)
			
 
				+		if (seq_const &&  req->sequential_consistency)
			
 
				 		{
			
 
				 			STARPU_ASSERT_MSG(!test_req, "Error add_req : request %p with tag %d wanted to be added to the hashmap, while another request %p with the same tag is already in it. \n Sequential consistency is activated : this is not supported by StarPU.", req, req->mpi_tag, test_req);
			
 
				 		}
			
@@ -213,6 +218,7 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 
				 	req->internal_req = NULL;
			
 
				 	req->is_internal_req = 0;
			
 
				 	req->envelope = NULL;
			
 
				+	req->sequential_consistency = 1;
			
 
				  }
			
 
				 
			
 
				  /********************************************************/
			
@@ -225,7 +231,8 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 
				 							       int srcdst, int mpi_tag, MPI_Comm comm,
			
 
				 							       unsigned detached, void (*callback)(void *), void *arg,
			
 
				 							       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
			
 
				-							       enum starpu_data_access_mode mode)
			
 
				+							       enum starpu_data_access_mode mode,
			
 
				+							       int sequential_consistency)
			
 
				  {
			
 
				 
			
 
				 	 _STARPU_MPI_LOG_IN();
			
@@ -245,11 +252,12 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 
				 	 req->callback = callback;
			
 
				 	 req->callback_arg = arg;
			
 
				 	 req->func = func;
			
 
				+	 req->sequential_consistency = sequential_consistency;
			
 
				 
			
 
				 	 /* Asynchronously request StarPU to fetch the data in main memory: when
			
 
				 	  * it is available in main memory, _starpu_mpi_submit_new_mpi_request(req) is called and
			
 
				 	  * the request is actually submitted */
			
 
				-	 starpu_data_acquire_cb(data_handle, mode, _starpu_mpi_submit_new_mpi_request, (void *)req);
			
 
				+	 starpu_data_acquire_cb_sequential_consistency(data_handle, mode, _starpu_mpi_submit_new_mpi_request, (void *)req, sequential_consistency);
			
 
				 
			
 
				 	 _STARPU_MPI_LOG_OUT();
			
 
				 	 return req;
			
@@ -343,9 +351,10 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 
				 
			
 
				 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
			
 
				 							int dest, int mpi_tag, MPI_Comm comm,
			
 
				-							unsigned detached, void (*callback)(void *), void *arg)
			
 
				+							unsigned detached, void (*callback)(void *), void *arg,
			
 
				+							int sequential_consistency)
			
 
				 {
			
 
				-	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R);
			
 
				+	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R, sequential_consistency);
			
 
				 }
			
 
				 
			
 
				 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
			
@@ -354,7 +363,7 @@ int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_re
 
				 	STARPU_ASSERT_MSG(public_req, "starpu_mpi_isend needs a valid starpu_mpi_req");
			
 
				 
			
 
				 	struct _starpu_mpi_req *req;
			
 
				-	req = _starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 0, NULL, NULL);
			
 
				+	req = _starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 0, NULL, NULL, 1);
			
 
				 
			
 
				 	STARPU_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common");
			
 
				 	*public_req = req;
			
@@ -367,7 +376,7 @@ int starpu_mpi_isend_detached(starpu_data_handle_t data_handle,
 
				 			      int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
			
 
				 {
			
 
				 	_STARPU_MPI_LOG_IN();
			
 
				-	_starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 1, callback, arg);
			
 
				+	_starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 1, callback, arg, 1);
			
 
				 
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 	return 0;
			
@@ -420,9 +429,9 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 }
			
 
				 
			
 
				-static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg)
			
 
				+static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				 {
			
 
				-	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_data_func, STARPU_W);
			
 
				+	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_data_func, STARPU_W, sequential_consistency);
			
 
				 }
			
 
				 
			
 
				 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
			
@@ -438,7 +447,7 @@ int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_re
 
				 		starpu_data_set_tag(data_handle, mpi_tag);
			
 
				 
			
 
				 	struct _starpu_mpi_req *req;
			
 
				-	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL);
			
 
				+	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL, 1);
			
 
				 
			
 
				 	STARPU_ASSERT_MSG(req, "Invalid return for _starpu_mpi_irecv_common");
			
 
				 	*public_req = req;
			
@@ -458,7 +467,15 @@ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int
 
				 	if (tag == -1)
			
 
				 		starpu_data_set_tag(data_handle, mpi_tag);
			
 
				 
			
 
				-	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg);
			
 
				+	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, 1);
			
 
				+	_STARPU_MPI_LOG_OUT();
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				+{
			
 
				+	_STARPU_MPI_LOG_IN();
			
 
				+	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, sequential_consistency);
			
 
				 
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 	return 0;
			
@@ -1230,7 +1247,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 					add_chandle(chandle);
			
 
				 
			
 
				 					_STARPU_MPI_DEBUG(3, "Posting internal detached irecv on copy_handle with tag %d from src %d ..\n", chandle->mpi_tag, status.MPI_SOURCE);
			
 
				-					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL);
			
 
				+					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL, 1);
			
 
				 					chandle->req->is_internal_req = 1;
			
 
				 
			
 
				 					// We wait until the request is pushed in the
			
--- a/mpi/src/starpu_mpi_private.h
+++ b/mpi/src/starpu_mpi_private.h
@@ -144,6 +144,8 @@ LIST_TYPE(_starpu_mpi_req,
 
				 
			
 
				 	int is_internal_req;
			
 
				 	struct _starpu_mpi_req *internal_req;
			
 
				+
			
 
				+	int sequential_consistency;
			
 
				 );
			
 
				 
			
 
				 #ifdef __cplusplus
			
--- a/mpi/tests/Makefile.am
+++ b/mpi/tests/Makefile.am
@@ -77,6 +77,7 @@ AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS)
 
				 ########################
			
 
				 
			
 
				 starpu_mpi_TESTS =				\
			
 
				+	datatypes				\
			
 
				 	pingpong				\
			
 
				 	mpi_test				\
			
 
				 	mpi_isend				\
			
@@ -104,6 +105,7 @@ starpu_mpi_TESTS =				\
 
				 	user_defined_datatype
			
 
				 
			
 
				 noinst_PROGRAMS =				\
			
 
				+	datatypes				\
			
 
				 	pingpong				\
			
 
				 	mpi_test				\
			
 
				 	mpi_isend				\
			
@@ -146,6 +148,8 @@ mpi_detached_tag_LDADD =				\
 
				 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
			
 
				 mpi_redux_LDADD =					\
			
 
				 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
			
 
				+datatypes_LDADD =					\
			
 
				+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
			
 
				 pingpong_LDADD =					\
			
 
				 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
			
 
				 mpi_test_LDADD =					\
			
--- a/mpi/tests/block_interface.c
+++ b/mpi/tests/block_interface.c
@@ -68,7 +68,7 @@ int main(int argc, char **argv)
 
				 			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
			
 
				 		}
			
 
				 
			
 
				-		starpu_block_data_register(&block_handle, 0,
			
 
				+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
			
 
				 			SIZE, SIZE, SIZE, sizeof(float));
			
 
				 	}
			
@@ -77,7 +77,7 @@ int main(int argc, char **argv)
 
				 		block = calloc(SIZE*SIZE*SIZE, sizeof(float));
			
 
				 		assert(block);
			
 
				 
			
 
				-		starpu_block_data_register(&block_handle, 0,
			
 
				+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)block, SIZE, SIZE*SIZE,
			
 
				 			SIZE, SIZE, SIZE, sizeof(float));
			
 
				 	}
			
--- a/mpi/tests/block_interface_pinned.c
+++ b/mpi/tests/block_interface_pinned.c
@@ -69,7 +69,7 @@ int main(int argc, char **argv)
 
				 			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
			
 
				 		}
			
 
				 
			
 
				-		starpu_block_data_register(&block_handle, 0,
			
 
				+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
			
 
				 			SIZE, SIZE, SIZE, sizeof(float));
			
 
				 	}
			
@@ -79,7 +79,7 @@ int main(int argc, char **argv)
 
				 			SIZE*SIZE*SIZE*sizeof(float));
			
 
				 		memset(block, 0, SIZE*SIZE*SIZE*sizeof(float));
			
 
				 
			
 
				-		starpu_block_data_register(&block_handle, 0,
			
 
				+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
			
 
				 			(uintptr_t)block, SIZE, SIZE*SIZE,
			
 
				 			SIZE, SIZE, SIZE, sizeof(float));
			
 
				 	}
			
--- a/mpi/tests/datatypes.c
+++ b/mpi/tests/datatypes.c
@@ -0,0 +1,310 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2013  Centre National de la Recherche Scientifique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu_mpi.h>
			
 
				+#include <stdlib.h>
			
 
				+#include "helper.h"
			
 
				+
			
 
				+typedef void (*check_func)(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error);
			
 
				+
			
 
				+void check_variable(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
			
 
				+{
			
 
				+	int ret;
			
 
				+	float *v_s, *v_r;
			
 
				+
			
 
				+	STARPU_ASSERT(starpu_variable_get_elemsize(handle_s) == starpu_variable_get_elemsize(handle_r));
			
 
				+
			
 
				+	v_s = (float *)starpu_variable_get_local_ptr(handle_s);
			
 
				+	v_r = (float *)starpu_variable_get_local_ptr(handle_r);
			
 
				+
			
 
				+	if (*v_s == *v_r)
			
 
				+	{
			
 
				+		FPRINTF_MPI("Success with variable value: %f == %f\n", *v_s, *v_r);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		*error = 1;
			
 
				+		FPRINTF_MPI("Error with variable value: %f != %f\n", *v_s, *v_r);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void check_vector(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
			
 
				+{
			
 
				+	int ret, i;
			
 
				+	int nx;
			
 
				+	int *v_r, *v_s;
			
 
				+
			
 
				+	STARPU_ASSERT(starpu_vector_get_elemsize(handle_s) == starpu_vector_get_elemsize(handle_r));
			
 
				+	STARPU_ASSERT(starpu_vector_get_nx(handle_s) == starpu_vector_get_nx(handle_r));
			
 
				+
			
 
				+	nx = starpu_vector_get_nx(handle_r);
			
 
				+	v_r = (int *)starpu_vector_get_local_ptr(handle_r);
			
 
				+	v_s = (int *)starpu_vector_get_local_ptr(handle_s);
			
 
				+
			
 
				+	for(i=0 ; i<nx ; i++)
			
 
				+	{
			
 
				+		if (v_s[i] == v_r[i])
			
 
				+		{
			
 
				+			FPRINTF_MPI("Success with vector[%d] value: %d == %d\n", i, v_s[i], v_r[i]);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			*error = 1;
			
 
				+			FPRINTF_MPI("Error with vector[%d] value: %d != %d\n", i, v_s[i], v_r[i]);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void check_matrix(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
			
 
				+{
			
 
				+	STARPU_ASSERT(starpu_matrix_get_elemsize(handle_s) == starpu_matrix_get_elemsize(handle_r));
			
 
				+	STARPU_ASSERT(starpu_matrix_get_nx(handle_s) == starpu_matrix_get_nx(handle_r));
			
 
				+	STARPU_ASSERT(starpu_matrix_get_ny(handle_s) == starpu_matrix_get_ny(handle_r));
			
 
				+	STARPU_ASSERT(starpu_matrix_get_local_ld(handle_s) == starpu_matrix_get_local_ld(handle_r));
			
 
				+
			
 
				+	char *matrix_s = (char *)starpu_matrix_get_local_ptr(handle_s);
			
 
				+	char *matrix_r = (char *)starpu_matrix_get_local_ptr(handle_r);
			
 
				+
			
 
				+	int nx = starpu_matrix_get_nx(handle_s);
			
 
				+	int ny = starpu_matrix_get_ny(handle_s);
			
 
				+	int ldy = starpu_matrix_get_local_ld(handle_s);
			
 
				+
			
 
				+	int x, y;
			
 
				+
			
 
				+	for(y=0 ; y<ny ; y++)
			
 
				+		for(x=0 ; x<nx ; x++)
			
 
				+		{
			
 
				+			int index=(y*ldy)+x;
			
 
				+			if (matrix_s[index] == matrix_r[index])
			
 
				+			{
			
 
				+				FPRINTF_MPI("Success with matrix[%d,%d --> %d] value: %c == %c\n", x, y, index, matrix_s[index], matrix_r[index]);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				*error = 1;
			
 
				+				FPRINTF_MPI("Error with matrix[%d,%d --> %d] value: %c != %c\n", x, y, index, matrix_s[index], matrix_r[index]);
			
 
				+			}
			
 
				+		}
			
 
				+}
			
 
				+
			
 
				+void check_block(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
			
 
				+{
			
 
				+	STARPU_ASSERT(starpu_block_get_elemsize(handle_s) == starpu_block_get_elemsize(handle_r));
			
 
				+	STARPU_ASSERT(starpu_block_get_nx(handle_s) == starpu_block_get_nx(handle_r));
			
 
				+	STARPU_ASSERT(starpu_block_get_ny(handle_s) == starpu_block_get_ny(handle_r));
			
 
				+	STARPU_ASSERT(starpu_block_get_nz(handle_s) == starpu_block_get_nz(handle_r));
			
 
				+	STARPU_ASSERT(starpu_block_get_local_ldy(handle_s) == starpu_block_get_local_ldy(handle_r));
			
 
				+	STARPU_ASSERT(starpu_block_get_local_ldz(handle_s) == starpu_block_get_local_ldz(handle_r));
			
 
				+
			
 
				+	float *block_s = (float *)starpu_block_get_local_ptr(handle_s);
			
 
				+	float *block_r = (float *)starpu_block_get_local_ptr(handle_r);
			
 
				+
			
 
				+	int nx = starpu_block_get_nx(handle_s);
			
 
				+	int ny = starpu_block_get_ny(handle_s);
			
 
				+	int nz = starpu_block_get_nz(handle_s);
			
 
				+
			
 
				+	int ldy = starpu_block_get_local_ldy(handle_s);
			
 
				+	int ldz = starpu_block_get_local_ldz(handle_s);
			
 
				+
			
 
				+	int x, y, z;
			
 
				+
			
 
				+	for(z=0 ; z<nz ; z++)
			
 
				+		for(y=0 ; y<ny ; y++)
			
 
				+			for(x=0 ; x<nx ; x++)
			
 
				+			{
			
 
				+				int index=(z*ldz)+(y*ldy)+x;
			
 
				+				if (block_s[index] == block_r[index])
			
 
				+				{
			
 
				+					FPRINTF_MPI("Success with block[%d,%d,%d --> %d] value: %f == %f\n", x, y, z, index, block_s[index], block_r[index]);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					*error = 1;
			
 
				+					FPRINTF_MPI("Error with block[%d,%d,%d --> %d] value: %f != %f\n", x, y, z, index, block_s[index], block_r[index]);
			
 
				+				}
			
 
				+			}
			
 
				+}
			
 
				+
			
 
				+void send_recv_and_check(int rank, int node, starpu_data_handle_t handle_s, int tag_s, starpu_data_handle_t handle_r, int tag_r, int *error, check_func func)
			
 
				+{
			
 
				+	int ret;
			
 
				+	MPI_Status status;
			
 
				+
			
 
				+	if (rank == 0)
			
 
				+	{
			
 
				+		ret = starpu_mpi_send(handle_s, node, tag_s, MPI_COMM_WORLD);
			
 
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
			
 
				+		ret = starpu_mpi_recv(handle_r, node, tag_r, MPI_COMM_WORLD, &status);
			
 
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
			
 
				+
			
 
				+		func(handle_s, handle_r, error);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		ret = starpu_mpi_recv(handle_s, node, tag_s, MPI_COMM_WORLD, &status);
			
 
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
			
 
				+		ret = starpu_mpi_send(handle_s, node, tag_r, MPI_COMM_WORLD);
			
 
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	int ret, rank, size;
			
 
				+	int error=0;
			
 
				+
			
 
				+	int nx=3;
			
 
				+	int ny=2;
			
 
				+	int nz=4;
			
 
				+
			
 
				+	MPI_Init(NULL, NULL);
			
 
				+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+	MPI_Comm_size(MPI_COMM_WORLD, &size);
			
 
				+
			
 
				+	if (size < 2)
			
 
				+	{
			
 
				+		if (rank == 0)
			
 
				+			FPRINTF(stderr, "We need at least 2 processes.\n");
			
 
				+
			
 
				+		MPI_Finalize();
			
 
				+		return STARPU_TEST_SKIPPED;
			
 
				+	}
			
 
				+
			
 
				+	ret = starpu_init(NULL);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+	ret = starpu_mpi_init(NULL, NULL, 0);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
			
 
				+
			
 
				+	if (rank == 0)
			
 
				+	{
			
 
				+		MPI_Status status;
			
 
				+
			
 
				+		{
			
 
				+			float v = 42.12;
			
 
				+			starpu_data_handle_t variable_handle[2];
			
 
				+			starpu_variable_data_register(&variable_handle[0], 0, (uintptr_t)&v, sizeof(v));
			
 
				+			starpu_variable_data_register(&variable_handle[1], -1, (uintptr_t)NULL, sizeof(v));
			
 
				+
			
 
				+			send_recv_and_check(rank, 1, variable_handle[0], 0x42, variable_handle[1], 0x1337, &error, check_variable);
			
 
				+
			
 
				+			starpu_data_unregister(variable_handle[0]);
			
 
				+			starpu_data_unregister(variable_handle[1]);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			int vector[4] = {1, 2, 3, 4};
			
 
				+			starpu_data_handle_t vector_handle[2];
			
 
				+
			
 
				+			starpu_vector_data_register(&vector_handle[0], 0, (uintptr_t)vector, 4, sizeof(vector[0]));
			
 
				+			starpu_vector_data_register(&vector_handle[1], -1, (uintptr_t)NULL, 4, sizeof(vector[0]));
			
 
				+
			
 
				+			send_recv_and_check(rank, 1, vector_handle[0], 0x43, vector_handle[1], 0x2337, &error, check_vector);
			
 
				+
			
 
				+			starpu_data_unregister(vector_handle[0]);
			
 
				+			starpu_data_unregister(vector_handle[1]);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			char *matrix, n='a';
			
 
				+			int x, y;
			
 
				+			starpu_data_handle_t matrix_handle[2];
			
 
				+
			
 
				+			matrix = (char*)malloc(nx*ny*nz*sizeof(char));
			
 
				+			assert(matrix);
			
 
				+			for(y=0 ; y<ny ; y++)
			
 
				+			{
			
 
				+				for(x=0 ; x<nx ; x++)
			
 
				+				{
			
 
				+					matrix[(y*nx)+x] = n++;
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			starpu_matrix_data_register(&matrix_handle[0], 0, (uintptr_t)matrix, nx, nx, ny, sizeof(char));
			
 
				+			starpu_matrix_data_register(&matrix_handle[1], -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
			
 
				+
			
 
				+			send_recv_and_check(rank, 1, matrix_handle[0], 0x75, matrix_handle[1], 0x8555, &error, check_matrix);
			
 
				+
			
 
				+			starpu_data_unregister(matrix_handle[0]);
			
 
				+			starpu_data_unregister(matrix_handle[1]);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			float *block, n=1.0;
			
 
				+			int x, y, z;
			
 
				+			starpu_data_handle_t block_handle[2];
			
 
				+
			
 
				+			block = (float*)malloc(nx*ny*nz*sizeof(float));
			
 
				+			assert(block);
			
 
				+			for(z=0 ; z<nz ; z++)
			
 
				+			{
			
 
				+				for(y=0 ; y<ny ; y++)
			
 
				+				{
			
 
				+					for(x=0 ; x<nx ; x++)
			
 
				+					{
			
 
				+						block[(z*nx*ny)+(y*nx)+x] = n++;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			starpu_block_data_register(&block_handle[0], 0, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
			
 
				+			starpu_block_data_register(&block_handle[1], -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
			
 
				+
			
 
				+			send_recv_and_check(rank, 1, block_handle[0], 0x73, block_handle[1], 0x8337, &error, check_block);
			
 
				+
			
 
				+			starpu_data_unregister(block_handle[0]);
			
 
				+			starpu_data_unregister(block_handle[1]);
			
 
				+		}
			
 
				+	}
			
 
				+	else if (rank == 1)
			
 
				+	{
			
 
				+		MPI_Status status;
			
 
				+
			
 
				+		{
			
 
				+			starpu_data_handle_t variable_handle;
			
 
				+			starpu_variable_data_register(&variable_handle, -1, (uintptr_t)NULL, sizeof(float));
			
 
				+			send_recv_and_check(rank, 0, variable_handle, 0x42, NULL, 0x1337, NULL, NULL);
			
 
				+			starpu_data_unregister(variable_handle);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			starpu_data_handle_t vector_handle;
			
 
				+			starpu_vector_data_register(&vector_handle, -1, (uintptr_t)NULL, 4, sizeof(int));
			
 
				+			send_recv_and_check(rank, 0, vector_handle, 0x43, NULL, 0x2337, NULL, NULL);
			
 
				+			starpu_data_unregister(vector_handle);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			starpu_data_handle_t matrix_handle;
			
 
				+			starpu_matrix_data_register(&matrix_handle, -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
			
 
				+			send_recv_and_check(rank, 0, matrix_handle, 0x75, NULL, 0x8555, NULL, NULL);
			
 
				+			starpu_data_unregister(matrix_handle);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			starpu_data_handle_t block_handle;
			
 
				+			starpu_block_data_register(&block_handle, -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
			
 
				+			send_recv_and_check(rank, 0, block_handle, 0x73, NULL, 0x8337, NULL, NULL);
			
 
				+			starpu_data_unregister(block_handle);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	starpu_mpi_shutdown();
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	MPI_Finalize();
			
 
				+
			
 
				+	return rank == 0 ? error : 0;
			
 
				+}
			
--- a/mpi/tests/helper.h
+++ b/mpi/tests/helper.h