瀏覽代碼

merge trunk up to 10467

Simon Archipoff 12 年之前
父節點
當前提交
922885d4da
共有 100 個文件被更改,包括 1157 次插入275 次删除
  1. 1 0
      AUTHORS
  2. 16 0
      ChangeLog
  3. 1 0
      Makefile.am
  4. 54 38
      configure.ac
  5. 6 1
      doc/doxygen/Makefile.am
  6. 15 3
      doc/doxygen/chapters/advanced_examples.doxy
  7. 4 0
      doc/doxygen/chapters/api/codelet_and_tasks.doxy
  8. 4 4
      doc/doxygen/chapters/api/data_interfaces.doxy
  9. 48 0
      doc/doxygen/chapters/api/data_out_of_core.doxy
  10. 2 2
      doc/doxygen/chapters/api/data_partition.doxy
  11. 16 0
      doc/doxygen/chapters/api/mpi.doxy
  12. 2 2
      doc/doxygen/chapters/basic_examples.doxy
  13. 179 0
      doc/doxygen/chapters/code/disk_compute.c
  14. 122 0
      doc/doxygen/chapters/code/disk_copy.c
  15. 1 1
      doc/doxygen/chapters/code/multiformat.c
  16. 2 2
      doc/doxygen/chapters/code/vector_scal_c.c
  17. 7 7
      doc/doxygen/chapters/configure_options.doxy
  18. 5 5
      doc/doxygen/chapters/environment_variables.doxy
  19. 1 0
      doc/doxygen/chapters/files.doxy
  20. 2 1
      doc/doxygen/chapters/introduction.doxy
  21. 3 3
      doc/doxygen/chapters/mpi_support.doxy
  22. 56 0
      doc/doxygen/chapters/out_of_core.doxy
  23. 4 0
      doc/doxygen/chapters/performance_feedback.doxy
  24. 1 0
      doc/doxygen/doxygen-config.cfg.in
  25. 9 0
      doc/doxygen/refman.tex
  26. 3 3
      doc/texinfo/chapters/advanced-examples.texi
  27. 6 6
      doc/texinfo/chapters/api.texi
  28. 2 2
      doc/texinfo/chapters/basic-examples.texi
  29. 3 3
      doc/texinfo/chapters/configuration.texi
  30. 3 3
      doc/texinfo/chapters/mpi-support.texi
  31. 3 3
      doc/texinfo/chapters/vector_scal_c.texi
  32. 2 2
      doc/tutorial/vector_scal.c
  33. 2 2
      examples/audio/starpu_audio_processing.c
  34. 4 4
      examples/axpy/axpy.c
  35. 1 1
      examples/basic_examples/block.c
  36. 1 1
      examples/basic_examples/dynamic_handles.c
  37. 6 6
      examples/basic_examples/mult.c
  38. 1 1
      examples/basic_examples/multiformat.c
  39. 1 1
      examples/basic_examples/variable.c
  40. 2 2
      examples/basic_examples/vector_scal.c
  41. 3 3
      examples/basic_examples/vector_scal_c.c
  42. 1 1
      examples/binary/binary.c
  43. 1 1
      examples/callback/callback.c
  44. 14 14
      examples/cg/cg.c
  45. 3 3
      examples/cholesky/cholesky_grain_tag.c
  46. 2 2
      examples/cholesky/cholesky_implicit.c
  47. 3 3
      examples/cholesky/cholesky_tag.c
  48. 1 1
      examples/cholesky/cholesky_tile_tag.c
  49. 1 1
      examples/cpp/incrementer_cpp.cpp
  50. 3 3
      examples/filters/custom_mf/custom_interface.c
  51. 2 2
      examples/filters/custom_mf/custom_mf_filter.c
  52. 2 2
      examples/filters/fblock.c
  53. 2 2
      examples/filters/fmatrix.c
  54. 2 2
      examples/filters/fvector.c
  55. 4 4
      examples/filters/shadow.c
  56. 4 4
      examples/filters/shadow2d.c
  57. 4 4
      examples/filters/shadow3d.c
  58. 2 2
      examples/heat/dw_factolu.c
  59. 3 3
      examples/heat/dw_factolu_grain.c
  60. 3 3
      examples/heat/dw_factolu_tag.c
  61. 6 6
      examples/heat/dw_sparse_cg.c
  62. 1 1
      examples/incrementer/incrementer.c
  63. 2 2
      examples/interface/complex.c
  64. 3 3
      examples/interface/complex_interface.c
  65. 2 2
      examples/lu/xlu.c
  66. 2 2
      examples/lu/xlu_implicit.c
  67. 3 3
      examples/lu/xlu_implicit_pivot.c
  68. 3 3
      examples/lu/xlu_pivot.c
  69. 1 1
      examples/mandelbrot/mandelbrot.c
  70. 3 3
      examples/matvecmult/matvecmult.c
  71. 6 6
      examples/mult/xgemm.c
  72. 1 1
      examples/openmp/vector_scal_omp.c
  73. 3 3
      examples/pi/pi.c
  74. 1 1
      examples/pi/pi_redux.c
  75. 6 6
      examples/ppm_downscaler/yuv_downscaler.c
  76. 3 3
      examples/reductions/dot_product.c
  77. 2 2
      examples/reductions/minmax_reduction.c
  78. 1 1
      examples/spmd/vector_scal_spmd.c
  79. 7 7
      examples/spmv/dw_block_spmv.c
  80. 5 5
      examples/spmv/spmv.c
  81. 1 1
      examples/stencil/stencil-blocks.c
  82. 1 1
      gcc-plugin/tests/output-pointer.c
  83. 2 2
      gcc-plugin/tests/pointers.c
  84. 1 0
      include/starpu.h
  85. 4 0
      include/starpu_data.h
  86. 11 11
      include/starpu_data_interfaces.h
  87. 50 0
      include/starpu_disk.h
  88. 2 2
      include/starpu_util.h
  89. 1 1
      mpi/examples/complex/mpi_complex.c
  90. 1 1
      mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c
  91. 7 7
      mpi/examples/mpi_lu/plu_example.c
  92. 1 1
      mpi/examples/stencil/stencil5.c
  93. 2 0
      mpi/include/starpu_mpi.h
  94. 31 14
      mpi/src/starpu_mpi.c
  95. 2 0
      mpi/src/starpu_mpi_private.h
  96. 4 0
      mpi/tests/Makefile.am
  97. 2 2
      mpi/tests/block_interface.c
  98. 2 2
      mpi/tests/block_interface_pinned.c
  99. 310 0
      mpi/tests/datatypes.c
  100. 0 0
      mpi/tests/helper.h

+ 1 - 0
AUTHORS

@@ -15,6 +15,7 @@ Damien Pasqualinotto <dam.pasqualinotto@wanadoo.fr>
 Nguyen Quôc-Dinh <nguyen.quocdinh@gmail.com>
 Cyril Roelandt <cyril.roelandt@inria.fr>
 Anthony Roy <theanthony33@gmail.com>
+Corentin Salingue <corentin.salingue@gmail.com>
 Ludovic Stordeur <ludovic.stordeur@inria.fr>
 François Tessier <francois.tessier@inria.fr>
 Samuel Thibault <samuel.thibault@labri.fr>

+ 16 - 0
ChangeLog

@@ -32,6 +32,14 @@ New features:
 	  the envelope.
   * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to
     let starpu commute write accesses.
+  * Out-of-core support, through registration of disk areas as additional memory
+    nodes.
+  * StarPU-MPI: new function
+    starpu_mpi_irecv_detached_sequential_consistency which allows to
+    enable or disable the sequential consistency for the given data
+    handle (sequential consistency will be enabled or disabled based
+    on the value of the function parameter and the value of the
+    sequential consistency defined for the given data)
 
 Small features:
   * Add cl_arg_free field to enable automatic free(cl_arg) on task
@@ -39,7 +47,15 @@ Small features:
   * New functions starpu_data_acquire_cb_sequential_consistency() and
     starpu_data_acquire_on_node_cb_sequential_consistency() which allows
     to enable or disable sequential consistency
+  * New configure option --enable-fxt-lock which enables additional
+    trace events focused on locks behaviour during the execution
 
+Changes:
+  * Fix of the livelock issue discovered while executing applications
+    on a CPU+GPU cluster of machines by adding a maximum trylock 
+    threshold before a blocking lock.
+  * Data interfaces (variable, vector, matrix and block) now define
+    pack und unpack functions
 
 StarPU 1.1.0 (svn revision xxxx)
 ==============================================

+ 1 - 0
Makefile.am

@@ -79,6 +79,7 @@ versinclude_HEADERS = 				\
 	include/starpu_deprecated_api.h         \
 	include/starpu_hash.h			\
 	include/starpu_rand.h			\
+	include/starpu_disk.h			\
 	include/starpu_cublas.h			\
 	include/starpu_driver.h			\
 	include/starpu_stdlib.h			\

+ 54 - 38
configure.ac

@@ -134,8 +134,13 @@ case "$target" in
   libext=a
   AC_DEFINE(STARPU_HAVE_WINDOWS, [], [Define this on windows.])
   ;;
+*-*-linux*)
+  starpu_linux=yes
+  AC_DEFINE(STARPU_LINUX_SYS, 1, [Define to 1 on Linux])
+  ;;
 esac
 AM_CONDITIONAL([STARPU_HAVE_WINDOWS], [test "x$starpu_windows" = "xyes"])
+AM_CONDITIONAL([STARPU_LINUX_SYS], [test "x$starpu_linux" = "xyes"])
 
 # on Darwin, GCC targets i386 by default, so we don't have atomic ops
 AC_CHECK_SIZEOF([void *])
@@ -959,13 +964,13 @@ AC_MSG_RESULT($nmaxmicdev)
 AC_DEFINE_UNQUOTED(STARPU_MAXMICDEVS, [$nmaxmicdev],
 	[maximum number of MIC devices])
 
-AC_MSG_CHECKING(maximum number of MIC cores)
-AC_ARG_ENABLE(maxmicdev, [AS_HELP_STRING([--enable-maxmiccore=<number>],
-			[maximum number of MIC cores])],
-			nmaxmiccore=$enableval, nmaxmiccore=128)
-AC_MSG_RESULT($nmaxmiccore)
+AC_MSG_CHECKING(maximum number of MIC threads)
+AC_ARG_ENABLE(maxmicthreads, [AS_HELP_STRING([--enable-maxmicthreads=<number>],
+			[maximum number of MIC threads])],
+			nmaxmicthreads=$enableval, nmaxmicthreads=128)
+AC_MSG_RESULT($nmaxmicthread)
 
-AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmiccore],
+AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmicthreads],
 	[maximum number of MIC cores])
 
 AC_ARG_WITH(coi-dir,
@@ -1379,38 +1384,48 @@ AC_MSG_RESULT($nmaxbuffers)
 AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
 		[how many buffers can be manipulated per task])
 
-# TODO: add option to choose maxnodes
-if test x$enable_simgrid = xyes ; then
-	# We still need the room for the virtual CUDA/OpenCL devices
-	maxnodes=16
-else
-	# We have one memory node shared by all CPU workers, one node per GPU
-	# and per MIC device
-	nodes=1
-	if test x$enable_cuda = xyes ; then
-		# we could have used nmaxcudadev + 1, but this would certainly give an
-		# odd number.
-		nodes=`expr $nodes + $nmaxcudadev`
-	fi
-	if test x$enable_opencl = xyes ; then
-		# we could have used nmaxcudadev + 1, but this would certainly give an
-		# odd number.
-		nodes=`expr $nodes + $nmaxopencldev`
-	fi
-	if test x$enable_mic = xyes ; then
-		nodes=`expr $nodes + $nmaxmicdev`
-	fi
-	if test x$enable_rcce = xyes ; then
-		# Only 1 memory node for the shared memory.
-		nodes=`expr $nodes + 1`
-	fi
+AC_MSG_CHECKING(maximum number of nodes to use)
+AC_ARG_ENABLE(maxnodes, [AS_HELP_STRING([--enable-maxnodes=<nnodes>],
+			[maximum number of nodes])],
+			maxnodes=$enableval, maxnodes=0)
 
-	# set maxnodes to the next power of 2 greater than nodes
-	maxnodes=1
-	while test "$maxnodes" -lt "$nodes"
-	do
-		maxnodes=`expr $maxnodes \* 2`
-	done
+if test x$maxnodes = x0 ; then
+	if test x$enable_simgrid = xyes ; then
+		# We still need the room for the virtual CUDA/OpenCL devices
+		maxnodes=16
+	else
+		# We have one memory node shared by all CPU workers, one node per GPU
+		# and per MIC device
+		# we add nodes to use 4 memory disks
+		nodes=5
+		if test x$enable_cuda = xyes ; then
+			# we could have used nmaxcudadev + 1, but this would certainly give an
+			# odd number.
+			nodes=`expr $nodes + $nmaxcudadev`
+		fi
+		if test x$enable_opencl = xyes ; then
+			# we could have used nmaxcudadev + 1, but this would certainly give an
+			# odd number.
+			nodes=`expr $nodes + $nmaxopencldev`
+		fi
+		if test x$enable_mic = xyes ; then
+			nodes=`expr $nodes + $nmaxmicdev`
+		fi
+		if test x$enable_rcce = xyes ; then
+			# Only 1 memory node for the shared memory.
+			nodes=`expr $nodes + 1`
+		fi
+
+		# set maxnodes to the next power of 2 greater than nodes
+		maxnodes=1
+		while test "$maxnodes" -lt "$nodes"
+		do
+			maxnodes=`expr $maxnodes \* 2`
+		done
+ 	fi
+fi
+if test $maxnodes -gt 32 ; then
+	AC_MSG_ERROR([selected number of nodes ($maxnodes) can not be greater than 32])
 fi
 
 AC_MSG_CHECKING(maximum number of memory nodes)
@@ -1456,7 +1471,7 @@ AC_CHECK_FUNCS([clock_gettime])
 
 # Compute the maximum number of workers (we round it to 16 for alignment
 # purposes).
-nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + $nmaxmiccore + $nmaxsccdev + 15 \) / 16 \) `
+nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + $nmaxmicthreads + $nmaxsccdev + 15 \) / 16 \) `
 AC_MSG_CHECKING(Maximum number of workers)
 AC_MSG_RESULT($nmaxworkers)
 AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
@@ -2273,6 +2288,7 @@ AC_MSG_NOTICE([
 	Maximum number of CPUs:           $maxcpus
 	Maximum number of CUDA devices:   $nmaxcudadev
 	Maximum number of OpenCL devices: $nmaxopencldev
+	Maximum number of MIC threads:    $nmaxmicthreads
 	Maximum number of memory nodes:   $maxnodes
 	Maximum number of task buffers:   $nmaxbuffers
 

+ 6 - 1
doc/doxygen/Makefile.am

@@ -36,6 +36,7 @@ chapters =	\
 	chapters/scheduling_context_hypervisor.doxy \
 	chapters/scheduling_contexts.doxy \
 	chapters/modularized_scheduler.doxy \
+	chapters/out_of_core.doxy \
 	chapters/socl_opencl_extensions.doxy \
 	chapters/tips_and_tricks.doxy \
 	chapters/environment_variables.doxy \
@@ -59,11 +60,14 @@ chapters =	\
 	chapters/code/vector_scal_cuda.cu \
 	chapters/code/vector_scal_opencl.c \
 	chapters/code/vector_scal_opencl_codelet.cl \
+	chapters/code/disk_copy.c \
+	chapters/code/disk_compute.c \
 	chapters/api/codelet_and_tasks.doxy \
 	chapters/api/cuda_extensions.doxy \
 	chapters/api/data_interfaces.doxy \
 	chapters/api/data_management.doxy \
 	chapters/api/data_partition.doxy \
+	chapters/api/data_out_of_core.doxy \
 	chapters/api/expert_mode.doxy \
 	chapters/api/explicit_dependencies.doxy \
 	chapters/api/fft_support.doxy \
@@ -106,7 +110,7 @@ chapters/version.sty: $(chapters)
 	@if test -s timestamp_updated ; then \
 		echo "\newcommand{\STARPUUPDATED}{"`cat timestamp_updated`"}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
 	else \
-		echo "\newcommand{\STARPUUPDATED}{unknown_date}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
+		echo "\newcommand{\STARPUUPDATED}{unknown date}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
 	fi
 	@echo "\newcommand{\STARPUVERSION}{$(VERSION)}" >> $(top_srcdir)/doc/doxygen/chapters/version.sty
 	@-for f in timestamp timestamp_updated timestamp_updated_month ; do \
@@ -146,6 +150,7 @@ dox_inputs = $(DOX_CONFIG) 				\
 	$(top_srcdir)/include/starpu.h			\
 	$(top_srcdir)/include/starpu_data_filters.h	\
 	$(top_srcdir)/include/starpu_data_interfaces.h	\
+	$(top_srcdir)/include/starpu_disk.h		\
 	$(top_srcdir)/include/starpu_worker.h		\
 	$(top_srcdir)/include/starpu_task.h		\
 	$(top_srcdir)/include/starpu_task_bundle.h	\

+ 15 - 3
doc/doxygen/chapters/advanced_examples.doxy

@@ -201,7 +201,7 @@ int vector[NX];
 starpu_data_handle_t handle;
 
 /* Declare data to StarPU */
-starpu_vector_data_register(&handle, 0, (uintptr_t)vector,
+starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector,
                             NX, sizeof(vector[0]));
 
 /* Partition the vector in PARTS sub-vectors */
@@ -394,6 +394,19 @@ there is some hidden parameter such as the number of iterations, etc.
 The example in the directory <c>examples/pi</c> uses this to include
 the number of iterations in the base.
 
+StarPU will automatically determine when the performance model is calibrated,
+or rather, it will assume the performance model is calibrated until the
+application submits a task for which the performance can not be predicted. For
+::STARPU_HISTORY_BASED, StarPU will require 10 (::_STARPU_CALIBRATION_MINIMUM)
+measurements for a given size before estimating that an average can be taken as
+estimation for further executions with the same size. For
+::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED, StarPU will require
+10 (::_STARPU_CALIBRATION_MINIMUM) measurements, and that the minimum measured
+data size is smaller than 90% of the maximum measured data size (i.e. the
+measurement interval is large enough for a regression to have a meaning).
+Calibration can also be forced by setting the \ref STARPU_CALIBRATE environment
+variable to <c>1</c>, or even reset by setting it to <c>2</c>.
+
 How to use schedulers which can benefit from such performance model is explained
 in \ref TaskSchedulingPolicy.
 
@@ -1106,8 +1119,7 @@ Complex data interfaces can then be registered to StarPU.
 
 \code{.c}
 double real = 45.0;
-double imaginary = 12.0;
-starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
+double imaginary = 12.0;starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
 starpu_insert_task(&cl_display, STARPU_R, handle1, 0);
 \endcode
 

+ 4 - 0
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -82,6 +82,10 @@ specify the codelet may be executed on a MIC processing unit.
 This macro is used when setting the field starpu_codelet::where to
 specify the codelet may be executed on an SCC processing unit.
 
+\def STARPU_MAIN_RAM
+\ingroup API_Codelet_And_Tasks
+This macro is used when the RAM memory node is specified.
+
 \def STARPU_MULTIPLE_CPU_IMPLEMENTATIONS
 \deprecated
 \ingroup API_Codelet_And_Tasks

+ 4 - 4
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -212,7 +212,7 @@ Here an example of how to use the function.
 \code{.c}
 float var;
 starpu_data_handle_t var_handle;
-starpu_variable_data_register(&var_handle, 0, (uintptr_t)&var, sizeof(var));
+starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
 \endcode
 
 \fn void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize)
@@ -223,7 +223,7 @@ Here an example of how to use the function.
 \code{.c}
 float vector[NX];
 starpu_data_handle_t vector_handle;
-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 \endcode
 
 \fn void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize)
@@ -238,7 +238,7 @@ Here an example of how to use the function.
 float *matrix;
 starpu_data_handle_t matrix_handle;
 matrix = (float*)malloc(width * height * sizeof(float));
-starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix, width, width, height, sizeof(float));
+starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float));
 \endcode
 
 \fn void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize)
@@ -252,7 +252,7 @@ Here an example of how to use the function.
 float *block;
 starpu_data_handle_t block_handle;
 block = (float*)malloc(nx*ny*nz*sizeof(float));
-starpu_block_data_register(&block_handle, 0, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
+starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
 \endcode
 
 \fn void starpu_bcsr_data_register(starpu_data_handle_t *handle, unsigned home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t r, uint32_t c, size_t elemsize)

+ 48 - 0
doc/doxygen/chapters/api/data_out_of_core.doxy

@@ -0,0 +1,48 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2013 Corentin Salingue
+ * See the file version.doxy for copying conditions.
+ */
+
+
+/*! \defgroup API_Out_Of_Core Out Of Core
+
+
+
+\struct starpu_disk_ops
+\ingroup API_Out_Of_Core
+This is a set of functions to manipulate datas on disk.
+
+\fn int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, size_t size) 
+\ingroup API_Out_Of_Core
+Register a disk memory node with a set of functions to manipulate datas. <br />
+SUCCESS: return the disk node. <br />
+FAIL: return an error code. <br />
+The \p size must be at least 1 MB !
+
+\fn void * starpu_disk_open (unsigned node, void *pos, size_t size)
+\ingroup API_Out_Of_Core
+Add an existing file memory in a disk node. The \p pos is defined in the starpu_disk_ops. \p size: this is a size of your file.
+\p pos is the name of the file.
+
+\fn void starpu_disk_close (unsigned node, void *obj, size_t size)
+\ingroup API_Out_Of_Core
+Close an existing file memory opened with starpu_disk_open.
+
+\var starpu_disk_stdio_ops
+\ingroup API_Out_Of_Core
+This set uses the stdio library (fwrite, fread...) to read/write on disk. <br />
+<strong>Warning: It creates one file per allocation !</strong>  <br />
+
+\var starpu_disk_unistd_ops
+\ingroup API_Out_Of_Core
+This set uses the unistd library (write, read...) to read/write on disk. <br />
+<strong>Warning: It creates one file per allocation !</strong>  <br />
+
+\var starpu_disk_unistd_o_direct_ops
+\ingroup API_Out_Of_Core
+This set uses the unistd library (write, read...) to read/write on disk with the O_DIRECT flag. <br />
+<strong>Warning: It creates one file per allocation !</strong>  <br />
+Only available on Linux.
+
+*/

+ 2 - 2
doc/doxygen/chapters/api/data_partition.doxy

@@ -54,12 +54,12 @@ starpu_data_partition(A_handle, &f);
 \ingroup API_Data_Partition
 This unapplies one filter, thus unpartitioning the data. The
 pieces of data are collected back into one big piece in the
-\p gathering_node (usually 0). Tasks working on the partitioned data must
+\p gathering_node (usually STARPU_MAIN_RAM). Tasks working on the partitioned data must
 be already finished when calling starpu_data_unpartition().
 
 Here an example of how to use the function.
 \code{.c}
-starpu_data_unpartition(A_handle, 0);
+starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
 \endcode
 
 \fn int starpu_data_get_nb_children(starpu_data_handle_t handle)

+ 16 - 0
doc/doxygen/chapters/api/mpi.doxy

@@ -98,6 +98,22 @@ communication completes, its resources are automatically released back
 to the system, there is no need to test or to wait for the completion
 of the request.
 
+\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
+\ingroup API_MPI_Support
+Posts a nonblocking receive in \p data_handle from the node \p source
+using the message tag \p mpi_tag within the communicator \p comm. On
+completion, the \p callback function is called with the argument \p
+arg.
+The parameter \p sequential_consistency allows to enable or disable
+the sequential consistency for \p data handle (sequential consistency
+will be enabled or disabled based on the value of the parameter \p
+sequential_consistency and the value of the sequential consistency
+defined for \p data_handle).
+Similarly to the pthread detached functionality, when a detached
+communication completes, its resources are automatically released back
+to the system, there is no need to test or to wait for the completion
+of the request.
+
 \fn int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status)
 \ingroup API_MPI_Support
 Returns when the operation identified by request \p req is complete.

+ 2 - 2
doc/doxygen/chapters/basic_examples.doxy

@@ -529,14 +529,14 @@ The following lines show how to declare an array of <c>NX</c> elements of type
 float vector[NX];
 
 starpu_data_handle_t vector_handle;
-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX,
                             sizeof(vector[0]));
 \endcode
 
 The first argument, called the <b>data handle</b>, is an opaque pointer which
 designates the array in StarPU. This is also the structure which is used to
 describe which data is used by a task. The second argument is the node number
-where the data originally resides. Here it is 0 since the array <c>vector</c> is in
+where the data originally resides. Here it is STARPU_MAIN_RAM since the array <c>vector</c> is in
 the main memory. Then comes the pointer <c>vector</c> where the data can be found in main memory,
 the number of elements in the vector and the size of each element.
 The following shows how to construct a StarPU task that will manipulate the

+ 179 - 0
doc/doxygen/chapters/code/disk_compute.c

@@ -0,0 +1,179 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013 Corentin Salingue
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+//! [To be included]
+/* Try to write into disk memory
+ * Use mechanism to push datas from main ram to disk ram
+ */
+
+#include <starpu.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <math.h>
+
+#define NX (100)
+
+int main(int argc, char **argv)
+{
+	/* Initialize StarPU with default configuration */
+	int ret = starpu_init(NULL);
+
+	if (ret == -ENODEV) goto enodev;
+
+	/* Initialize path and name */
+	char pid_str[16];
+	int pid = getpid();
+	snprintf(pid_str, 16, "%d", pid);
+
+	char * base = "/tmp/";
+
+	char * name_file_start = malloc(128*sizeof(char));
+	strcpy(name_file_start, "STARPU_DISK_COMPUTE_DATA_");
+	strcat(name_file_start, pid_str);
+
+	char * name_file_end = malloc(128*sizeof(char));
+	strcpy(name_file_end, "STARPU_DISK_COMPUTE_DATA_RESULT_");
+	strcat(name_file_end, pid_str);
+
+	char * path_file_start = malloc(128*sizeof(char));
+	strcpy(path_file_start, base);
+	strcat(path_file_start, name_file_start);
+
+	char * path_file_end = malloc(128*sizeof(char));
+	strcpy(path_file_end, base);
+	strcat(path_file_end, name_file_end);
+
+
+	/* register a disk */
+	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) base, 1024*1024*1);
+	/* can't write on /tmp/ */
+	if (new_dd == -ENOENT) goto enoent;
+	
+	unsigned dd = (unsigned) new_dd;
+
+	printf("TEST DISK MEMORY \n");
+
+	/* Imagine, you want to compute datas */
+	int *A;
+	int *C;
+
+	starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT);
+	starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT);
+ 
+	unsigned int j;
+	/* you register them in a vector */
+	for(j = 0; j < NX; ++j)
+	{
+		A[j] = j;
+		C[j] = 0;
+	}
+
+
+
+
+	/* you create a file to store the vector ON the disk */
+	FILE * f = fopen(path_file_start, "wb+");
+	if (f == NULL)
+		goto enoent;
+
+	/* store it in the file */
+	fwrite(A, sizeof(int), NX, f);
+
+	/* close the file */
+	fclose(f);
+
+
+	/* create a file to store result */
+	f = fopen(path_file_end, "wb+");
+	if (f == NULL)
+		goto enoent;
+
+	/* replace all datas by 0 */
+	fwrite(C, sizeof(int), NX, f);
+
+	/* close the file */
+	fclose(f);
+
+	/* And now, you want to use your datas in StarPU */
+	/* Open the file ON the disk */
+	void * data = starpu_disk_open(dd, (void *) name_file_start, NX*sizeof(int));
+	void * data_result = starpu_disk_open(dd, (void *) name_file_end, NX*sizeof(int));
+
+
+	starpu_data_handle_t vector_handleA, vector_handleC;
+
+	/* register vector in starpu */
+	starpu_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int));
+
+	/* and do what you want with it, here we copy it into an other vector */ 
+	starpu_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int));	
+
+	starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL);
+
+	/* free them */
+	starpu_data_unregister(vector_handleA);
+	starpu_data_unregister(vector_handleC);
+
+	/* close them in StarPU */
+	starpu_disk_close(dd, data, NX*sizeof(int));
+	starpu_disk_close(dd, data_result, NX*sizeof(int));
+
+	/* check results */	
+	f = fopen(path_file_end, "rb+");
+	if (f == NULL)
+		goto enoent;
+	/* take datas */
+	int size = fread(C, sizeof(int), NX, f);
+
+	/* close the file */
+	fclose(f);
+
+	int try = 1;
+	for (j = 0; j < NX; ++j)
+		if (A[j] != C[j])
+		{
+			printf("Fail A %d != C %d \n", A[j], C[j]);
+			try = 0;
+		}
+
+	starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT);
+	starpu_free_flags(C, NX*sizeof(double), STARPU_MALLOC_COUNT);
+
+	unlink(path_file_start);
+	unlink(path_file_end);
+
+	free(name_file_start);
+	free(name_file_end);
+	free(path_file_start);
+	free(path_file_end);
+
+	/* terminate StarPU, no task can be submitted after */
+	starpu_shutdown();
+
+	if(try)
+		printf("TEST SUCCESS\n");
+	else
+		printf("TEST FAIL\n");
+	return (try ? EXIT_SUCCESS : EXIT_FAILURE);
+
+enodev:
+	return 77;
+enoent:
+	return 77;
+}
+//! [To be included]
+

+ 122 - 0
doc/doxygen/chapters/code/disk_copy.c

@@ -0,0 +1,122 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013 Corentin Salingue
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+//! [To be included]
+
+/* Try to write into disk memory
+ * Use mechanism to push datas from main ram to disk ram
+ */
+
+#include <starpu.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+/* size of one vector */
+#define	NX	(30*1000000/sizeof(double))
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+
+int main(int argc, char **argv)
+{
+	double * A,*B,*C,*D,*E,*F;
+
+	/* limit main ram to force to push in disk */
+	putenv("STARPU_LIMIT_CPU_MEM=160");
+
+	/* Initialize StarPU with default configuration */
+	int ret = starpu_init(NULL);
+
+	if (ret == -ENODEV) goto enodev;
+
+	/* register a disk */
+	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) "/tmp/", 1024*1024*200);
+	/* can't write on /tmp/ */
+	if (new_dd == -ENOENT) goto enoent;
+	
+	unsigned dd = (unsigned) new_dd;
+
+	/* allocate two memory spaces */
+	starpu_malloc_flags((void **)&A, NX*sizeof(double), STARPU_MALLOC_COUNT);
+	starpu_malloc_flags((void **)&F, NX*sizeof(double), STARPU_MALLOC_COUNT);
+
+	FPRINTF(stderr, "TEST DISK MEMORY \n");
+
+	unsigned int j;
+	/* initialization with bad values */
+	for(j = 0; j < NX; ++j)
+	{
+		A[j] = j;
+		F[j] = -j;
+	}
+
+	starpu_data_handle_t vector_handleA, vector_handleB, vector_handleC, vector_handleD, vector_handleE, vector_handleF;
+
+	/* register vector in starpu */
+	starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(double));
+	starpu_vector_data_register(&vector_handleB, -1, (uintptr_t) NULL, NX, sizeof(double));	
+	starpu_vector_data_register(&vector_handleC, -1, (uintptr_t) NULL, NX, sizeof(double));
+	starpu_vector_data_register(&vector_handleD, -1, (uintptr_t) NULL, NX, sizeof(double));
+	starpu_vector_data_register(&vector_handleE, -1, (uintptr_t) NULL, NX, sizeof(double));
+	starpu_vector_data_register(&vector_handleF, STARPU_MAIN_RAM, (uintptr_t)F, NX, sizeof(double));
+
+	/* copy vector A->B, B->C... */
+	starpu_data_cpy(vector_handleB, vector_handleA, 0, NULL, NULL);
+	starpu_data_cpy(vector_handleC, vector_handleB, 0, NULL, NULL);
+	starpu_data_cpy(vector_handleD, vector_handleC, 0, NULL, NULL);
+	starpu_data_cpy(vector_handleE, vector_handleD, 0, NULL, NULL);
+	starpu_data_cpy(vector_handleF, vector_handleE, 0, NULL, NULL);
+
+	/* StarPU does not need to manipulate the array anymore so we can stop
+ 	 * monitoring it */
+
+	/* free them */
+	starpu_data_unregister(vector_handleA);
+	starpu_data_unregister(vector_handleB);
+	starpu_data_unregister(vector_handleC);
+	starpu_data_unregister(vector_handleD);
+	starpu_data_unregister(vector_handleE);
+	starpu_data_unregister(vector_handleF);
+
+	/* check if computation is correct */
+	int try = 1;
+	for (j = 0; j < NX; ++j)
+		if (A[j] != F[j])
+		{
+			printf("Fail A %f != F %f \n", A[j], F[j]);
+			try = 0;
+		}
+
+	/* free last vectors */
+	starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT);
+	starpu_free_flags(F, NX*sizeof(double), STARPU_MALLOC_COUNT);
+
+	/* terminate StarPU, no task can be submitted after */
+	starpu_shutdown();
+
+	if(try)
+		FPRINTF(stderr, "TEST SUCCESS\n");
+	else
+		FPRINTF(stderr, "TEST FAIL\n");
+	return (try ? EXIT_SUCCESS : EXIT_FAILURE);
+
+enodev:
+	return 77;
+enoent:
+	return 77;
+}
+
+//! [To be included]

+ 1 - 1
doc/doxygen/chapters/code/multiformat.c

@@ -57,5 +57,5 @@ struct starpu_multiformat_data_interface_ops format_ops = {
     ...
 };
 
-starpu_multiformat_data_register(handle, 0, &array_of_structs, NX, &format_ops);
+starpu_multiformat_data_register(handle, STARPU_MAIN_RAM, &array_of_structs, NX, &format_ops);
 //! [To be included]

+ 2 - 2
doc/doxygen/chapters/code/vector_scal_c.c

@@ -79,14 +79,14 @@ int main(int argc, char **argv)
      *  - the first argument of the registration method is a pointer to the
      *    handle that should describe the data
      *  - the second argument is the memory node where the data (ie. "vector")
-     *    resides initially: 0 stands for an address in main memory, as
+     *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
      *    opposed to an adress on a GPU for instance.
      *  - the third argument is the adress of the vector in RAM
      *  - the fourth argument is the number of elements in the vector
      *  - the fifth argument is the size of each element.
      */
     starpu_data_handle_t vector_handle;
-    starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
+    starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
                                 NX, sizeof(vector[0]));
 
     float factor = 3.14;

+ 7 - 7
doc/doxygen/chapters/configure_options.doxy

@@ -22,13 +22,6 @@ the following configure options.
 Enable debugging messages.
 </dd>
 
-<dt>--enable-debug</dt>
-<dd>
-\anchor enable-debug
-\addindex __configure__--enable-debug
-Enable debugging messages.
-</dd>
-
 <dt>--enable-fast</dt>
 <dd>
 \anchor enable-fast
@@ -363,6 +356,13 @@ Enable performance debugging through gprof.
 Enable performance model debugging.
 </dd>
 
+<dt>--enable-fxt-lock</dt>
+<dd>
+\anchor enable-fxt-lock
+\addindex __configure__--enable-fxt-lock
+Enable additional trace events which describes locks behaviour.
+</dd>
+
 <dt>--enable-stats</dt>
 <dd>
 \anchor enable-stats

+ 5 - 5
doc/doxygen/chapters/environment_variables.doxy

@@ -217,12 +217,12 @@ it is therefore necessary to disable asynchronous data transfers.
 Disable asynchronous copies between CPU and MIC devices.
 </dd>
 
-<dt>STARPU_DISABLE_CUDA_GPU_GPU_DIRECT</dt>
+<dt>STARPU_ENABLE_CUDA_GPU_GPU_DIRECT</dt>
 <dd>
-\anchor STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
-\addindex __env__STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
-Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
-instead. This permits to test the performance effect of GPU-Direct.
+\anchor STARPU_ENABLE_CUDA_GPU_GPU_DIRECT
+\addindex __env__STARPU_ENABLE_CUDA_GPU_GPU_DIRECT
+Enable direct CUDA transfers from GPU to GPU, without copying through RAM.
+This permits to test the performance effect of GPU-Direct.
 </dd>
 
 </dl>

+ 1 - 0
doc/doxygen/chapters/files.doxy

@@ -12,6 +12,7 @@
 \file starpu.h
 \file starpu_data_filters.h
 \file starpu_data_interfaces.h
+\file starpu_disk.h
 \file starpu_worker.h
 \file starpu_task.h
 \file starpu_task_bundle.h

+ 2 - 1
doc/doxygen/chapters/introduction.doxy

@@ -145,7 +145,7 @@ simply replace calling the function with submitting a task.
 A \b codelet records pointers to various implementations of the same
 theoretical function.
 
-A <b>memory node</b> can be either the main RAM or GPU-embedded memory.
+A <b>memory node</b> can be either the main RAM, GPU-embedded memory or a disk memory.
 
 A \b bus is a link between memory nodes.
 
@@ -213,6 +213,7 @@ The documentation chapters include
 <li> \ref HowToOptimizePerformanceWithStarPU
 <li> \ref PerformanceFeedback
 <li> \ref TipsAndTricksToKnowAbout
+<li> \ref OutOfCore
 <li> \ref MPISupport
 <li> \ref FFTSupport
 <li> \ref MICSCCSupport

+ 3 - 3
doc/doxygen/chapters/mpi_support.doxy

@@ -52,7 +52,7 @@ int main(int argc, char **argv)
     starpu_init(NULL);
     starpu_mpi_initialize_extended(&rank, &size);
 
-    starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
+    starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(unsigned));
 
     unsigned nloops = NITER;
     unsigned loop;
@@ -273,7 +273,7 @@ data which will be needed by the tasks that we will execute.
             int mpi_rank = my_distrib(x, y, size);
              if (mpi_rank == my_rank)
                 /* Owning data */
-                starpu_variable_data_register(&data_handles[x][y], 0,
+                starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM,
                                               (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
             else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
                   || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
@@ -339,7 +339,7 @@ for(x = 0; x < nblocks ;  x++)
 {
     int mpi_rank = my_distrib(x, nodes);
     if (rank == root) {
-        starpu_vector_data_register(&data_handles[x], 0, (uintptr_t)vector[x],
+        starpu_vector_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)vector[x],
                                     blocks_size, sizeof(float));
     }
     else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1))) {

+ 56 - 0
doc/doxygen/chapters/out_of_core.doxy

@@ -0,0 +1,56 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2013 Corentin Salingue
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \page OutOfCore Out Of Core
+
+\section Introduction Introduction
+
+When using StarPU, one may need to store more data than what the main memory
+(RAM) can store. This part describes the method to add a new memory node on a
+disk and to use it.
+
+\section UseANewDiskMemory Use a new disk memory
+
+To use a disk memory node, you have to register it with this function:
+
+\code{.c}
+	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) "/tmp/", 1024*1024*200);
+\endcode
+
+Here, we use the stdio library to realize the read/write operations, i.e.
+fread/fwrite. This structure must have a path where to store files, as well as
+the maximum size the software can afford storing on the disk.
+
+Don't forget to check if the result is correct!
+
+When the register function is called, StarPU will benchmark the disk. This can
+take some time.
+
+<strong>Warning: the size thus has to be at least 1 MB!</strong> 
+
+StarPU will automatically try to evict unused data to this new disk. One can
+also use the standard StarPU node API, see the \ref API_Standard_Memory_Library
+and the \ref API_Data_Interfaces .
+
+The disk is unregistered during the starpu_shutdown().
+
+\section DiskFunctions Disk functions
+
+There are various ways to operate a disk memory node, described by the structure
+starpu_disk_ops. For instance, the variable #starpu_disk_stdio_ops
+uses fread/fwrite functions.
+
+All structures are in \ref API_Out_Of_Core .
+
+\section ExampleDiskCopy Examples: disk_copy
+
+\snippet disk_copy.c To be included
+
+\section ExampleDiskCompute Examples: disk_compute
+
+\snippet disk_compute.c To be included
+
+*/

+ 4 - 0
doc/doxygen/chapters/performance_feedback.doxy

@@ -253,6 +253,10 @@ starpu_shutdown(). The trace is a binary file whose name has the form
 <c>/tmp/</c> directory by default, or by the directory specified by
 the environment variable \ref STARPU_FXT_PREFIX.
 
+The additional configure option \ref enable-fxt-lock "--enable-fxt-lock" can 
+be used to generate trace events which describes the locks behaviour during 
+the execution.
+
 \subsection CreatingAGanttDiagram Creating a Gantt Diagram
 
 When the FxT trace file <c>filename</c> has been generated, it is possible to

+ 1 - 0
doc/doxygen/doxygen-config.cfg.in

@@ -25,6 +25,7 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 			 @top_srcdir@/include/starpu_data_filters.h \
 			 @top_srcdir@/include/starpu_data.h \
 			 @top_srcdir@/include/starpu_data_interfaces.h \
+			 @top_srcdir@/include/starpu_disk.h \
 			 @top_srcdir@/include/starpu_deprecated_api.h \
 			 @top_srcdir@/include/starpu_driver.h \
 			 @top_srcdir@/include/starpu_expert.h \

+ 9 - 0
doc/doxygen/refman.tex

@@ -129,6 +129,13 @@ Documentation License”.
 \hypertarget{TipsAndTricksToKnowAbout}{}
 \input{TipsAndTricksToKnowAbout}
 
+\chapter{Out Of Core}
+\label{OutOfCore}
+\hypertarget{OutOfCore}{}
+\input{OutOfCore}
+
+
+
 \chapter{MPI Support}
 \label{MPISupport}
 \hypertarget{MPISupport}{}
@@ -195,6 +202,7 @@ Documentation License”.
 \input{group__API__Data__Management}
 \input{group__API__Data__Interfaces}
 \input{group__API__Data__Partition}
+\input{group__API__Out__Of__Core}
 \input{group__API__Multiformat__Data__Interface}
 \input{group__API__Codelet__And__Tasks}
 \input{group__API__Insert__Task}
@@ -238,6 +246,7 @@ Documentation License”.
 \input{starpu__data__filters_8h}
 \input{starpu__data__interfaces_8h}
 \input{starpu__deprecated__api_8h}
+\input{starpu__disk_8h}
 \input{starpu__driver_8h}
 \input{starpu__expert_8h}
 \input{starpu__fxt_8h}

+ 3 - 3
doc/texinfo/chapters/advanced-examples.texi

@@ -235,7 +235,7 @@ int vector[NX];
 starpu_data_handle_t handle;
 
 /* Declare data to StarPU */
-starpu_vector_data_register(&handle, 0, (uintptr_t)vector,
+starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector,
                             NX, sizeof(vector[0]));
 
 /* Partition the vector in PARTS sub-vectors */
@@ -1002,7 +1002,7 @@ struct starpu_multiformat_data_interface_ops format_ops = @{
     .cpu_elemsize = 2 * sizeof(float),
     ...
 @};
-starpu_multiformat_data_register(handle, 0, &array_of_structs, NX, &format_ops);
+starpu_multiformat_data_register(handle, STARPU_MAIN_RAM, &array_of_structs, NX, &format_ops);
 @end smallexample
 @end cartouche
 
@@ -1248,7 +1248,7 @@ Complex data interfaces can then be registered to StarPU.
 @smallexample
 double real = 45.0;
 double imaginary = 12.0;
-starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
+starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
 starpu_insert_task(&cl_display, STARPU_R, handle1, 0);
 @end smallexample
 @end cartouche

+ 6 - 6
doc/texinfo/chapters/api.texi

@@ -783,7 +783,7 @@ item.
 @smallexample
 float var;
 starpu_data_handle_t var_handle;
-starpu_variable_data_register(&var_handle, 0, (uintptr_t)&var, sizeof(var));
+starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
 @end smallexample
 @end cartouche
 @end deftypefun
@@ -796,7 +796,7 @@ Register the @var{nx} @var{elemsize}-byte elements pointed to by
 @smallexample
 float vector[NX];
 starpu_data_handle_t vector_handle;
-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX,
                             sizeof(vector[0]));
 @end smallexample
 @end cartouche
@@ -814,7 +814,7 @@ alignment purposes.
 float *matrix;
 starpu_data_handle_t matrix_handle;
 matrix = (float*)malloc(width * height * sizeof(float));
-starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix,
+starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix,
                             width, width, height, sizeof(float));
 @end smallexample
 @end cartouche
@@ -831,7 +831,7 @@ between rows and between z planes.
 float *block;
 starpu_data_handle_t block_handle;
 block = (float*)malloc(nx*ny*nz*sizeof(float));
-starpu_block_data_register(&block_handle, 0, (uintptr_t)block,
+starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block,
                            nx, nx*ny, nx, ny, nz, sizeof(float));
 @end smallexample
 @end cartouche
@@ -1584,11 +1584,11 @@ starpu_data_partition(A_handle, &f);
 
 @deftypefun void starpu_data_unpartition (starpu_data_handle_t @var{root_data}, unsigned @var{gathering_node})
 This unapplies one filter, thus unpartitioning the data. The pieces of data are
-collected back into one big piece in the @var{gathering_node} (usually 0). Tasks
+collected back into one big piece in the @var{gathering_node} (usually STARPU_MAIN_RAM). Tasks
 working on the partitioned data must be already finished when calling @code{starpu_data_unpartition}.
 @cartouche
 @smallexample
-starpu_data_unpartition(A_handle, 0);
+starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
 @end smallexample
 @end cartouche
 @end deftypefun

+ 2 - 2
doc/texinfo/chapters/basic-examples.texi

@@ -584,7 +584,7 @@ The following lines show how to declare an array of @code{NX} elements of type
 float vector[NX];
 
 starpu_data_handle_t vector_handle;
-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX,
                             sizeof(vector[0]));
 @end smallexample
 @end cartouche
@@ -860,7 +860,7 @@ int main(int argc, char **argv)
 @cartouche
 @smallexample
     /* @b{Registering data within StarPU} */
-    starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
+    starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
                                 NX, sizeof(vector[0]));
 
     /* @b{Definition of the task} */

+ 3 - 3
doc/texinfo/chapters/configuration.texi

@@ -477,9 +477,9 @@ it is therefore necessary to disable asynchronous data transfers.
 Disable asynchronous copies between CPU and MIC devices.
 @end defvr
 
-@defvr {Environment variable} STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
-Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
-instead. This permits to test the performance effect of GPU-Direct.
+@defvr {Environment variable} STARPU_ENABLE_CUDA_GPU_GPU_DIRECT
+Enable direct CUDA transfers from GPU to GPU, without copying through RAM.
+This permits to test the performance effect of GPU-Direct.
 @end defvr
 
 @node Scheduling

+ 3 - 3
doc/texinfo/chapters/mpi-support.texi

@@ -64,7 +64,7 @@ int main(int argc, char **argv)
     starpu_init(NULL);
     starpu_mpi_initialize_extended(&rank, &size);
 
-    starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
+    starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(unsigned));
 
     unsigned nloops = NITER;
     unsigned loop;
@@ -310,7 +310,7 @@ data which will be needed by the tasks that we will execute.
             int mpi_rank = my_distrib(x, y, size);
              if (mpi_rank == my_rank)
                 /* Owning data */
-                starpu_variable_data_register(&data_handles[x][y], 0,
+                starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM,
                                               (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
             else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
                   || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
@@ -381,7 +381,7 @@ for(x = 0; x < nblocks ;  x++)
 @{
     int mpi_rank = my_distrib(x, nodes);
     if (rank == root) @{
-        starpu_vector_data_register(&data_handles[x], 0, (uintptr_t)vector[x],
+        starpu_vector_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)vector[x],
                                     blocks_size, sizeof(float));
     @}
     else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1))) @{

+ 3 - 3
doc/texinfo/chapters/vector_scal_c.texi

@@ -2,7 +2,7 @@
 
 @c This file is part of the StarPU Handbook.
 @c Copyright (C) 2009-2011, 2013  Université de Bordeaux 1
-@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+@c Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 @c See the file starpu.texi for copying conditions.
 
 @smallexample
@@ -69,14 +69,14 @@ int main(int argc, char **argv)
      *  - the first argument of the registration method is a pointer to the
      *    handle that should describe the data
      *  - the second argument is the memory node where the data (ie. "vector")
-     *    resides initially: 0 stands for an address in main memory, as
+     *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
      *    opposed to an adress on a GPU for instance.
      *  - the third argument is the adress of the vector in RAM
      *  - the fourth argument is the number of elements in the vector
      *  - the fifth argument is the size of each element.
      */
     starpu_data_handle_t vector_handle;
-    starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
+    starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
                                 NX, sizeof(vector[0]));
 
     float factor = 3.14;

+ 2 - 2
doc/tutorial/vector_scal.c

@@ -75,14 +75,14 @@ int main(int argc, char **argv)
 	 *  - the first argument of the registration method is a pointer to the
 	 *    handle that should describe the data
 	 *  - the second argument is the memory node where the data (ie. "vector")
-	 *    resides initially: 0 stands for an address in main memory, as
+	 *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
 	 *    opposed to an adress on a GPU for instance.
 	 *  - the third argument is the adress of the vector in RAM
 	 *  - the fourth argument is the number of elements in the vector
 	 *  - the fifth argument is the size of each element.
 	 */
 	starpu_data_handle_t vector_handle;
-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
 				    NX, sizeof(vector[0]));
 
 	float factor = 3.14;

+ 2 - 2
examples/audio/starpu_audio_processing.c

@@ -413,7 +413,7 @@ int main(int argc, char **argv)
 
 	starpu_cublas_init();
 
-	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));
+	starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, niter*nsamples, sizeof(float));
 
 	struct starpu_data_filter f =
 	{
@@ -458,7 +458,7 @@ int main(int argc, char **argv)
 		fprintf(stderr, "Writing output data\n");
 
 	/* make sure that the output is in RAM before quitting StarPU */
-	starpu_data_unpartition(A_handle, 0);
+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
 	starpu_data_unregister(A_handle);
 
 	starpu_cublas_shutdown();

+ 4 - 4
examples/axpy/axpy.c

@@ -152,8 +152,8 @@ int main(int argc, char **argv)
 	FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]);
 
 	/* Declare the data to StarPU */
-	starpu_vector_data_register(&_handle_x, 0, (uintptr_t)_vec_x, N, sizeof(TYPE));
-	starpu_vector_data_register(&_handle_y, 0, (uintptr_t)_vec_y, N, sizeof(TYPE));
+	starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE));
+	starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE));
 
 	/* Divide the vector into blocks */
 	struct starpu_data_filter block_filter =
@@ -194,8 +194,8 @@ int main(int argc, char **argv)
 	starpu_task_wait_for_all();
 
 enodev:
-	starpu_data_unpartition(_handle_x, 0);
-	starpu_data_unpartition(_handle_y, 0);
+	starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM);
+	starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM);
 	starpu_data_unregister(_handle_x);
 	starpu_data_unregister(_handle_y);
 

+ 1 - 1
examples/basic_examples/block.c

@@ -37,7 +37,7 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 	starpu_data_handle_t block_handle;
         int i;
 
-	starpu_block_data_register(&block_handle, 0, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float));
+	starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float));
 
 	starpu_codelet_init(&cl);
 	cl.where = where;

+ 1 - 1
examples/basic_examples/dynamic_handles.c

@@ -83,7 +83,7 @@ int main(int argc, char **argv)
 	for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
 	     dummy_big_cl.dyn_modes[i] = STARPU_RW;
 
-	starpu_variable_data_register(&handle, 0, (uintptr_t)&val, sizeof(int));
+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&val, sizeof(int));
 
 	task = starpu_task_create();
 	task->synchronous = 1;

+ 6 - 6
examples/basic_examples/mult.c

@@ -174,11 +174,11 @@ static void partition_mult_data(void)
 	 * node in which resides the matrix: 0 means that the 3rd argument is
 	 * an adress in main memory.
 	 */
-	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A, 
+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, 
 		ydim, ydim, zdim, sizeof(float));
-	starpu_matrix_data_register(&B_handle, 0, (uintptr_t)B, 
+	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, 
 		zdim, zdim, xdim, sizeof(float));
-	starpu_matrix_data_register(&C_handle, 0, (uintptr_t)C, 
+	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, 
 		ydim, ydim, xdim, sizeof(float));
 
 	/* A filter is a method to partition a data into disjoint chunks, it is
@@ -365,9 +365,9 @@ int main(STARPU_ATTRIBUTE_UNUSED int argc,
 	 * starpu_data_map_filters is called again on C_handle.
 	 * The second argument is the memory node where the different subsets
 	 * should be reassembled, 0 = main memory (RAM) */
-	starpu_data_unpartition(A_handle, 0);
-	starpu_data_unpartition(B_handle, 0);
-	starpu_data_unpartition(C_handle, 0);
+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
 
 	/* stop monitoring matrix C : after this, it is not possible to pass C 
 	 * (or any subset of C) as a codelet input/output. This also implements

+ 1 - 1
examples/basic_examples/multiformat.c

@@ -126,7 +126,7 @@ static void
 register_data(void)
 {
 	starpu_multiformat_data_register(&array_of_structs_handle,
-					 0,
+					 STARPU_MAIN_RAM,
 					 &array_of_structs,
 					 N_ELEMENTS,
 					 &format_ops);

+ 1 - 1
examples/basic_examples/variable.c

@@ -50,7 +50,7 @@ int main(int argc, char **argv)
         if (argc == 2) niter = atoi(argv[1]);
         foo = 0.0f;
 
-	starpu_variable_data_register(&float_array_handle, 0 /* home node */,
+	starpu_variable_data_register(&float_array_handle, STARPU_MAIN_RAM /* home node */,
                                       (uintptr_t)&foo, sizeof(float));
 
 #ifdef STARPU_USE_OPENCL

+ 2 - 2
examples/basic_examples/vector_scal.c

@@ -141,14 +141,14 @@ int main(int argc, char **argv)
 	 *  - the first argument of the registration method is a pointer to the
 	 *    handle that should describe the data
 	 *  - the second argument is the memory node where the data (ie. "vector")
-	 *    resides initially: 0 stands for an address in main memory, as
+	 *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
 	 *    opposed to an adress on a GPU for instance.
 	 *  - the third argument is the adress of the vector in RAM
 	 *  - the fourth argument is the number of elements in the vector
 	 *  - the fifth argument is the size of each element.
 	 */
 	starpu_data_handle_t vector_handle;
-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 
 	float factor = 3.14;
 

+ 3 - 3
examples/basic_examples/vector_scal_c.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011, 2013  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -69,14 +69,14 @@ int compute_(int *F_NX, float *vector)
 	 *  - the first argument of the registration method is a pointer to the
 	 *    handle that should describe the data
 	 *  - the second argument is the memory node where the data (ie. "vector")
-	 *    resides initially: 0 stands for an address in main memory, as
+	 *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
 	 *    opposed to an adress on a GPU for instance.
 	 *  - the third argument is the adress of the vector in RAM
 	 *  - the fourth argument is the number of elements in the vector
 	 *  - the fifth argument is the size of each element.
 	 */
 	starpu_data_handle_t vector_handle;
-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 
 	float factor = 3.14;
 

+ 1 - 1
examples/binary/binary.c

@@ -42,7 +42,7 @@ int compute(char *file_name, int load_as_file)
 	int ret = 0;
 	unsigned niter = 500;
 
-	starpu_vector_data_register(&float_array_handle, 0, (uintptr_t)&float_array, 4, sizeof(float));
+	starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&float_array, 4, sizeof(float));
 
 #ifdef STARPU_USE_OPENCL
 	if (load_as_file)

+ 1 - 1
examples/callback/callback.c

@@ -59,7 +59,7 @@ int main(int argc, char **argv)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
-	starpu_variable_data_register(&handle, 0, (uintptr_t)&v, sizeof(int));
+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int));
 
 	struct starpu_task *task = starpu_task_create();
 	task->cl = &cl;

+ 14 - 14
examples/cg/cg.c

@@ -137,16 +137,16 @@ static void free_data(void)
 
 static void register_data(void)
 {
-	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A, n, n, n, sizeof(TYPE));
-	starpu_vector_data_register(&b_handle, 0, (uintptr_t)b, n, sizeof(TYPE));
-	starpu_vector_data_register(&x_handle, 0, (uintptr_t)x, n, sizeof(TYPE));
+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, n, n, n, sizeof(TYPE));
+	starpu_vector_data_register(&b_handle, STARPU_MAIN_RAM, (uintptr_t)b, n, sizeof(TYPE));
+	starpu_vector_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)x, n, sizeof(TYPE));
 
-	starpu_vector_data_register(&r_handle, 0, (uintptr_t)r, n, sizeof(TYPE));
-	starpu_vector_data_register(&d_handle, 0, (uintptr_t)d, n, sizeof(TYPE));
-	starpu_vector_data_register(&q_handle, 0, (uintptr_t)q, n, sizeof(TYPE));
+	starpu_vector_data_register(&r_handle, STARPU_MAIN_RAM, (uintptr_t)r, n, sizeof(TYPE));
+	starpu_vector_data_register(&d_handle, STARPU_MAIN_RAM, (uintptr_t)d, n, sizeof(TYPE));
+	starpu_vector_data_register(&q_handle, STARPU_MAIN_RAM, (uintptr_t)q, n, sizeof(TYPE));
 
-	starpu_variable_data_register(&dtq_handle, 0, (uintptr_t)&dtq, sizeof(TYPE));
-	starpu_variable_data_register(&rtr_handle, 0, (uintptr_t)&rtr, sizeof(TYPE));
+	starpu_variable_data_register(&dtq_handle, STARPU_MAIN_RAM, (uintptr_t)&dtq, sizeof(TYPE));
+	starpu_variable_data_register(&rtr_handle, STARPU_MAIN_RAM, (uintptr_t)&rtr, sizeof(TYPE));
 
 	if (use_reduction)
 	{
@@ -160,13 +160,13 @@ static void register_data(void)
 
 static void unregister_data(void)
 {
-	starpu_data_unpartition(A_handle, 0);
-	starpu_data_unpartition(b_handle, 0);
-	starpu_data_unpartition(x_handle, 0);
+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(b_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(x_handle, STARPU_MAIN_RAM);
 
-	starpu_data_unpartition(r_handle, 0);
-	starpu_data_unpartition(d_handle, 0);
-	starpu_data_unpartition(q_handle, 0);
+	starpu_data_unpartition(r_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(d_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(q_handle, STARPU_MAIN_RAM);
 
 	starpu_data_unregister(A_handle);
 	starpu_data_unregister(b_handle);

+ 3 - 3
examples/cholesky/cholesky_grain_tag.c

@@ -189,7 +189,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
@@ -250,7 +250,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 	{
 		/* stall the application until the end of computations */
 		starpu_tag_wait(TAG11_AUX(nblocks-1, reclevel));
-		starpu_data_unpartition(dataA, 0);
+		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 		starpu_data_unregister(dataA);
 		return 0;
 	}
@@ -274,7 +274,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
 		free(tag_array);
 
-		starpu_data_unpartition(dataA, 0);
+		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 		starpu_data_unregister(dataA);
 
 		float *newmatA = &matA[nbigblocks*(size/nblocks)*(ld+1)];

+ 2 - 2
examples/cholesky/cholesky_implicit.c

@@ -183,7 +183,7 @@ static int cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	struct starpu_data_filter f =
 	{
@@ -201,7 +201,7 @@ static int cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
 	int ret = _cholesky(dataA, nblocks);
 
-	starpu_data_unpartition(dataA, 0);
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 	starpu_data_unregister(dataA);
 
 	return ret;

+ 3 - 3
examples/cholesky/cholesky_tag.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -234,7 +234,7 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	/* stall the application until the end of computations */
 	starpu_tag_wait(TAG11(nblocks-1));
 
-	starpu_data_unpartition(dataA, 0);
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 
 	end = starpu_timing_now();
 
@@ -279,7 +279,7 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 

+ 1 - 1
examples/cholesky/cholesky_tile_tag.c

@@ -301,7 +301,7 @@ int main(int argc, char **argv)
 	{
 		if (x <= y)
 		{
-			starpu_matrix_data_register(&A_state[y][x], 0, (uintptr_t)A[y][x], 
+			starpu_matrix_data_register(&A_state[y][x], STARPU_MAIN_RAM, (uintptr_t)A[y][x], 
 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
 		}
 	}

+ 1 - 1
examples/cpp/incrementer_cpp.cpp

@@ -50,7 +50,7 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
-	starpu_vector_data_register(&float_array_handle, 0, (uintptr_t)&float_array, 4, sizeof(float));
+	starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&float_array, 4, sizeof(float));
 
 #ifdef STARPU_USE_OPENCL
         ret = starpu_opencl_load_opencl_from_file("examples/incrementer/incrementer_kernels_opencl_kernel.cl", &opencl_program, NULL);

+ 3 - 3
examples/filters/custom_mf/custom_interface.c

@@ -230,7 +230,7 @@ static size_t custom_interface_get_size(starpu_data_handle_t handle)
 	struct custom_data_interface *data_interface;
 
 	data_interface = (struct custom_data_interface *)
-				starpu_data_get_interface_on_node(handle, 0);
+				starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 	size = data_interface->nx * data_interface->ops->cpu_elemsize;
 	return size;
 }
@@ -243,7 +243,7 @@ static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle)
 static void display_custom_interface(starpu_data_handle_t handle, FILE *f)
 {
 	struct custom_data_interface *ci = (struct custom_data_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 	fprintf(f, "Custom interface of size %d", ci->nx);
 }
 
@@ -252,7 +252,7 @@ custom_get_nx(starpu_data_handle_t handle)
 {
 	struct custom_data_interface *data_interface;
 	data_interface = (struct custom_data_interface *)
-				starpu_data_get_interface_on_node(handle, 0);
+				starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 	return data_interface->nx;
 }
 

+ 2 - 2
examples/filters/custom_mf/custom_mf_filter.c

@@ -110,7 +110,7 @@ register_and_partition_data(void)
 		_array_of_structs[i].x = i+1.0;
 		_array_of_structs[i].y = 42.0;
 	}
-	custom_data_register(&_handle, 0, &_array_of_structs, N, &format_ops);
+	custom_data_register(&_handle, STARPU_MAIN_RAM, &_array_of_structs, N, &format_ops);
 
 	struct starpu_data_filter f =
 	{
@@ -125,7 +125,7 @@ register_and_partition_data(void)
 static void
 unpartition_and_unregister_data(void)
 {
-	starpu_data_unpartition(_handle, 0);
+	starpu_data_unpartition(_handle, STARPU_MAIN_RAM);
 	starpu_data_unregister(_handle);
 }
 

+ 2 - 2
examples/filters/fblock.c

@@ -115,7 +115,7 @@ int main(int argc, char **argv)
 #endif
 
         /* Declare data to StarPU */
-        starpu_block_data_register(&handle, 0, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int));
+        starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int));
         FPRINTF(stderr, "IN  Block\n");
         print_data(handle);
 
@@ -159,7 +159,7 @@ int main(int argc, char **argv)
         }
 
         /* Unpartition the data, unregister it from StarPU and shutdown */
-        starpu_data_unpartition(handle, 0);
+        starpu_data_unpartition(handle, STARPU_MAIN_RAM);
         print_data(handle);
         starpu_data_unregister(handle);
 

+ 2 - 2
examples/filters/fmatrix.c

@@ -75,7 +75,7 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Declare data to StarPU */
-	starpu_matrix_data_register(&handle, 0, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0]));
+	starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0]));
 
         /* Partition the matrix in PARTS sub-matrices */
 	struct starpu_data_filter f =
@@ -102,7 +102,7 @@ int main(int argc, char **argv)
 	}
 
         /* Unpartition the data, unregister it from StarPU and shutdown */
-	starpu_data_unpartition(handle, 0);
+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
         starpu_data_unregister(handle);
 	starpu_shutdown();
 

+ 2 - 2
examples/filters/fvector.c

@@ -63,7 +63,7 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Declare data to StarPU */
-	starpu_vector_data_register(&handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 
         /* Partition the vector in PARTS sub-vectors */
 	struct starpu_data_filter f =
@@ -91,7 +91,7 @@ int main(int argc, char **argv)
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
-	starpu_data_unpartition(handle, 0);
+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
         starpu_data_unregister(handle);
 	starpu_shutdown();
 

+ 4 - 4
examples/filters/shadow.c

@@ -121,10 +121,10 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Declare source vector to StarPU */
-	starpu_vector_data_register(&handle, 0, (uintptr_t)vector, NX + 2*SHADOW, sizeof(vector[0]));
+	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX + 2*SHADOW, sizeof(vector[0]));
 
 	/* Declare destination vector to StarPU */
-	starpu_vector_data_register(&handle2, 0, (uintptr_t)vector2, NX + PARTS*2*SHADOW, sizeof(vector[0]));
+	starpu_vector_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)vector2, NX + PARTS*2*SHADOW, sizeof(vector[0]));
 
         /* Partition the source vector in PARTS sub-vectors with shadows */
 	/* NOTE: the resulting handles should only be used in read-only mode,
@@ -163,8 +163,8 @@ int main(int argc, char **argv)
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
-	starpu_data_unpartition(handle, 0);
-	starpu_data_unpartition(handle2, 0);
+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(handle2, STARPU_MAIN_RAM);
         starpu_data_unregister(handle);
         starpu_data_unregister(handle2);
 	starpu_shutdown();

+ 4 - 4
examples/filters/shadow2d.c

@@ -202,10 +202,10 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Declare source matrix to StarPU */
-	starpu_matrix_data_register(&handle, 0, (uintptr_t)matrix, NX + 2*SHADOWX, NX + 2*SHADOWX, NY + 2*SHADOWY, sizeof(matrix[0][0]));
+	starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX + 2*SHADOWX, NX + 2*SHADOWX, NY + 2*SHADOWY, sizeof(matrix[0][0]));
 
 	/* Declare destination matrix to StarPU */
-	starpu_matrix_data_register(&handle2, 0, (uintptr_t)matrix2, NX + PARTSX*2*SHADOWX, NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, sizeof(matrix2[0][0]));
+	starpu_matrix_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)matrix2, NX + PARTSX*2*SHADOWX, NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, sizeof(matrix2[0][0]));
 
         /* Partition the source matrix in PARTSY*PARTSX sub-matrices with shadows */
 	/* NOTE: the resulting handles should only be used in read-only mode,
@@ -258,8 +258,8 @@ int main(int argc, char **argv)
 		}
 	}
 
-	starpu_data_unpartition(handle, 0);
-	starpu_data_unpartition(handle2, 0);
+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(handle2, STARPU_MAIN_RAM);
         starpu_data_unregister(handle);
         starpu_data_unregister(handle2);
 	starpu_shutdown();

+ 4 - 4
examples/filters/shadow3d.c

@@ -214,13 +214,13 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Declare source matrix to StarPU */
-	starpu_block_data_register(&handle, 0, (uintptr_t)matrix,
+	starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix,
 			NX + 2*SHADOWX, (NX + 2*SHADOWX) * (NY + 2*SHADOWY),
 			NX + 2*SHADOWX, NY + 2*SHADOWY, NZ + 2*SHADOWZ,
 			sizeof(matrix[0][0][0]));
 
 	/* Declare destination matrix to StarPU */
-	starpu_block_data_register(&handle2, 0, (uintptr_t)matrix2,
+	starpu_block_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)matrix2,
 			NX + PARTSX*2*SHADOWX, (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY),
 			NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, NZ + PARTSZ*2*SHADOWZ,
 			sizeof(matrix2[0][0][0]));
@@ -290,8 +290,8 @@ int main(int argc, char **argv)
 		}
 	}
 
-	starpu_data_unpartition(handle, 0);
-	starpu_data_unpartition(handle2, 0);
+	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(handle2, STARPU_MAIN_RAM);
         starpu_data_unregister(handle);
         starpu_data_unregister(handle2);
 	starpu_shutdown();

+ 2 - 2
examples/heat/dw_factolu.c

@@ -750,7 +750,7 @@ void dw_factoLU(float *matA, unsigned size,
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, 
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, 
 			size, size, sizeof(float));
 
 	struct starpu_data_filter f =
@@ -779,7 +779,7 @@ void dw_factoLU(float *matA, unsigned size,
 	}
 
 	/* gather all the data */
-	starpu_data_unpartition(dataA, 0);
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 
 	starpu_data_unregister(dataA);
 

+ 3 - 3
examples/heat/dw_factolu_grain.c

@@ -213,7 +213,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 	 * (re)partition data
 	 */
 	starpu_data_handle_t dataA;
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	STARPU_ASSERT((size % blocksize) == 0);
 	STARPU_ASSERT((inner_size % blocksize) == 0);
@@ -288,7 +288,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 	{
 		/* we wait for the last task and we are done */
 		starpu_tag_wait(TAG11(nblocks-1, tag_prefix));
-		starpu_data_unpartition(dataA, 0);		
+		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);		
 		return;
 	}
 	else
@@ -312,7 +312,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
 		free(tag_array);
 
-		starpu_data_unpartition(dataA, 0);
+		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 		starpu_data_unregister(dataA);
 
 		float *newmatA = &matA[inner_size*(ld+1)];

+ 3 - 3
examples/heat/dw_factolu_tag.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -297,7 +297,7 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	struct starpu_data_filter f =
 	{
@@ -316,7 +316,7 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 	dw_codelet_facto_v3(dataA, nblocks);
 
 	/* gather all the data */
-	starpu_data_unpartition(dataA, 0);
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 
 	starpu_data_unregister(dataA);
 

+ 6 - 6
examples/heat/dw_sparse_cg.c

@@ -357,10 +357,10 @@ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
 	starpu_data_handle_t ds_vecr, ds_vecd, ds_vecq;
 
 	/* first the user-allocated data */
-	starpu_csr_data_register(&ds_matrixA, 0, nnz, nrow,
+	starpu_csr_data_register(&ds_matrixA, STARPU_MAIN_RAM, nnz, nrow,
 			(uintptr_t)nzvalA, colind, rowptr, 0, sizeof(float));
-	starpu_vector_data_register(&ds_vecx, 0, (uintptr_t)vecx, nrow, sizeof(float));
-	starpu_vector_data_register(&ds_vecb, 0, (uintptr_t)vecb, nrow, sizeof(float));
+	starpu_vector_data_register(&ds_vecx, STARPU_MAIN_RAM, (uintptr_t)vecx, nrow, sizeof(float));
+	starpu_vector_data_register(&ds_vecb, STARPU_MAIN_RAM, (uintptr_t)vecb, nrow, sizeof(float));
 
 	/* then allocate the algorithm intern data */
 	float *ptr_vecr, *ptr_vecd, *ptr_vecq;
@@ -380,9 +380,9 @@ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz,
 	FPRINTF(stdout, "nrow = %u \n", nrow);
 
 	/* and register them as well */
-	starpu_vector_data_register(&ds_vecr, 0, (uintptr_t)ptr_vecr, nrow, sizeof(float));
-	starpu_vector_data_register(&ds_vecd, 0, (uintptr_t)ptr_vecd, nrow, sizeof(float));
-	starpu_vector_data_register(&ds_vecq, 0, (uintptr_t)ptr_vecq, nrow, sizeof(float));
+	starpu_vector_data_register(&ds_vecr, STARPU_MAIN_RAM, (uintptr_t)ptr_vecr, nrow, sizeof(float));
+	starpu_vector_data_register(&ds_vecd, STARPU_MAIN_RAM, (uintptr_t)ptr_vecd, nrow, sizeof(float));
+	starpu_vector_data_register(&ds_vecq, STARPU_MAIN_RAM, (uintptr_t)ptr_vecq, nrow, sizeof(float));
 
 	/* we now have the complete problem */
 	struct cg_problem problem;

+ 1 - 1
examples/incrementer/incrementer.c

@@ -55,7 +55,7 @@ int main(int argc, char **argv)
 	float float_array[4] STARPU_ATTRIBUTE_ALIGNED(16) = { 0.0f, 0.0f, 0.0f, 0.0f};
 
 	starpu_data_handle_t float_array_handle;
-	starpu_vector_data_register(&float_array_handle, 0 /* home node */,
+	starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM /* home node */,
 			(uintptr_t)&float_array, 4, sizeof(float));
 
 #ifdef STARPU_USE_OPENCL

+ 2 - 2
examples/interface/complex.c

@@ -92,8 +92,8 @@ int main(int argc, char **argv)
 						  &opencl_program, NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
 #endif
-	starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
-	starpu_complex_data_register(&handle2, 0, &copy_real, &copy_imaginary, 1);
+	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
+	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);
 
 	ret = starpu_insert_task(&cl_display, STARPU_VALUE, "handle1", strlen("handle1"), STARPU_R, handle1, 0);
 	if (ret == -ENODEV) goto end;

+ 3 - 3
examples/interface/complex_interface.c

@@ -21,7 +21,7 @@
 double *starpu_complex_get_real(starpu_data_handle_t handle)
 {
 	struct starpu_complex_interface *complex_interface =
-		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
+		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 	return complex_interface->real;
 }
@@ -29,7 +29,7 @@ double *starpu_complex_get_real(starpu_data_handle_t handle)
 double *starpu_complex_get_imaginary(starpu_data_handle_t handle)
 {
 	struct starpu_complex_interface *complex_interface =
-		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
+		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 	return complex_interface->imaginary;
 }
@@ -37,7 +37,7 @@ double *starpu_complex_get_imaginary(starpu_data_handle_t handle)
 int starpu_complex_get_nx(starpu_data_handle_t handle)
 {
 	struct starpu_complex_interface *complex_interface =
-		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
+		(struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 	return complex_interface->nx;
 }

+ 2 - 2
examples/lu/xlu.c

@@ -249,7 +249,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 	/* We already enforce deps by hand */
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
@@ -271,7 +271,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 	int ret = dw_codelet_facto_v3(dataA, nblocks);
 
 	/* gather all the data */
-	starpu_data_unpartition(dataA, 0);
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 	starpu_data_unregister(dataA);
 
 	return ret;

+ 2 - 2
examples/lu/xlu_implicit.c

@@ -152,7 +152,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 	struct starpu_data_filter f =
 	{
@@ -171,7 +171,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 	int ret = dw_codelet_facto_v3(dataA, nblocks);
 
 	/* gather all the data */
-	starpu_data_unpartition(dataA, 0);
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 	starpu_data_unregister(dataA);
 	return ret;
 }

+ 3 - 3
examples/lu/xlu_implicit_pivot.c

@@ -206,7 +206,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 	struct starpu_data_filter f =
 	{
@@ -246,7 +246,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 
 	/* gather all the data */
-	starpu_data_unpartition(dataA, 0);
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 	starpu_data_unregister(dataA);
 
 	free(piv_description);
@@ -270,7 +270,7 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 	for (bj = 0; bj < nblocks; bj++)
 	for (bi = 0; bi < nblocks; bi++)
 	{
-		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
+		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], STARPU_MAIN_RAM,
 			(uintptr_t)matA[bi+nblocks*bj], size/nblocks,
 			size/nblocks, size/nblocks, sizeof(TYPE));
 	}

+ 3 - 3
examples/lu/xlu_pivot.c

@@ -338,7 +338,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 	/* We already enforce deps by hand */
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
@@ -390,7 +390,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 
 	/* gather all the data */
-	starpu_data_unpartition(dataA, 0);
+	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 	free(piv_description);
 
 	return ret;
@@ -413,7 +413,7 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 	for (bj = 0; bj < nblocks; bj++)
 	for (bi = 0; bi < nblocks; bi++)
 	{
-		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
+		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], STARPU_MAIN_RAM,
 			(uintptr_t)matA[bi+nblocks*bj], size/nblocks,
 			size/nblocks, size/nblocks, sizeof(TYPE));
 

+ 1 - 1
examples/mandelbrot/mandelbrot.c

@@ -501,7 +501,7 @@ int main(int argc, char **argv)
 	for (iby = 0; iby < nblocks; iby++)
 	{
 		unsigned *data = &buffer[iby*block_size*width];
-		starpu_vector_data_register(&block_handles[iby], 0,
+		starpu_vector_data_register(&block_handles[iby], STARPU_MAIN_RAM,
                         (uintptr_t)data, block_size*width, sizeof(unsigned));
 	}
 

+ 3 - 3
examples/matvecmult/matvecmult.c

@@ -186,9 +186,9 @@ int main(int argc, char **argv)
         fillArray(mult, height);
         matVecMult(matrix, vector, width, height, correctResult);
 
-	starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix, width, width, height, sizeof(float));
-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, width, sizeof(float));
-	starpu_vector_data_register(&mult_handle, 0, (uintptr_t)mult, height, sizeof(float));
+	starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float));
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, width, sizeof(float));
+	starpu_vector_data_register(&mult_handle, STARPU_MAIN_RAM, (uintptr_t)mult, height, sizeof(float));
 
 #ifdef STARPU_USE_OPENCL
         ret = starpu_opencl_load_opencl_from_file("examples/matvecmult/matvecmult_kernel.cl", &opencl_code, NULL);

+ 6 - 6
examples/mult/xgemm.c

@@ -110,11 +110,11 @@ static void init_problem_data(void)
 
 static void partition_mult_data(void)
 {
-	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A,
+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A,
 		ydim, ydim, zdim, sizeof(TYPE));
-	starpu_matrix_data_register(&B_handle, 0, (uintptr_t)B,
+	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B,
 		zdim, zdim, xdim, sizeof(TYPE));
-	starpu_matrix_data_register(&C_handle, 0, (uintptr_t)C,
+	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C,
 		ydim, ydim, xdim, sizeof(TYPE));
 
 	struct starpu_data_filter vert;
@@ -346,9 +346,9 @@ int main(int argc, char **argv)
 	FPRINTF(stderr, "GFlop/s: %.2f\n", flops/timing/1000.0);
 
 enodev:
-	starpu_data_unpartition(C_handle, 0);
-	starpu_data_unpartition(B_handle, 0);
-	starpu_data_unpartition(A_handle, 0);
+	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
 
 	starpu_data_unregister(A_handle);
 	starpu_data_unregister(B_handle);

+ 1 - 1
examples/openmp/vector_scal_omp.c

@@ -96,7 +96,7 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	starpu_data_handle_t vector_handle;
-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 
 	float factor = 1.001;
 

+ 3 - 3
examples/pi/pi.c

@@ -135,13 +135,13 @@ int main(int argc, char **argv)
 
 	/* Any worker may use that array now */
 	starpu_data_handle_t sobol_qrng_direction_handle;
-	starpu_vector_data_register(&sobol_qrng_direction_handle, 0,
+	starpu_vector_data_register(&sobol_qrng_direction_handle, STARPU_MAIN_RAM,
 		(uintptr_t)sobol_qrng_directions, n_dimensions*n_directions, sizeof(unsigned));
 
 	unsigned *cnt_array = malloc(ntasks*sizeof(unsigned));
 	STARPU_ASSERT(cnt_array);
 	starpu_data_handle_t cnt_array_handle;
-	starpu_vector_data_register(&cnt_array_handle, 0, (uintptr_t)cnt_array, ntasks, sizeof(unsigned));
+	starpu_vector_data_register(&cnt_array_handle, STARPU_MAIN_RAM, (uintptr_t)cnt_array, ntasks, sizeof(unsigned));
 
 	/* Use a write-through policy : when the data is modified on an
 	 * accelerator, we know that it will only be modified once and be
@@ -179,7 +179,7 @@ int main(int argc, char **argv)
 	starpu_task_wait_for_all();
 
 	/* Get the cnt_array back in main memory */
-	starpu_data_unpartition(cnt_array_handle, 0);
+	starpu_data_unpartition(cnt_array_handle, STARPU_MAIN_RAM);
 	starpu_data_unregister(cnt_array_handle);
 	starpu_data_unregister(sobol_qrng_direction_handle);
 

+ 1 - 1
examples/pi/pi_redux.c

@@ -334,7 +334,7 @@ int main(int argc, char **argv)
 	 * [-1,1]^2. */
 	unsigned long shot_cnt = 0;
 	starpu_data_handle_t shot_cnt_handle;
-	starpu_variable_data_register(&shot_cnt_handle, 0,
+	starpu_variable_data_register(&shot_cnt_handle, STARPU_MAIN_RAM,
 			(uintptr_t)&shot_cnt, sizeof(shot_cnt));
 
 	starpu_data_set_reduction_methods(shot_cnt_handle,

+ 6 - 6
examples/ppm_downscaler/yuv_downscaler.c

@@ -159,39 +159,39 @@ int main(int argc, char **argv)
 	for (frame = 0; frame < nframes; frame++)
 	{
 		/* register Y layer */
-		starpu_matrix_data_register(&frame_y_handle[frame], 0,
+		starpu_matrix_data_register(&frame_y_handle[frame], STARPU_MAIN_RAM,
 			(uintptr_t)&yuv_in_buffer[frame].y,
 			WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));
 
 		starpu_data_partition(frame_y_handle[frame], &filter_y);
 
-		starpu_matrix_data_register(&new_frame_y_handle[frame], 0,
+		starpu_matrix_data_register(&new_frame_y_handle[frame], STARPU_MAIN_RAM,
 			(uintptr_t)&yuv_out_buffer[frame].y,
 			NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));
 
 		starpu_data_partition(new_frame_y_handle[frame], &filter_y);
 
 		/* register U layer */
-		starpu_matrix_data_register(&frame_u_handle[frame], 0,
+		starpu_matrix_data_register(&frame_u_handle[frame], STARPU_MAIN_RAM,
 			(uintptr_t)&yuv_in_buffer[frame].u,
 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
 
 		starpu_data_partition(frame_u_handle[frame], &filter_uv);
 
-		starpu_matrix_data_register(&new_frame_u_handle[frame], 0,
+		starpu_matrix_data_register(&new_frame_u_handle[frame], STARPU_MAIN_RAM,
 			(uintptr_t)&yuv_out_buffer[frame].u,
 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
 
 		starpu_data_partition(new_frame_u_handle[frame], &filter_uv);
 
 		/* register V layer */
-		starpu_matrix_data_register(&frame_v_handle[frame], 0,
+		starpu_matrix_data_register(&frame_v_handle[frame], STARPU_MAIN_RAM,
 			(uintptr_t)&yuv_in_buffer[frame].v,
 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
 
 		starpu_data_partition(frame_v_handle[frame], &filter_uv);
 
-		starpu_matrix_data_register(&new_frame_v_handle[frame], 0,
+		starpu_matrix_data_register(&new_frame_v_handle[frame], STARPU_MAIN_RAM,
 			(uintptr_t)&yuv_out_buffer[frame].v,
 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
 

+ 3 - 3
examples/reductions/dot_product.c

@@ -370,13 +370,13 @@ int main(int argc, char **argv)
 	unsigned block;
 	for (block = 0; block < _nblocks; block++)
 	{
-		starpu_vector_data_register(&_x_handles[block], 0,
+		starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM,
 			(uintptr_t)&_x[_entries_per_block*block], _entries_per_block, sizeof(float));
-		starpu_vector_data_register(&_y_handles[block], 0,
+		starpu_vector_data_register(&_y_handles[block], STARPU_MAIN_RAM,
 			(uintptr_t)&_y[_entries_per_block*block], _entries_per_block, sizeof(float));
 	}
 
-	starpu_variable_data_register(&_dot_handle, 0, (uintptr_t)&_dot, sizeof(DOT_TYPE));
+	starpu_variable_data_register(&_dot_handle, STARPU_MAIN_RAM, (uintptr_t)&_dot, sizeof(DOT_TYPE));
 
 	/*
 	 *	Compute dot product with StarPU

+ 2 - 2
examples/reductions/minmax_reduction.c

@@ -161,7 +161,7 @@ int main(int argc, char **argv)
 	for (block = 0; block < _nblocks; block++)
 	{
 		uintptr_t block_start = (uintptr_t)&_x[_entries_per_bock*block];
-		starpu_vector_data_register(&_x_handles[block], 0, block_start,
+		starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM, block_start,
 					    _entries_per_bock, sizeof(TYPE));
 	}
 
@@ -171,7 +171,7 @@ int main(int argc, char **argv)
 	/* Initialize current max */
 	_minmax[1] = TYPE_MIN;
 
-	starpu_variable_data_register(&_minmax_handle, 0, (uintptr_t)_minmax, 2*sizeof(TYPE));
+	starpu_variable_data_register(&_minmax_handle, STARPU_MAIN_RAM, (uintptr_t)_minmax, 2*sizeof(TYPE));
 
 	/* Set the methods to define neutral elements and to perform the reduction operation */
 	starpu_data_set_reduction_methods(_minmax_handle, &minmax_redux_codelet, &minmax_init_codelet);

+ 1 - 1
examples/spmd/vector_scal_spmd.c

@@ -114,7 +114,7 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	starpu_data_handle_t vector_handle;
-	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 
 	float factor = 1.001;
 

+ 7 - 7
examples/spmv/dw_block_spmv.c

@@ -47,7 +47,7 @@ void create_data(void)
 	bcsr_matrix = mm_file_to_bcsr(inputfile, c, r);
 
 	/* declare the corresponding block CSR to the runtime */
-	starpu_bcsr_data_register(&sparse_matrix, 0, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks,
+	starpu_bcsr_data_register(&sparse_matrix, STARPU_MAIN_RAM, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks,
 	                (uintptr_t)bcsr_matrix->val, bcsr_matrix->colind, bcsr_matrix->rowptr, 
 			0, bcsr_matrix->r, bcsr_matrix->c, sizeof(float));
 
@@ -69,16 +69,16 @@ void create_data(void)
 		vector_out_ptr[ind] = 0.0f;
 	}
 
-	starpu_vector_data_register(&vector_in, 0, (uintptr_t)vector_in_ptr, size, sizeof(float));
-	starpu_vector_data_register(&vector_out, 0, (uintptr_t)vector_out_ptr, size, sizeof(float));
+	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
+	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));
 }
 
 void unregister_data(void)
 {
-	starpu_data_unpartition(sparse_matrix, 0);
+	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
 	starpu_data_unregister(sparse_matrix);
 
-	starpu_data_unpartition(vector_in, 0);
+	starpu_data_unpartition(vector_in, STARPU_MAIN_RAM);
 	starpu_data_unregister(vector_in);
 
 	starpu_data_unregister(vector_out);
@@ -98,8 +98,8 @@ void init_problem_callback(void *arg)
 		printf("DONE ...\n");
 		gettimeofday(&end, NULL);
 
-/*		starpu_data_unpartition(sparse_matrix, 0); */
-		starpu_data_unpartition(vector_out, 0);
+/*		starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM); */
+		starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);
 
 		sem_post(&sem);
 	}

+ 5 - 5
examples/spmv/spmv.c

@@ -192,9 +192,9 @@ int main(int argc, char **argv)
 	/*
 	 *	Register the CSR matrix and the 2 vectors
 	 */
-	starpu_csr_data_register(&sparse_matrix, 0, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
-	starpu_vector_data_register(&vector_in, 0, (uintptr_t)vector_in_ptr, size, sizeof(float));
-	starpu_vector_data_register(&vector_out, 0, (uintptr_t)vector_out_ptr, size, sizeof(float));
+	starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
+	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
+	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));
 
 	/*
 	 *	Partition the CSR matrix and the output vector
@@ -239,8 +239,8 @@ int main(int argc, char **argv)
 	/*
 	 *	Unregister the CSR matrix and the output vector
 	 */
-	starpu_data_unpartition(sparse_matrix, 0);
-	starpu_data_unpartition(vector_out, 0);
+	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
+	starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);
 
 	/*
 	 *	Unregister data

+ 1 - 1
examples/stencil/stencil-blocks.c

@@ -262,7 +262,7 @@ static void allocate_block_on_node(starpu_data_handle_t *handleptr, TYPE **ptr,
 	memset(*ptr, 0, block_size);
 
 	/* Register it to StarPU */
-	starpu_block_data_register(handleptr, 0, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
+	starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
 }
 
 void display_memory_consumption(int rank)

+ 1 - 1
gcc-plugin/tests/output-pointer.c

@@ -82,7 +82,7 @@ main (int argc, char *argv[])
   expected_register_arguments.pointer = x;
   expected_register_arguments.elements = 42;
   expected_register_arguments.element_size = sizeof x[0];
-  starpu_vector_data_register (&handle, 0, (uintptr_t) x, 42, sizeof x[0]);
+  starpu_vector_data_register (&handle, STARPU_MAIN_RAM, (uintptr_t) x, 42, sizeof x[0]);
 
   struct insert_task_argument expected[] =
     {

+ 2 - 2
gcc-plugin/tests/pointers.c

@@ -85,12 +85,12 @@ main (int argc, char *argv[])
   expected_register_arguments.pointer = x;
   expected_register_arguments.elements = 1;
   expected_register_arguments.element_size = sizeof x[0];
-  starpu_vector_data_register (&handle, 0, (uintptr_t) x, 1, sizeof x[0]);
+  starpu_vector_data_register (&handle, STARPU_MAIN_RAM, (uintptr_t) x, 1, sizeof x[0]);
 
   expected_register_arguments.pointer = y;
   expected_register_arguments.elements = 1;
   expected_register_arguments.element_size = sizeof *y;
-  starpu_vector_data_register (&handle, 0, (uintptr_t) y, 1, sizeof *y);
+  starpu_vector_data_register (&handle, STARPU_MAIN_RAM, (uintptr_t) y, 1, sizeof *y);
 
   struct insert_task_argument expected_pointer_task[] =
     {

+ 1 - 0
include/starpu.h

@@ -45,6 +45,7 @@ typedef UINT_PTR uintptr_t;
 #include <starpu_thread_util.h>
 #include <starpu_util.h>
 #include <starpu_data.h>
+#include <starpu_disk.h>
 #include <starpu_data_interfaces.h>
 #include <starpu_data_filters.h>
 #include <starpu_stdlib.h>

+ 4 - 0
include/starpu_data.h

@@ -90,15 +90,19 @@ int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node);
 
 int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
 
+#define STARPU_MAIN_RAM 0
+
 enum starpu_node_kind
 {
 	STARPU_UNUSED     = 0x00,
 	STARPU_CPU_RAM    = 0x01,
 	STARPU_CUDA_RAM   = 0x02,
 	STARPU_OPENCL_RAM = 0x03,
+	STARPU_DISK_RAM   = 0x04,
 	STARPU_MIC_RAM    = 0x05,
 	STARPU_SCC_RAM    = 0x06,
 	STARPU_SCC_SHM    = 0x07
+
 };
 
 unsigned starpu_worker_get_memory_node(unsigned workerid);

+ 11 - 11
include/starpu_data_interfaces.h

@@ -101,24 +101,24 @@ enum starpu_data_interface_id
 
 struct starpu_data_interface_ops
 {
-	void (*register_data_handle)(starpu_data_handle_t handle,
-				     unsigned home_node, void *data_interface);
-	starpu_ssize_t (*allocate_data_on_node)(void *data_interface, unsigned node);
-	void (*free_data_on_node)(void *data_interface, unsigned node);
+	void		 (*register_data_handle)	(starpu_data_handle_t handle,
+								unsigned home_node, void *data_interface);
+	starpu_ssize_t	 (*allocate_data_on_node)	(void *data_interface, unsigned node);
+	void 		 (*free_data_on_node)		(void *data_interface, unsigned node);
 	const struct starpu_data_copy_methods *copy_methods;
-	void * (*handle_to_pointer)(starpu_data_handle_t handle, unsigned node);
-	size_t (*get_size)(starpu_data_handle_t handle);
-	uint32_t (*footprint)(starpu_data_handle_t handle);
-	int (*compare)(void *data_interface_a, void *data_interface_b);
-	void (*display)(starpu_data_handle_t handle, FILE *f);
+	void * 		 (*handle_to_pointer)		(starpu_data_handle_t handle, unsigned node);
+	size_t 		 (*get_size)			(starpu_data_handle_t handle);
+	uint32_t 	 (*footprint)			(starpu_data_handle_t handle);
+	int 		 (*compare)			(void *data_interface_a, void *data_interface_b);
+	void 		 (*display)			(starpu_data_handle_t handle, FILE *f);
 	enum starpu_data_interface_id interfaceid;
 	size_t interface_size;
 
 	int is_multiformat;
 	struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface);
 
-	int (*pack_data)(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count);
-	int (*unpack_data)(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
+	int (*pack_data) (starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count);
+	int (*unpack_data) (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
 };
 
 int starpu_data_interface_get_next_id(void);

+ 50 - 0
include/starpu_disk.h

@@ -0,0 +1,50 @@
+
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013 Corentin Salingue
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_DISK_H__
+#define __STARPU_DISK_H__
+
+/* list of functions to use on disk */
+struct starpu_disk_ops {
+ 	 void *  (*alloc)  (void *base, size_t size);
+	 void    (*free)   (void *base, void *obj, size_t size);
+	 void *  (*open)   (void *base, void *pos, size_t size);     /* open an existing file */
+	 void    (*close)  (void *base, void *obj, size_t size);
+	ssize_t  (*read)   (void *base, void *obj, void *buf, off_t offset, size_t size);        /* ~= pread */
+	ssize_t  (*write)  (void *base, void *obj, const void *buf, off_t offset, size_t size); 
+	/* readv, writev, read2d, write2d, etc. */
+	 void *  (*plug)   (void *parameter);
+	 void    (*unplug) (void *base);
+	  int    (*copy)   (void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size);
+	  int    (*bandwidth) (unsigned node);
+};
+
+
+/* Posix functions to use disk memory */
+extern struct starpu_disk_ops starpu_disk_stdio_ops;
+extern struct starpu_disk_ops starpu_disk_unistd_ops;
+extern struct starpu_disk_ops starpu_disk_unistd_o_direct_ops;
+
+/*functions to add an existing memory */
+void starpu_disk_close(unsigned node, void *obj, size_t size);
+
+void * starpu_disk_open(unsigned node, void *pos, size_t size);
+
+/* interface to create and to free a memory disk */
+int starpu_disk_register(struct starpu_disk_ops * func, void *parameter, size_t size);
+
+#endif /* __STARPU_DISK_H__ */

+ 2 - 2
include/starpu_util.h

@@ -82,10 +82,10 @@ extern "C"
 #else
 #  if defined(__CUDACC__) && defined(STARPU_HAVE_WINDOWS)
 #    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) *(int*)NULL = 0; } while(0)
-#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { fprintf(stderr, "[starpu][%s][assert failure] " msg "\n", __starpu_func__, ## __VA_ARGS__); *(int*)NULL = 0; }} while(0)
+#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n", __starpu_func__, ## __VA_ARGS__); *(int*)NULL = 0; }} while(0)
 #  else
 #    define STARPU_ASSERT(x)		assert(x)
-#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { fprintf(stderr, "[starpu][%s][assert failure] " msg "\n", __starpu_func__, ## __VA_ARGS__); } ; assert(x); } while(0)
+#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n", __starpu_func__, ## __VA_ARGS__); } ; assert(x); } while(0)
 
 #  endif
 #endif

+ 1 - 1
mpi/examples/complex/mpi_complex.c

@@ -68,7 +68,7 @@ int main(int argc, char **argv)
 			imaginary[1] = 0.0;
 		}
 
-		starpu_complex_data_register(&handle, 0, real, imaginary, 2);
+		starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2);
 		starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2);
 
 		if (rank == 0)

+ 1 - 1
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -85,7 +85,7 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
 			if (mpi_rank == rank)
 			{
 				//fprintf(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-				starpu_matrix_data_register(&data_handles[x][y], 0, (uintptr_t)matA[x][y],
+				starpu_matrix_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)matA[x][y],
 						ld, size/nblocks, size/nblocks, sizeof(float));
 			}
 #warning TODO: make better test to only register what is needed

+ 7 - 7
mpi/examples/mpi_lu/plu_example.c

@@ -242,7 +242,7 @@ static void init_matrix(int rank)
 				}
 
 				/* Register it to StarPU */
-				starpu_matrix_data_register(handleptr, 0,
+				starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM,
 					(uintptr_t)*blockptr, size/nblocks,
 					size/nblocks, size/nblocks, sizeof(TYPE));
 			}
@@ -261,7 +261,7 @@ static void init_matrix(int rank)
 #ifdef SINGLE_TMP11
 	starpu_malloc((void **)&tmp_11_block, blocksize);
 	allocated_memory_extra += blocksize;
-	starpu_matrix_data_register(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
+	starpu_matrix_data_register(&tmp_11_block_handle, STARPU_MAIN_RAM, (uintptr_t)tmp_11_block,
 			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 #else
 	tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
@@ -276,7 +276,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_11_block[k]);
 
-			starpu_matrix_data_register(&tmp_11_block_handles[k], 0,
+			starpu_matrix_data_register(&tmp_11_block_handles[k], STARPU_MAIN_RAM,
 				(uintptr_t)tmp_11_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -311,7 +311,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_12_block[k]);
 
-			starpu_matrix_data_register(&tmp_12_block_handles[k], 0,
+			starpu_matrix_data_register(&tmp_12_block_handles[k], STARPU_MAIN_RAM,
 				(uintptr_t)tmp_12_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -322,7 +322,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_21_block[k]);
 
-			starpu_matrix_data_register(&tmp_21_block_handles[k], 0,
+			starpu_matrix_data_register(&tmp_21_block_handles[k], STARPU_MAIN_RAM,
 				(uintptr_t)tmp_21_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -334,7 +334,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_12_block[i][k]);
 
-			starpu_matrix_data_register(&tmp_12_block_handles[i][k], 0,
+			starpu_matrix_data_register(&tmp_12_block_handles[i][k], STARPU_MAIN_RAM,
 				(uintptr_t)tmp_12_block[i][k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -345,7 +345,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_21_block[i][k]);
 
-			starpu_matrix_data_register(&tmp_21_block_handles[i][k], 0,
+			starpu_matrix_data_register(&tmp_21_block_handles[i][k], STARPU_MAIN_RAM,
 				(uintptr_t)tmp_21_block[i][k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}

+ 1 - 1
mpi/examples/stencil/stencil5.c

@@ -108,7 +108,7 @@ int main(int argc, char **argv)
 			if (mpi_rank == my_rank)
 			{
 				//fprintf(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
-				starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
+				starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
 			}
 			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
 				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))

+ 2 - 0
mpi/include/starpu_mpi.h

@@ -40,6 +40,8 @@ int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
 int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
 int starpu_mpi_barrier(MPI_Comm comm);
 
+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
+
 int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
 int starpu_mpi_initialize(void) STARPU_DEPRECATED;
 int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED;

+ 31 - 14
mpi/src/starpu_mpi.c

@@ -32,8 +32,12 @@ static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type
 #endif
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
 							int dest, int mpi_tag, MPI_Comm comm,
-							unsigned detached, void (*callback)(void *), void *arg);
-static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg);
+							unsigned detached, void (*callback)(void *), void *arg,
+							int sequential_consistency);
+static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle,
+							int source, int mpi_tag, MPI_Comm comm,
+							unsigned detached, void (*callback)(void *), void *arg,
+							int sequential_consistency);
 static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req);
 
 /* The list of requests that have been newly submitted by the application */
@@ -73,6 +77,7 @@ struct _starpu_mpi_copy_handle
  /********************************************************/
 
 static struct _starpu_mpi_req *_starpu_mpi_req_hashmap = NULL;
+/** stores data which have been received by MPI but have not been requested by the application */
 static struct _starpu_mpi_copy_handle *_starpu_mpi_copy_handle_hashmap = NULL;
 
 static struct _starpu_mpi_req* find_req(int mpi_tag)
@@ -99,7 +104,7 @@ static void add_req(struct _starpu_mpi_req *req)
 	{
 		_STARPU_MPI_DEBUG(3, "Error add_req : request %p with tag %d already in the hashmap. \n", req, req->mpi_tag);
 		int seq_const = starpu_data_get_sequential_consistency_flag(req->data_handle);
-		if (seq_const)
+		if (seq_const &&  req->sequential_consistency)
 		{
 			STARPU_ASSERT_MSG(!test_req, "Error add_req : request %p with tag %d wanted to be added to the hashmap, while another request %p with the same tag is already in it. \n Sequential consistency is activated : this is not supported by StarPU.", req, req->mpi_tag, test_req);
 		}
@@ -213,6 +218,7 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 	req->internal_req = NULL;
 	req->is_internal_req = 0;
 	req->envelope = NULL;
+	req->sequential_consistency = 1;
  }
 
  /********************************************************/
@@ -225,7 +231,8 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 							       int srcdst, int mpi_tag, MPI_Comm comm,
 							       unsigned detached, void (*callback)(void *), void *arg,
 							       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-							       enum starpu_data_access_mode mode)
+							       enum starpu_data_access_mode mode,
+							       int sequential_consistency)
  {
 
 	 _STARPU_MPI_LOG_IN();
@@ -245,11 +252,12 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 	 req->callback = callback;
 	 req->callback_arg = arg;
 	 req->func = func;
+	 req->sequential_consistency = sequential_consistency;
 
 	 /* Asynchronously request StarPU to fetch the data in main memory: when
 	  * it is available in main memory, _starpu_mpi_submit_new_mpi_request(req) is called and
 	  * the request is actually submitted */
-	 starpu_data_acquire_cb(data_handle, mode, _starpu_mpi_submit_new_mpi_request, (void *)req);
+	 starpu_data_acquire_cb_sequential_consistency(data_handle, mode, _starpu_mpi_submit_new_mpi_request, (void *)req, sequential_consistency);
 
 	 _STARPU_MPI_LOG_OUT();
 	 return req;
@@ -343,9 +351,10 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
 							int dest, int mpi_tag, MPI_Comm comm,
-							unsigned detached, void (*callback)(void *), void *arg)
+							unsigned detached, void (*callback)(void *), void *arg,
+							int sequential_consistency)
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R);
+	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R, sequential_consistency);
 }
 
 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
@@ -354,7 +363,7 @@ int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_re
 	STARPU_ASSERT_MSG(public_req, "starpu_mpi_isend needs a valid starpu_mpi_req");
 
 	struct _starpu_mpi_req *req;
-	req = _starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 0, NULL, NULL);
+	req = _starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 0, NULL, NULL, 1);
 
 	STARPU_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common");
 	*public_req = req;
@@ -367,7 +376,7 @@ int starpu_mpi_isend_detached(starpu_data_handle_t data_handle,
 			      int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	_STARPU_MPI_LOG_IN();
-	_starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 1, callback, arg);
+	_starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 1, callback, arg, 1);
 
 	_STARPU_MPI_LOG_OUT();
 	return 0;
@@ -420,9 +429,9 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 	_STARPU_MPI_LOG_OUT();
 }
 
-static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg)
+static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg, int sequential_consistency)
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_data_func, STARPU_W);
+	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_data_func, STARPU_W, sequential_consistency);
 }
 
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
@@ -438,7 +447,7 @@ int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_re
 		starpu_data_set_tag(data_handle, mpi_tag);
 
 	struct _starpu_mpi_req *req;
-	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL);
+	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL, 1);
 
 	STARPU_ASSERT_MSG(req, "Invalid return for _starpu_mpi_irecv_common");
 	*public_req = req;
@@ -458,7 +467,15 @@ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int
 	if (tag == -1)
 		starpu_data_set_tag(data_handle, mpi_tag);
 
-	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg);
+	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, 1);
+	_STARPU_MPI_LOG_OUT();
+	return 0;
+}
+
+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
+{
+	_STARPU_MPI_LOG_IN();
+	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, sequential_consistency);
 
 	_STARPU_MPI_LOG_OUT();
 	return 0;
@@ -1230,7 +1247,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					add_chandle(chandle);
 
 					_STARPU_MPI_DEBUG(3, "Posting internal detached irecv on copy_handle with tag %d from src %d ..\n", chandle->mpi_tag, status.MPI_SOURCE);
-					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL);
+					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL, 1);
 					chandle->req->is_internal_req = 1;
 
 					// We wait until the request is pushed in the

+ 2 - 0
mpi/src/starpu_mpi_private.h

@@ -144,6 +144,8 @@ LIST_TYPE(_starpu_mpi_req,
 
 	int is_internal_req;
 	struct _starpu_mpi_req *internal_req;
+
+	int sequential_consistency;
 );
 
 #ifdef __cplusplus

+ 4 - 0
mpi/tests/Makefile.am

@@ -77,6 +77,7 @@ AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS)
 ########################
 
 starpu_mpi_TESTS =				\
+	datatypes				\
 	pingpong				\
 	mpi_test				\
 	mpi_isend				\
@@ -104,6 +105,7 @@ starpu_mpi_TESTS =				\
 	user_defined_datatype
 
 noinst_PROGRAMS =				\
+	datatypes				\
 	pingpong				\
 	mpi_test				\
 	mpi_isend				\
@@ -146,6 +148,8 @@ mpi_detached_tag_LDADD =				\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 mpi_redux_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+datatypes_LDADD =					\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 pingpong_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 mpi_test_LDADD =					\

+ 2 - 2
mpi/tests/block_interface.c

@@ -68,7 +68,7 @@ int main(int argc, char **argv)
 			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
 		}
 
-		starpu_block_data_register(&block_handle, 0,
+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
 			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
 			SIZE, SIZE, SIZE, sizeof(float));
 	}
@@ -77,7 +77,7 @@ int main(int argc, char **argv)
 		block = calloc(SIZE*SIZE*SIZE, sizeof(float));
 		assert(block);
 
-		starpu_block_data_register(&block_handle, 0,
+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
 			(uintptr_t)block, SIZE, SIZE*SIZE,
 			SIZE, SIZE, SIZE, sizeof(float));
 	}

+ 2 - 2
mpi/tests/block_interface_pinned.c

@@ -69,7 +69,7 @@ int main(int argc, char **argv)
 			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
 		}
 
-		starpu_block_data_register(&block_handle, 0,
+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
 			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
 			SIZE, SIZE, SIZE, sizeof(float));
 	}
@@ -79,7 +79,7 @@ int main(int argc, char **argv)
 			SIZE*SIZE*SIZE*sizeof(float));
 		memset(block, 0, SIZE*SIZE*SIZE*sizeof(float));
 
-		starpu_block_data_register(&block_handle, 0,
+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
 			(uintptr_t)block, SIZE, SIZE*SIZE,
 			SIZE, SIZE, SIZE, sizeof(float));
 	}

+ 310 - 0
mpi/tests/datatypes.c

@@ -0,0 +1,310 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <stdlib.h>
+#include "helper.h"
+
+typedef void (*check_func)(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error);
+
+void check_variable(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
+{
+	int ret;
+	float *v_s, *v_r;
+
+	STARPU_ASSERT(starpu_variable_get_elemsize(handle_s) == starpu_variable_get_elemsize(handle_r));
+
+	v_s = (float *)starpu_variable_get_local_ptr(handle_s);
+	v_r = (float *)starpu_variable_get_local_ptr(handle_r);
+
+	if (*v_s == *v_r)
+	{
+		FPRINTF_MPI("Success with variable value: %f == %f\n", *v_s, *v_r);
+	}
+	else
+	{
+		*error = 1;
+		FPRINTF_MPI("Error with variable value: %f != %f\n", *v_s, *v_r);
+	}
+}
+
+void check_vector(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
+{
+	int ret, i;
+	int nx;
+	int *v_r, *v_s;
+
+	STARPU_ASSERT(starpu_vector_get_elemsize(handle_s) == starpu_vector_get_elemsize(handle_r));
+	STARPU_ASSERT(starpu_vector_get_nx(handle_s) == starpu_vector_get_nx(handle_r));
+
+	nx = starpu_vector_get_nx(handle_r);
+	v_r = (int *)starpu_vector_get_local_ptr(handle_r);
+	v_s = (int *)starpu_vector_get_local_ptr(handle_s);
+
+	for(i=0 ; i<nx ; i++)
+	{
+		if (v_s[i] == v_r[i])
+		{
+			FPRINTF_MPI("Success with vector[%d] value: %d == %d\n", i, v_s[i], v_r[i]);
+		}
+		else
+		{
+			*error = 1;
+			FPRINTF_MPI("Error with vector[%d] value: %d != %d\n", i, v_s[i], v_r[i]);
+		}
+	}
+}
+
+void check_matrix(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
+{
+	STARPU_ASSERT(starpu_matrix_get_elemsize(handle_s) == starpu_matrix_get_elemsize(handle_r));
+	STARPU_ASSERT(starpu_matrix_get_nx(handle_s) == starpu_matrix_get_nx(handle_r));
+	STARPU_ASSERT(starpu_matrix_get_ny(handle_s) == starpu_matrix_get_ny(handle_r));
+	STARPU_ASSERT(starpu_matrix_get_local_ld(handle_s) == starpu_matrix_get_local_ld(handle_r));
+
+	char *matrix_s = (char *)starpu_matrix_get_local_ptr(handle_s);
+	char *matrix_r = (char *)starpu_matrix_get_local_ptr(handle_r);
+
+	int nx = starpu_matrix_get_nx(handle_s);
+	int ny = starpu_matrix_get_ny(handle_s);
+	int ldy = starpu_matrix_get_local_ld(handle_s);
+
+	int x, y;
+
+	for(y=0 ; y<ny ; y++)
+		for(x=0 ; x<nx ; x++)
+		{
+			int index=(y*ldy)+x;
+			if (matrix_s[index] == matrix_r[index])
+			{
+				FPRINTF_MPI("Success with matrix[%d,%d --> %d] value: %c == %c\n", x, y, index, matrix_s[index], matrix_r[index]);
+			}
+			else
+			{
+				*error = 1;
+				FPRINTF_MPI("Error with matrix[%d,%d --> %d] value: %c != %c\n", x, y, index, matrix_s[index], matrix_r[index]);
+			}
+		}
+}
+
+void check_block(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
+{
+	STARPU_ASSERT(starpu_block_get_elemsize(handle_s) == starpu_block_get_elemsize(handle_r));
+	STARPU_ASSERT(starpu_block_get_nx(handle_s) == starpu_block_get_nx(handle_r));
+	STARPU_ASSERT(starpu_block_get_ny(handle_s) == starpu_block_get_ny(handle_r));
+	STARPU_ASSERT(starpu_block_get_nz(handle_s) == starpu_block_get_nz(handle_r));
+	STARPU_ASSERT(starpu_block_get_local_ldy(handle_s) == starpu_block_get_local_ldy(handle_r));
+	STARPU_ASSERT(starpu_block_get_local_ldz(handle_s) == starpu_block_get_local_ldz(handle_r));
+
+	float *block_s = (float *)starpu_block_get_local_ptr(handle_s);
+	float *block_r = (float *)starpu_block_get_local_ptr(handle_r);
+
+	int nx = starpu_block_get_nx(handle_s);
+	int ny = starpu_block_get_ny(handle_s);
+	int nz = starpu_block_get_nz(handle_s);
+
+	int ldy = starpu_block_get_local_ldy(handle_s);
+	int ldz = starpu_block_get_local_ldz(handle_s);
+
+	int x, y, z;
+
+	for(z=0 ; z<nz ; z++)
+		for(y=0 ; y<ny ; y++)
+			for(x=0 ; x<nx ; x++)
+			{
+				int index=(z*ldz)+(y*ldy)+x;
+				if (block_s[index] == block_r[index])
+				{
+					FPRINTF_MPI("Success with block[%d,%d,%d --> %d] value: %f == %f\n", x, y, z, index, block_s[index], block_r[index]);
+				}
+				else
+				{
+					*error = 1;
+					FPRINTF_MPI("Error with block[%d,%d,%d --> %d] value: %f != %f\n", x, y, z, index, block_s[index], block_r[index]);
+				}
+			}
+}
+
+void send_recv_and_check(int rank, int node, starpu_data_handle_t handle_s, int tag_s, starpu_data_handle_t handle_r, int tag_r, int *error, check_func func)
+{
+	int ret;
+	MPI_Status status;
+
+	if (rank == 0)
+	{
+		ret = starpu_mpi_send(handle_s, node, tag_s, MPI_COMM_WORLD);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
+		ret = starpu_mpi_recv(handle_r, node, tag_r, MPI_COMM_WORLD, &status);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
+
+		func(handle_s, handle_r, error);
+	}
+	else
+	{
+		ret = starpu_mpi_recv(handle_s, node, tag_s, MPI_COMM_WORLD, &status);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
+		ret = starpu_mpi_send(handle_s, node, tag_r, MPI_COMM_WORLD);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int ret, rank, size;
+	int error=0;
+
+	int nx=3;
+	int ny=2;
+	int nz=4;
+
+	MPI_Init(NULL, NULL);
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+	if (size < 2)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 2 processes.\n");
+
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (rank == 0)
+	{
+		MPI_Status status;
+
+		{
+			float v = 42.12;
+			starpu_data_handle_t variable_handle[2];
+			starpu_variable_data_register(&variable_handle[0], 0, (uintptr_t)&v, sizeof(v));
+			starpu_variable_data_register(&variable_handle[1], -1, (uintptr_t)NULL, sizeof(v));
+
+			send_recv_and_check(rank, 1, variable_handle[0], 0x42, variable_handle[1], 0x1337, &error, check_variable);
+
+			starpu_data_unregister(variable_handle[0]);
+			starpu_data_unregister(variable_handle[1]);
+		}
+
+		{
+			int vector[4] = {1, 2, 3, 4};
+			starpu_data_handle_t vector_handle[2];
+
+			starpu_vector_data_register(&vector_handle[0], 0, (uintptr_t)vector, 4, sizeof(vector[0]));
+			starpu_vector_data_register(&vector_handle[1], -1, (uintptr_t)NULL, 4, sizeof(vector[0]));
+
+			send_recv_and_check(rank, 1, vector_handle[0], 0x43, vector_handle[1], 0x2337, &error, check_vector);
+
+			starpu_data_unregister(vector_handle[0]);
+			starpu_data_unregister(vector_handle[1]);
+		}
+
+		{
+			char *matrix, n='a';
+			int x, y;
+			starpu_data_handle_t matrix_handle[2];
+
+			matrix = (char*)malloc(nx*ny*nz*sizeof(char));
+			assert(matrix);
+			for(y=0 ; y<ny ; y++)
+			{
+				for(x=0 ; x<nx ; x++)
+				{
+					matrix[(y*nx)+x] = n++;
+				}
+			}
+
+			starpu_matrix_data_register(&matrix_handle[0], 0, (uintptr_t)matrix, nx, nx, ny, sizeof(char));
+			starpu_matrix_data_register(&matrix_handle[1], -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
+
+			send_recv_and_check(rank, 1, matrix_handle[0], 0x75, matrix_handle[1], 0x8555, &error, check_matrix);
+
+			starpu_data_unregister(matrix_handle[0]);
+			starpu_data_unregister(matrix_handle[1]);
+		}
+
+		{
+			float *block, n=1.0;
+			int x, y, z;
+			starpu_data_handle_t block_handle[2];
+
+			block = (float*)malloc(nx*ny*nz*sizeof(float));
+			assert(block);
+			for(z=0 ; z<nz ; z++)
+			{
+				for(y=0 ; y<ny ; y++)
+				{
+					for(x=0 ; x<nx ; x++)
+					{
+						block[(z*nx*ny)+(y*nx)+x] = n++;
+					}
+				}
+			}
+
+			starpu_block_data_register(&block_handle[0], 0, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
+			starpu_block_data_register(&block_handle[1], -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
+
+			send_recv_and_check(rank, 1, block_handle[0], 0x73, block_handle[1], 0x8337, &error, check_block);
+
+			starpu_data_unregister(block_handle[0]);
+			starpu_data_unregister(block_handle[1]);
+		}
+	}
+	else if (rank == 1)
+	{
+		MPI_Status status;
+
+		{
+			starpu_data_handle_t variable_handle;
+			starpu_variable_data_register(&variable_handle, -1, (uintptr_t)NULL, sizeof(float));
+			send_recv_and_check(rank, 0, variable_handle, 0x42, NULL, 0x1337, NULL, NULL);
+			starpu_data_unregister(variable_handle);
+		}
+
+		{
+			starpu_data_handle_t vector_handle;
+			starpu_vector_data_register(&vector_handle, -1, (uintptr_t)NULL, 4, sizeof(int));
+			send_recv_and_check(rank, 0, vector_handle, 0x43, NULL, 0x2337, NULL, NULL);
+			starpu_data_unregister(vector_handle);
+		}
+
+		{
+			starpu_data_handle_t matrix_handle;
+			starpu_matrix_data_register(&matrix_handle, -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
+			send_recv_and_check(rank, 0, matrix_handle, 0x75, NULL, 0x8555, NULL, NULL);
+			starpu_data_unregister(matrix_handle);
+		}
+
+		{
+			starpu_data_handle_t block_handle;
+			starpu_block_data_register(&block_handle, -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
+			send_recv_and_check(rank, 0, block_handle, 0x73, NULL, 0x8337, NULL, NULL);
+			starpu_data_unregister(block_handle);
+		}
+	}
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	MPI_Finalize();
+
+	return rank == 0 ? error : 0;
+}

+ 0 - 0
mpi/tests/helper.h


部分文件因文件數量過多而無法顯示