Browse Source

new merge with the trunk

Andra Hugo 12 years ago
parent
commit
a6a9a0ac35

+ 6 - 0
ChangeLog

@@ -36,6 +36,9 @@ New features:
   * SOCL
   * SOCL
         - Manual mapping of commands on specific devices is now possible
         - Manual mapping of commands on specific devices is now possible
   * New interface: COO matrix.
   * New interface: COO matrix.
+  * Data interfaces: The pack operation of user-defined data interfaces
+    now takes a new parameter, count, which should be set to the size of
+    the buffer created by the packing of the data.
   * MPI:
   * MPI:
         - Communication statistics for MPI can only be enabled at
         - Communication statistics for MPI can only be enabled at
 	  execution time by defining the environment variable
 	  execution time by defining the environment variable
@@ -50,6 +53,9 @@ New features:
         - Collective detached operations have new parameters, a
         - Collective detached operations have new parameters, a
 	  callback function and an argument. This is to be consistent
 	  callback function and an argument. This is to be consistent
 	  with the detached point-to-point communications.
 	  with the detached point-to-point communications.
+        - When exchanging user-defined data interfaces, the size of
+	  the data is the size returned by the pack operation, i.e.,
+	  data with dynamic size can now be exchanged with StarPU-MPI.
 
 
 Changes:
 Changes:
   * Fix the block filter functions.
   * Fix the block filter functions.

+ 57 - 59
configure.ac

@@ -1451,88 +1451,86 @@ AC_ARG_ENABLE(blas-lib,
  ])
  ])
 
 
 if test x$blas_lib = xmaybe -o x$blas_lib = xgoto; then
 if test x$blas_lib = xmaybe -o x$blas_lib = xgoto; then
-AC_ARG_WITH(goto-dir, [AS_HELP_STRING([--with-goto-dir=<dir>], [specify GotoBLAS lib location])],
-	[
-		blas_lib=goto
-		gotodir=$withval
-		AC_SUBST(GOTODIR, $gotodir)
-
-		CPPFLAGS="${CPPFLAGS} -I$gotodir/ "
-		LDFLAGS="${LDFLAGS} -L$gotodir/ "
+   AC_ARG_WITH(goto-dir, [AS_HELP_STRING([--with-goto-dir=<dir>], [specify GotoBLAS lib location])],
+   	[
+	    blas_lib=goto
+	    gotodir=$withval
+	    AC_SUBST(GOTODIR, $gotodir)
+
+	    CPPFLAGS="${CPPFLAGS} -I$gotodir/ "
+	    LDFLAGS="${LDFLAGS} -L$gotodir/ "
 	]
 	]
 	)
 	)
 
 
-if test x$blas_lib = xgoto; then
-STARPU_CHECK_LIB(BLAS, gfortran, main,,)
-STARPU_CHECK_LIB(BLAS, ifcore, main,,)
-# Perhaps that GotoBLAS2 is available instead (so that we have libgotoblas2.{so,a})
-STARPU_CHECK_LIB(BLAS, goto2, sgemm_,, [havegoto2=no], [$STARPU_BLAS_LDFLAGS])
-if test x$havegoto2 = xno; then
-STARPU_CHECK_LIB(BLAS, goto, sgemm_,,AC_MSG_ERROR([cannot find goto lib]), [$STARPU_BLAS_LDFLAGS])
-fi
-AC_DEFINE(STARPU_GOTO, [1], [use STARPU_GOTO library])
-fi
-
+   if test x$blas_lib = xgoto; then
+       STARPU_CHECK_LIB(BLAS, gfortran, main,,)
+       STARPU_CHECK_LIB(BLAS, ifcore, main,,)
+       # Perhaps that GotoBLAS2 is available instead (so that we have libgotoblas2.{so,a})
+       STARPU_CHECK_LIB(BLAS, goto2, sgemm_,, [havegoto2=no], [$STARPU_BLAS_LDFLAGS])
+       if test x$havegoto2 = xno; then
+	   STARPU_CHECK_LIB(BLAS, goto, sgemm_,,AC_MSG_ERROR([cannot find goto lib]), [$STARPU_BLAS_LDFLAGS])
+       fi
+       AC_DEFINE(STARPU_GOTO, [1], [use STARPU_GOTO library])
+   fi
 fi
 fi
 
 
 if test x$blas_lib = xmaybe -o x$blas_lib = xatlas; then
 if test x$blas_lib = xmaybe -o x$blas_lib = xatlas; then
-AC_ARG_WITH(atlas-dir, [AS_HELP_STRING([--with-atlas-dir=<dir>], [specify ATLAS lib location])],
+    AC_ARG_WITH(atlas-dir, [AS_HELP_STRING([--with-atlas-dir=<dir>], [specify ATLAS lib location])],
 	[
 	[
-		AC_MSG_CHECKING(STARPU_ATLAS location)
-		blas_lib=atlas
-		atlasdir=$withval
-		AC_MSG_RESULT($atlasdir)
-		AC_SUBST(ATLASDIR, $atlasdir)
-
-		CPPFLAGS="${CPPFLAGS} -I$atlasdir/include/ "
-		LDFLAGS="${LDFLAGS} -L$atlasdir/lib/ "
+	    AC_MSG_CHECKING(STARPU_ATLAS location)
+	    blas_lib=atlas
+	    atlasdir=$withval
+	    AC_MSG_RESULT($atlasdir)
+	    AC_SUBST(ATLASDIR, $atlasdir)
+
+	    CPPFLAGS="${CPPFLAGS} -I$atlasdir/include/ "
+	    LDFLAGS="${LDFLAGS} -L$atlasdir/lib/ "
 	]
 	]
-	)
-
-if test x$blas_lib = xatlas; then
-# test whether STARPU_ATLAS is actually available
-AC_CHECK_HEADER([cblas.h],,AC_MSG_ERROR([cannot find atlas headers]))
-STARPU_CHECK_LIB(BLAS, atlas, ATL_sgemm,,AC_MSG_ERROR([cannot find atlas lib]),)
-STARPU_CHECK_LIB(BLAS, cblas, cblas_sgemm,,AC_MSG_ERROR([cannot find atlas lib]),[-latlas])
-AC_DEFINE(STARPU_ATLAS, [1], [use STARPU_ATLAS library])
-fi
-
+    )
+
+    if test x$blas_lib = xatlas; then
+	# test whether STARPU_ATLAS is actually available
+	AC_CHECK_HEADER([cblas.h],,AC_MSG_ERROR([cannot find atlas headers]))
+	STARPU_CHECK_LIB(BLAS, atlas, ATL_sgemm,,AC_MSG_ERROR([cannot find atlas lib]),)
+	STARPU_CHECK_LIB(BLAS, cblas, cblas_sgemm,,AC_MSG_ERROR([cannot find atlas lib]),[-latlas])
+	AC_DEFINE(STARPU_ATLAS, [1], [use STARPU_ATLAS library])
+    fi
 fi
 fi
 
 
 if test x$blas_lib = xmaybe; then
 if test x$blas_lib = xmaybe; then
-	# Should we use MKL ?
-	AC_ARG_WITH(mkl-cflags, [AS_HELP_STRING([--with-mkl-cflags], [specify MKL compilation flags])],
-		[
-			CPPFLAGS="${CPPFLAGS} $withval"
-			blas_lib=mkl
-		])
+    # Should we use MKL ?
+    AC_ARG_WITH(mkl-cflags, [AS_HELP_STRING([--with-mkl-cflags], [specify MKL compilation flags])],
+	[
+	    CPPFLAGS="${CPPFLAGS} $withval"
+	    blas_lib=mkl
+	    ])
 
 
-	AC_ARG_WITH(mkl-ldflags, [AS_HELP_STRING([--with-mkl-ldflags], [specify MKL linking flags])],
-		[
-			LDFLAGS="${LDFLAGS} $withval"
-			blas_lib=mkl
-		])
-	if test x$blas_lib = xmkl; then
-	        AC_DEFINE(STARPU_MKL, [1], [use MKL library])
-	fi
+    AC_ARG_WITH(mkl-ldflags, [AS_HELP_STRING([--with-mkl-ldflags], [specify MKL linking flags])],
+	[
+	    LDFLAGS="${LDFLAGS} $withval"
+	    blas_lib=mkl
+	    ])
+    if test x$blas_lib = xmkl; then
+	AC_DEFINE(STARPU_MKL, [1], [use MKL library])
+    fi
 fi
 fi
 
 
 if test x$blas_lib = xmaybe; then
 if test x$blas_lib = xmaybe; then
-     #perhaps it is possible to use some BLAS lib from the system
-     use_system_blas=no
-     STARPU_SEARCH_LIBS(BLAS,[sgemm_],[blas],use_system_blas=yes,,)
-     if test x$use_system_blas = xyes; then
+    #perhaps it is possible to use some BLAS lib from the system
+    use_system_blas=no
+    STARPU_SEARCH_LIBS(BLAS,[sgemm_],[blas],use_system_blas=yes,,)
+    if test x$use_system_blas = xyes; then
         AC_DEFINE(STARPU_SYSTEM_BLAS, [1], [use refblas library])
         AC_DEFINE(STARPU_SYSTEM_BLAS, [1], [use refblas library])
 	blas_lib=system
 	blas_lib=system
-     elif test x"$BLAS_LIBS" != x; then
+    elif test x"$BLAS_LIBS" != x; then
         AC_DEFINE(STARPU_SYSTEM_BLAS, [1], [use user defined library])
         AC_DEFINE(STARPU_SYSTEM_BLAS, [1], [use user defined library])
         STARPU_BLAS_LDFLAGS="$BLAS_LIBS"
         STARPU_BLAS_LDFLAGS="$BLAS_LIBS"
         AC_SUBST(STARPU_BLAS_LDFLAGS)
         AC_SUBST(STARPU_BLAS_LDFLAGS)
         blas_lib=system
         blas_lib=system
         AC_ARG_VAR([BLAS_LIBS], [linker flags for blas])
         AC_ARG_VAR([BLAS_LIBS], [linker flags for blas])
-     else
+    else
 	blas_lib=none
 	blas_lib=none
-     fi
+    fi
 fi
 fi
 
 
 AM_CONDITIONAL(ATLAS_BLAS_LIB, test x$blas_lib = xatlas)
 AM_CONDITIONAL(ATLAS_BLAS_LIB, test x$blas_lib = xatlas)

+ 4 - 4
doc/chapters/advanced-api.texi

@@ -73,11 +73,11 @@ todo
 @item @code{struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface)}
 @item @code{struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface)}
 todo
 todo
 
 
-@item @code{int (*pack_data)(starpu_data_handle_t handle, uint32_t node, void **ptr)}
-Pack the data handle into a contiguous buffer at the address @code{ptr}
+@item @code{int (*pack_data)(starpu_data_handle_t handle, uint32_t node, void **ptr, size_t *count)}
+Pack the data handle into a contiguous buffer at the address @code{ptr} and set the size of the newly created buffer in @code{count}
 
 
-@item @code{int (*unpack_data)(starpu_data_handle_t handle, uint32_t node, void *ptr)}
-Unpack the data handle from the contiguous buffer at the address @code{ptr}
+@item @code{int (*unpack_data)(starpu_data_handle_t handle, uint32_t node, void *ptr, size_t count)}
+Unpack the data handle from the contiguous buffer at the address @code{ptr} of size @var{count}
 
 
 @end table
 @end table
 @end deftp
 @end deftp

+ 5 - 4
doc/chapters/basic-api.texi

@@ -767,10 +767,11 @@ The function also sets @var{count} to the size of the data handle by calling
 @code{starpu_handle_get_size()}.
 @code{starpu_handle_get_size()}.
 @end deftypefun
 @end deftypefun
 
 
-@deftypefun int starpu_handle_unpack_data (starpu_data_handle_t @var{handle}, {void *}@var{ptr})
-Copy in @var{handle} the data located at @var{ptr} as described by the
-interface of the data. The interface registered at @var{handle} must
-define a unpacking operation (@pxref{struct starpu_data_interface_ops}).
+@deftypefun int starpu_handle_unpack_data (starpu_data_handle_t @var{handle}, {void *}@var{ptr}, size_t @var{count})
+Unpack in @var{handle} the data located at @var{ptr} of size
+@var{count} as described by the interface of the data. The interface
+registered at @var{handle} must define an unpacking operation
+(@pxref{struct starpu_data_interface_ops}).
 @end deftypefun
 @end deftypefun
 
 
 @node Accessing Variable Data Interfaces
 @node Accessing Variable Data Interfaces

+ 5 - 4
doc/chapters/mpi-support.texi

@@ -259,21 +259,22 @@ of data interface} can also be used within StarPU-MPI and exchanged
 between nodes. Two functions need to be defined through
 between nodes. Two functions need to be defined through
 the type @code{struct starpu_data_interface_ops} (@pxref{Data
 the type @code{struct starpu_data_interface_ops} (@pxref{Data
 Interface API}). The pack function takes a handle and returns a
 Interface API}). The pack function takes a handle and returns a
-contiguous memory buffer where data to be conveyed to another node
+contiguous memory buffer, along with its size, where data to be conveyed to another node
 should be copied. The reversed operation is implemented in the unpack
 should be copied. The reversed operation is implemented in the unpack
 function which takes a contiguous memory buffer and recreates the data
 function which takes a contiguous memory buffer and recreates the data
 handle.
 handle.
 
 
 @cartouche
 @cartouche
 @smallexample
 @smallexample
-static int complex_pack_data(starpu_data_handle_t handle, uint32_t node, void **ptr)
+static int complex_pack_data(starpu_data_handle_t handle, uint32_t node, void **ptr, size_t *count)
 @{
 @{
   STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
   STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
 
   struct starpu_complex_interface *complex_interface =
   struct starpu_complex_interface *complex_interface =
     (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node);
     (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node);
 
 
-  *ptr = malloc(complex_get_size(handle));
+  *count = complex_get_size(handle);
+  *ptr = malloc(*count);
   memcpy(*ptr, complex_interface->real, complex_interface->nx*sizeof(double));
   memcpy(*ptr, complex_interface->real, complex_interface->nx*sizeof(double));
   memcpy(*ptr+complex_interface->nx*sizeof(double), complex_interface->imaginary,
   memcpy(*ptr+complex_interface->nx*sizeof(double), complex_interface->imaginary,
          complex_interface->nx*sizeof(double));
          complex_interface->nx*sizeof(double));
@@ -285,7 +286,7 @@ static int complex_pack_data(starpu_data_handle_t handle, uint32_t node, void **
 
 
 @cartouche
 @cartouche
 @smallexample
 @smallexample
-static int complex_unpack_data(starpu_data_handle_t handle, uint32_t node, void *ptr)
+static int complex_unpack_data(starpu_data_handle_t handle, uint32_t node, void *ptr, size_t count)
 @{
 @{
   STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
   STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
 

+ 2 - 0
examples/Makefile.am

@@ -749,6 +749,8 @@ interface_complex_SOURCES	=	\
 if STARPU_USE_CUDA
 if STARPU_USE_CUDA
 interface_complex_SOURCES	+=	\
 interface_complex_SOURCES	+=	\
 	interface/complex_kernels.cu
 	interface/complex_kernels.cu
+interface/complex_kernels.o: interface/complex_kernels.cu
+	$(NVCC) $< -c -o $@ $(NVCCFLAGS) -arch sm_13
 endif
 endif
 
 
 if STARPU_USE_OPENCL
 if STARPU_USE_OPENCL

+ 26 - 3
examples/interface/complex.c

@@ -18,6 +18,28 @@
 #include "complex_interface.h"
 #include "complex_interface.h"
 #include "complex_codelet.h"
 #include "complex_codelet.h"
 
 
+static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl)
+{
+       if (starpu_worker_get_type(workerid) == STARPU_OPENCL_WORKER)
+               return 1;
+
+#ifdef STARPU_USE_CUDA
+       /* Cuda device */
+       const struct cudaDeviceProp *props;
+       props = starpu_cuda_get_device_properties(workerid);
+       if (props->major >= 2 || props->minor >= 3)
+       {
+               /* At least compute capability 1.3, supports doubles */
+               return 1;
+       }
+       else
+       {
+               /* Old card does not support doubles */
+               return 0;
+       }
+#endif
+}
+
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 extern void copy_complex_codelet_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 extern void copy_complex_codelet_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 #endif
@@ -34,10 +56,10 @@ struct starpu_codelet cl_copy =
 	.opencl_funcs = {copy_complex_codelet_opencl, NULL},
 	.opencl_funcs = {copy_complex_codelet_opencl, NULL},
 #endif
 #endif
 	.nbuffers = 2,
 	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_W}
+	.modes = {STARPU_R, STARPU_W},
+	.can_execute = can_execute
 };
 };
 
 
-
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 struct starpu_opencl_program opencl_program;
 struct starpu_opencl_program opencl_program;
 #endif
 #endif
@@ -95,7 +117,6 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) goto enodev;
 	if (ret == -ENODEV) goto enodev;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 
 
-
 	ret = starpu_insert_task(&cl_compare,
 	ret = starpu_insert_task(&cl_compare,
 				 STARPU_R, handle1,
 				 STARPU_R, handle1,
 				 STARPU_R, handle2,
 				 STARPU_R, handle2,
@@ -103,6 +124,8 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) goto enodev;
 	if (ret == -ENODEV) goto enodev;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 
 
+#warning get the comparison result and return it as the application return code
+
 	starpu_task_wait_for_all();
 	starpu_task_wait_for_all();
 
 
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL

+ 4 - 3
examples/interface/complex_interface.c

@@ -171,21 +171,22 @@ static void *complex_handle_to_pointer(starpu_data_handle_t handle, uint32_t nod
 	return (void*) complex_interface->real;
 	return (void*) complex_interface->real;
 }
 }
 
 
-static int complex_pack_data(starpu_data_handle_t handle, uint32_t node, void **ptr)
+static int complex_pack_data(starpu_data_handle_t handle, uint32_t node, void **ptr, size_t *count)
 {
 {
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
 
 	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
 	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
 		starpu_data_get_interface_on_node(handle, node);
 		starpu_data_get_interface_on_node(handle, node);
 
 
-	*ptr = malloc(complex_get_size(handle));
+	*count = complex_get_size(handle);
+	*ptr = malloc(*count);
 	memcpy(*ptr, complex_interface->real, complex_interface->nx*sizeof(double));
 	memcpy(*ptr, complex_interface->real, complex_interface->nx*sizeof(double));
 	memcpy(*ptr+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double));
 	memcpy(*ptr+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double));
 
 
 	return 0;
 	return 0;
 }
 }
 
 
-static int complex_unpack_data(starpu_data_handle_t handle, uint32_t node, void *ptr)
+static int complex_unpack_data(starpu_data_handle_t handle, uint32_t node, void *ptr, size_t count)
 {
 {
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
 

+ 3 - 3
include/starpu_data_interfaces.h

@@ -133,9 +133,9 @@ struct starpu_data_interface_ops
 	struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface);
 	struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface);
 
 
 	/* Pack the data handle into a contiguous buffer at the address ptr and store the size of the buffer in count */
 	/* Pack the data handle into a contiguous buffer at the address ptr and store the size of the buffer in count */
-	int (*pack_data)(starpu_data_handle_t handle, uint32_t node, void **ptr);
+        int (*pack_data)(starpu_data_handle_t handle, uint32_t node, void **ptr, size_t *count);
 	/* Unpack the data handle from the contiguous buffer at the address ptr */
 	/* Unpack the data handle from the contiguous buffer at the address ptr */
-	int (*unpack_data)(starpu_data_handle_t handle, uint32_t node, void *ptr);
+	int (*unpack_data)(starpu_data_handle_t handle, uint32_t node, void *ptr, size_t count);
 };
 };
 
 
 /* Return the next available id for a data interface */
 /* Return the next available id for a data interface */
@@ -435,7 +435,7 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handle, uint32_t hom
 enum starpu_data_interface_id starpu_handle_get_interface_id(starpu_data_handle_t handle);
 enum starpu_data_interface_id starpu_handle_get_interface_id(starpu_data_handle_t handle);
 
 
 int starpu_handle_pack_data(starpu_data_handle_t handle, void **ptr, size_t *count);
 int starpu_handle_pack_data(starpu_data_handle_t handle, void **ptr, size_t *count);
-int starpu_handle_unpack_data(starpu_data_handle_t handle, void *ptr);
+int starpu_handle_unpack_data(starpu_data_handle_t handle, void *ptr, size_t count);
 size_t starpu_handle_get_size(starpu_data_handle_t handle);
 size_t starpu_handle_get_size(starpu_data_handle_t handle);
 
 
 /* Lookup a ram pointer into a StarPU handle */
 /* Lookup a ram pointer into a StarPU handle */

+ 3 - 6
mpi/examples/Makefile.am

@@ -51,17 +51,14 @@ examplebindir = $(libdir)/starpu/mpi
 examplebin_PROGRAMS =
 examplebin_PROGRAMS =
 
 
 if STARPU_USE_CUDA
 if STARPU_USE_CUDA
-# TODO define NVCCFLAGS
-NVCC ?= nvcc
-
-NVCCFLAGS += -I$(top_srcdir)/include/ -I$(top_builddir)/include
+NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -I$(top_builddir)/include/ $(HWLOC_CFLAGS)
 
 
 .cu.cubin:
 .cu.cubin:
 	$(MKDIR_P) `dirname $@`
 	$(MKDIR_P) `dirname $@`
-	$(NVCC) -cubin $< -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS)
+	$(NVCC) -cubin $< -o $@ $(NVCCFLAGS)
 
 
 .cu.o:
 .cu.o:
-	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I$(top_srcdir)/include/  -I$(top_builddir)/include/
+	$(NVCC) $< -c -o $@ $(NVCCFLAGS)
 endif
 endif
 
 
 AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS)
 AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS)

+ 1 - 1
mpi/examples/cholesky/mpi_cholesky_codelets.c

@@ -90,7 +90,7 @@ void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblocks, in
 				starpu_matrix_data_register(&data_handles[x][y], 0, (uintptr_t)matA[x][y],
 				starpu_matrix_data_register(&data_handles[x][y], 0, (uintptr_t)matA[x][y],
 						ld, size/nblocks, size/nblocks, sizeof(float));
 						ld, size/nblocks, size/nblocks, sizeof(float));
 			}
 			}
-			/* TODO: make better test to only registering what is needed */
+#warning TODO: make better test to only register what is needed
 			else
 			else
 			{
 			{
 				/* I don't own that index, but will need it for my computations */
 				/* I don't own that index, but will need it for my computations */

+ 71 - 30
mpi/src/starpu_mpi.c

@@ -24,8 +24,9 @@
 #include <starpu_mpi_stats.h>
 #include <starpu_mpi_stats.h>
 #include <starpu_mpi_insert_task.h>
 #include <starpu_mpi_insert_task.h>
 
 
-/* TODO find a better way to select the polling method (perhaps during the
- * configuration) */
+#ifdef STARPU_DEVEL
+#  warning TODO find a better way to select the polling method (perhaps during the configuration)
+#endif
 //#define USE_STARPU_ACTIVITY	1
 //#define USE_STARPU_ACTIVITY	1
 
 
 static void _starpu_mpi_submit_new_mpi_request(void *arg);
 static void _starpu_mpi_submit_new_mpi_request(void *arg);
@@ -33,6 +34,10 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req);
 #ifdef STARPU_MPI_VERBOSE
 #ifdef STARPU_MPI_VERBOSE
 static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type);
 static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type);
 #endif
 #endif
+static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
+							int dest, int mpi_tag, MPI_Comm comm,
+							unsigned detached, void (*callback)(void *), void *arg);
+static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg);
 
 
 /* The list of requests that have been newly submitted by the application */
 /* The list of requests that have been newly submitted by the application */
 static struct _starpu_mpi_req_list *new_requests;
 static struct _starpu_mpi_req_list *new_requests;
@@ -109,23 +114,13 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
 /*                                                      */
 /*                                                      */
 /********************************************************/
 /********************************************************/
 
 
-static void _starpu_mpi_isend_func(struct _starpu_mpi_req *req)
+static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 {
 {
         _STARPU_MPI_LOG_IN();
         _STARPU_MPI_LOG_IN();
 
 
-	req->needs_unpacking = starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype);
-	if (req->needs_unpacking)
-	{
-		starpu_handle_pack_data(req->data_handle, &req->ptr, &req->count);
-	}
-	else
-	{
-		req->count = 1;
-		req->ptr = starpu_handle_get_local_ptr(req->data_handle);
-	}
 	STARPU_ASSERT(req->ptr);
 	STARPU_ASSERT(req->ptr);
 
 
-        _STARPU_MPI_DEBUG("post MPI isend tag %d dst %d ptr %p datatype %p count %d req %p\n", req->mpi_tag, req->srcdst, req->ptr, req->datatype, req->count, &req->request);
+        _STARPU_MPI_DEBUG("post MPI isend tag %d dst %d ptr %p datatype %p count %d req %p\n", req->mpi_tag, req->srcdst, req->ptr, req->datatype, (int)req->count, &req->request);
 
 
 	_starpu_mpi_comm_amounts_inc(req->comm, req->srcdst, req->datatype, req->count);
 	_starpu_mpi_comm_amounts_inc(req->comm, req->srcdst, req->datatype, req->count);
 
 
@@ -142,11 +137,37 @@ static void _starpu_mpi_isend_func(struct _starpu_mpi_req *req)
         _STARPU_MPI_LOG_OUT();
         _STARPU_MPI_LOG_OUT();
 }
 }
 
 
+static void _starpu_mpi_isend_size_callback(void *arg)
+{
+	struct _starpu_mpi_req *req = (struct _starpu_mpi_req *) arg;
+	_starpu_mpi_isend_data_func(req);
+}
+
+static void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
+{
+	req->needs_unpacking = starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype);
+	if (!req->needs_unpacking)
+	{
+		req->count = 1;
+		req->ptr = starpu_handle_get_local_ptr(req->data_handle);
+		_starpu_mpi_isend_data_func(req);
+	}
+	else
+	{
+		starpu_data_handle_t count_handle;
+
+		starpu_handle_pack_data(req->data_handle, &req->ptr, &req->count);
+		starpu_variable_data_register(&count_handle, 0, (uintptr_t)&req->count, sizeof(req->count));
+		_starpu_mpi_isend_common(count_handle, req->srcdst, req->mpi_tag, req->comm, 1, _starpu_mpi_isend_size_callback, req);
+		starpu_data_unregister_submit(count_handle);
+	}
+}
+
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
 							int dest, int mpi_tag, MPI_Comm comm,
 							int dest, int mpi_tag, MPI_Comm comm,
 							unsigned detached, void (*callback)(void *), void *arg)
 							unsigned detached, void (*callback)(void *), void *arg)
 {
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_func, STARPU_R);
+	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R);
 }
 }
 
 
 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
@@ -195,24 +216,13 @@ int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI
 /*                                                      */
 /*                                                      */
 /********************************************************/
 /********************************************************/
 
 
-static void _starpu_mpi_irecv_func(struct _starpu_mpi_req *req)
+static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 {
 {
         _STARPU_MPI_LOG_IN();
         _STARPU_MPI_LOG_IN();
 
 
-	req->needs_unpacking = starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype);
-	if (req->needs_unpacking == 1)
-	{
-		req->count = starpu_handle_get_size(req->data_handle);
-		req->ptr = malloc(req->count);
-	}
-	else
-	{
-		req->count = 1;
-		req->ptr = starpu_handle_get_local_ptr(req->data_handle);
-	}
 	STARPU_ASSERT(req->ptr);
 	STARPU_ASSERT(req->ptr);
 
 
-	_STARPU_MPI_DEBUG("post MPI irecv tag %d src %d data %p ptr %p req %p datatype %p\n", req->mpi_tag, req->srcdst, req->data_handle, req->ptr, &req->request, req->datatype);
+	_STARPU_MPI_DEBUG("post MPI irecv tag %d src %d data %p ptr %p datatype %p count %d req %p \n", req->mpi_tag, req->srcdst, req->data_handle, req->ptr, req->datatype, (int)req->count, &req->request);
 
 
         req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
         req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
         STARPU_ASSERT(req->ret == MPI_SUCCESS);
         STARPU_ASSERT(req->ret == MPI_SUCCESS);
@@ -225,9 +235,40 @@ static void _starpu_mpi_irecv_func(struct _starpu_mpi_req *req)
         _STARPU_MPI_LOG_OUT();
         _STARPU_MPI_LOG_OUT();
 }
 }
 
 
+static void _starpu_mpi_irecv_size_callback(void *arg)
+{
+	struct _starpu_mpi_req *req = (struct _starpu_mpi_req *) arg;
+#ifdef STARPU_DEVEL
+#  warning TODO: are we sure that req->count can be used as we have not released count_handle?
+#endif
+	req->ptr = malloc(req->count);
+	_starpu_mpi_irecv_data_func(req);
+}
+
+static void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
+{
+        _STARPU_MPI_LOG_IN();
+
+	req->needs_unpacking = starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype);
+	if (!req->needs_unpacking)
+	{
+		req->count = 1;
+		req->ptr = starpu_handle_get_local_ptr(req->data_handle);
+		_starpu_mpi_irecv_data_func(req);
+	}
+	else
+	{
+		starpu_data_handle_t count_handle;
+
+		starpu_variable_data_register(&count_handle, 0, (uintptr_t)&req->count, sizeof(req->count));
+		_starpu_mpi_irecv_common(count_handle, req->srcdst, req->mpi_tag, req->comm, 1, _starpu_mpi_irecv_size_callback, req);
+		starpu_data_unregister_submit(count_handle);
+	}
+}
+
 static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg)
 static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg)
 {
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_func, STARPU_W);
+	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_size_func, STARPU_W);
 }
 }
 
 
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
@@ -517,7 +558,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
         if (req->request_type != BARRIER_REQ)
         if (req->request_type != BARRIER_REQ)
 	{
 	{
 		if (req->needs_unpacking)
 		if (req->needs_unpacking)
-			starpu_handle_unpack_data(req->data_handle, req->ptr);
+			starpu_handle_unpack_data(req->data_handle, req->ptr, req->count);
 		else
 		else
 			MPI_Type_free(&req->datatype);
 			MPI_Type_free(&req->datatype);
                 starpu_data_release(req->data_handle);
                 starpu_data_release(req->data_handle);

+ 2 - 2
mpi/src/starpu_mpi_collective.c

@@ -47,7 +47,7 @@ int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, i
 	MPI_Comm_rank(comm, &rank);
 	MPI_Comm_rank(comm, &rank);
 
 
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL
-#warning callback_arg needs to be free-ed
+#warning TODO: callback_arg needs to be free-ed
 #endif
 #endif
 	callback_func = _callback_collective;
 	callback_func = _callback_collective;
 	callback_arg = malloc(sizeof(struct _callback_arg));
 	callback_arg = malloc(sizeof(struct _callback_arg));
@@ -115,7 +115,7 @@ int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, in
 	MPI_Comm_rank(comm, &rank);
 	MPI_Comm_rank(comm, &rank);
 
 
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL
-#warning callback_arg needs to be free-ed
+#warning TODO: callback_arg needs to be free-ed
 #endif
 #endif
 	callback_func = _callback_collective;
 	callback_func = _callback_collective;
 	callback_arg = malloc(sizeof(struct _callback_arg));
 	callback_arg = malloc(sizeof(struct _callback_arg));

+ 3 - 2
mpi/src/starpu_mpi_insert_task.c

@@ -293,8 +293,9 @@ void _starpu_mpi_clear_data_after_execution(starpu_data_handle_t data, enum star
 				HASH_FIND_PTR(received_data[mpi_rank], &data, already_received);
 				HASH_FIND_PTR(received_data[mpi_rank], &data, already_received);
 				if (already_received)
 				if (already_received)
 				{
 				{
-					/* Somebody else will write to the data, so discard our cached copy if any */
-					/* TODO: starpu_mpi could just remember itself. */
+#ifdef STARPU_DEVEL
+#  warning TODO: Somebody else will write to the data, so discard our cached copy if any. starpu_mpi could just remember itself.
+#endif
 					_STARPU_MPI_DEBUG("Clearing receive cache for data %p\n", data);
 					_STARPU_MPI_DEBUG("Clearing receive cache for data %p\n", data);
 					HASH_DEL(received_data[mpi_rank], already_received);
 					HASH_DEL(received_data[mpi_rank], already_received);
 					starpu_data_invalidate_submit(data);
 					starpu_data_invalidate_submit(data);

+ 3 - 6
mpi/tests/Makefile.am

@@ -37,17 +37,14 @@ examplebindir = $(libdir)/starpu/examples/mpi
 examplebin_PROGRAMS =
 examplebin_PROGRAMS =
 
 
 if STARPU_USE_CUDA
 if STARPU_USE_CUDA
-# TODO define NVCCFLAGS
-NVCC ?= nvcc
-
-NVCCFLAGS += -I$(top_srcdir)/include/ -I$(top_builddir)/include
+NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -I$(top_builddir)/include/ $(HWLOC_CFLAGS)
 
 
 .cu.cubin:
 .cu.cubin:
 	$(MKDIR_P) `dirname $@`
 	$(MKDIR_P) `dirname $@`
-	$(NVCC) -cubin $< -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS)
+	$(NVCC) -cubin $< -o $@ $(NVCCFLAGS)
 
 
 .cu.o:
 .cu.o:
-	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I$(top_srcdir)/include/  -I$(top_builddir)/include/
+	$(NVCC) $< -c -o $@ $(NVCCFLAGS)
 endif
 endif
 
 
 AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS)
 AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS)

+ 1 - 1
src/core/workers.h

@@ -79,7 +79,7 @@ struct _starpu_worker
 	unsigned worker_is_running;
 	unsigned worker_is_running;
 	unsigned worker_is_initialized;
 	unsigned worker_is_initialized;
 	enum _starpu_worker_status status; /* what is the worker doing now ? (eg. CALLBACK) */
 	enum _starpu_worker_status status; /* what is the worker doing now ? (eg. CALLBACK) */
-	char name[48];
+	char name[64];
 	char short_name[10];
 	char short_name[10];
 	unsigned run_by_starpu; /* Is this run by StarPU or directly by the application ? */
 	unsigned run_by_starpu; /* Is this run by StarPU or directly by the application ? */
 
 

+ 1 - 1
src/datawizard/interfaces/coo_interface.c

@@ -374,7 +374,7 @@ allocate_coo_buffer_on_node(void *data_interface, uint32_t dst_node)
 		if (STARPU_UNLIKELY(addr_rows == NULL))
 		if (STARPU_UNLIKELY(addr_rows == NULL))
 			goto fail_rows;
 			goto fail_rows;
 		addr_values = (uintptr_t) malloc(n_values * elemsize);
 		addr_values = (uintptr_t) malloc(n_values * elemsize);
-		if (STARPU_UNLIKELY(addr_values == NULL))
+		if (STARPU_UNLIKELY(addr_values == (uintptr_t) NULL))
 			goto fail_values;
 			goto fail_values;
 		break;
 		break;
 	}
 	}

+ 3 - 4
src/datawizard/interfaces/data_interface.c

@@ -684,14 +684,13 @@ int starpu_data_interface_get_next_id(void)
 int starpu_handle_pack_data(starpu_data_handle_t handle, void **ptr, size_t *count)
 int starpu_handle_pack_data(starpu_data_handle_t handle, void **ptr, size_t *count)
 {
 {
 	STARPU_ASSERT(handle->ops->pack_data);
 	STARPU_ASSERT(handle->ops->pack_data);
-	*count = starpu_handle_get_size(handle);
-	return handle->ops->pack_data(handle, _starpu_get_local_memory_node(), ptr);
+	return handle->ops->pack_data(handle, _starpu_get_local_memory_node(), ptr, count);
 }
 }
 
 
-int starpu_handle_unpack_data(starpu_data_handle_t handle, void *ptr)
+int starpu_handle_unpack_data(starpu_data_handle_t handle, void *ptr, size_t count)
 {
 {
 	STARPU_ASSERT(handle->ops->unpack_data);
 	STARPU_ASSERT(handle->ops->unpack_data);
-	return handle->ops->unpack_data(handle, _starpu_get_local_memory_node(), ptr);
+	return handle->ops->unpack_data(handle, _starpu_get_local_memory_node(), ptr, count);
 }
 }
 
 
 size_t starpu_handle_get_size(starpu_data_handle_t handle)
 size_t starpu_handle_get_size(starpu_data_handle_t handle)