瀏覽代碼

remove nmad as its code has been merged into mpi

Nathalie Furmento 7 年之前
父節點
當前提交
834ba9c05a
共有 100 個文件被更改,包括 4 次插入和 16146 次删除
  1. 1 1
      Makefile.am
  2. 0 9
      configure.ac
  3. 2 2
      mpi/src/nmad/starpu_mpi_nmad.c
  4. 1 1
      mpi/src/starpu_mpi_private.h
  5. 0 1
      nmad/.gitignore
  6. 0 41
      nmad/Makefile.am
  7. 0 396
      nmad/examples/Makefile.am
  8. 0 150
      nmad/examples/comm/comm.c
  9. 0 185
      nmad/examples/comm/mix_comm.c
  10. 0 126
      nmad/examples/complex/mpi_complex.c
  11. 0 27
      nmad/examples/helper.h
  12. 0 86
      nmad/examples/matrix_decomposition/mpi_cholesky.c
  13. 0 31
      nmad/examples/matrix_decomposition/mpi_cholesky.h
  14. 0 267
      nmad/examples/matrix_decomposition/mpi_cholesky_codelets.c
  15. 0 30
      nmad/examples/matrix_decomposition/mpi_cholesky_codelets.h
  16. 0 60
      nmad/examples/matrix_decomposition/mpi_cholesky_distributed.c
  17. 0 246
      nmad/examples/matrix_decomposition/mpi_cholesky_kernels.c
  18. 0 33
      nmad/examples/matrix_decomposition/mpi_cholesky_kernels.h
  19. 0 40
      nmad/examples/matrix_decomposition/mpi_cholesky_models.c
  20. 0 25
      nmad/examples/matrix_decomposition/mpi_cholesky_models.h
  21. 0 111
      nmad/examples/matrix_decomposition/mpi_decomposition_matrix.c
  22. 0 30
      nmad/examples/matrix_decomposition/mpi_decomposition_matrix.h
  23. 0 113
      nmad/examples/matrix_decomposition/mpi_decomposition_params.c
  24. 0 34
      nmad/examples/matrix_decomposition/mpi_decomposition_params.h
  25. 0 30
      nmad/examples/matrix_mult/Makefile
  26. 0 25
      nmad/examples/matrix_mult/environment
  27. 0 390
      nmad/examples/matrix_mult/mm.c
  28. 0 42
      nmad/examples/mpi_lu/mpi_lu-double.h
  29. 0 42
      nmad/examples/mpi_lu/mpi_lu-float.h
  30. 0 19
      nmad/examples/mpi_lu/pdlu.c
  31. 0 19
      nmad/examples/mpi_lu/pdlu_implicit.c
  32. 0 19
      nmad/examples/mpi_lu/pdlu_kernels.c
  33. 0 597
      nmad/examples/mpi_lu/plu_example.c
  34. 0 19
      nmad/examples/mpi_lu/plu_example_double.c
  35. 0 19
      nmad/examples/mpi_lu/plu_example_float.c
  36. 0 369
      nmad/examples/mpi_lu/plu_implicit_example.c
  37. 0 19
      nmad/examples/mpi_lu/plu_implicit_example_double.c
  38. 0 19
      nmad/examples/mpi_lu/plu_implicit_example_float.c
  39. 0 402
      nmad/examples/mpi_lu/plu_outofcore_example.c
  40. 0 19
      nmad/examples/mpi_lu/plu_outofcore_example_double.c
  41. 0 19
      nmad/examples/mpi_lu/plu_outofcore_example_float.c
  42. 0 397
      nmad/examples/mpi_lu/plu_solve.c
  43. 0 19
      nmad/examples/mpi_lu/plu_solve_double.c
  44. 0 19
      nmad/examples/mpi_lu/plu_solve_float.c
  45. 0 19
      nmad/examples/mpi_lu/pslu.c
  46. 0 19
      nmad/examples/mpi_lu/pslu_implicit.c
  47. 0 19
      nmad/examples/mpi_lu/pslu_kernels.c
  48. 0 919
      nmad/examples/mpi_lu/pxlu.c
  49. 0 69
      nmad/examples/mpi_lu/pxlu.h
  50. 0 184
      nmad/examples/mpi_lu/pxlu_implicit.c
  51. 0 454
      nmad/examples/mpi_lu/pxlu_kernels.c
  52. 0 32
      nmad/examples/mpi_lu/pxlu_kernels.h
  53. 0 19
      nmad/examples/mpi_lu/slu_kernels.c
  54. 0 108
      nmad/examples/native_fortran/nf_basic_ring.f90
  55. 0 236
      nmad/examples/native_fortran/nf_mm.f90
  56. 0 90
      nmad/examples/native_fortran/nf_mm_cl.f90
  57. 0 106
      nmad/examples/perf.sh
  58. 0 287
      nmad/examples/stencil/stencil5.c
  59. 0 290
      nmad/examples/stencil/stencil5_lb.c
  60. 0 228
      nmad/examples/user_datatype/my_interface.c
  61. 0 62
      nmad/examples/user_datatype/my_interface.h
  62. 0 118
      nmad/examples/user_datatype/user_datatype.c
  63. 0 752
      nmad/include/fstarpu_mpi_mod.f90
  64. 0 145
      nmad/include/starpu_mpi.h
  65. 0 44
      nmad/include/starpu_mpi_lb.h
  66. 0 29
      nmad/libstarpumpi.pc.in
  67. 0 105
      nmad/src/Makefile.am
  68. 0 161
      nmad/src/load_balancer/load_balancer.c
  69. 0 286
      nmad/src/load_balancer/policy/data_movements_interface.c
  70. 0 48
      nmad/src/load_balancer/policy/data_movements_interface.h
  71. 0 53
      nmad/src/load_balancer/policy/load_balancer_policy.h
  72. 0 274
      nmad/src/load_balancer/policy/load_data_interface.c
  73. 0 70
      nmad/src/load_balancer/policy/load_data_interface.h
  74. 0 643
      nmad/src/load_balancer/policy/load_heat_propagation.c
  75. 0 224
      nmad/src/mpi/starpu_mpi_comm.c
  76. 0 43
      nmad/src/mpi/starpu_mpi_comm.h
  77. 0 124
      nmad/src/mpi/starpu_mpi_early_data.c
  78. 0 59
      nmad/src/mpi/starpu_mpi_early_data.h
  79. 0 121
      nmad/src/mpi/starpu_mpi_early_request.c
  80. 0 47
      nmad/src/mpi/starpu_mpi_early_request.h
  81. 0 1640
      nmad/src/mpi/starpu_mpi_mpi.c
  82. 0 153
      nmad/src/mpi/starpu_mpi_sync_data.c
  83. 0 46
      nmad/src/mpi/starpu_mpi_sync_data.h
  84. 0 122
      nmad/src/mpi/starpu_mpi_tag.c
  85. 0 43
      nmad/src/mpi/starpu_mpi_tag.h
  86. 0 767
      nmad/src/nmad/starpu_mpi_nmad.c
  87. 0 410
      nmad/src/starpu_mpi.c
  88. 0 398
      nmad/src/starpu_mpi_cache.c
  89. 0 59
      nmad/src/starpu_mpi_cache.h
  90. 0 62
      nmad/src/starpu_mpi_cache_stats.c
  91. 0 41
      nmad/src/starpu_mpi_cache_stats.h
  92. 0 156
      nmad/src/starpu_mpi_collective.c
  93. 0 323
      nmad/src/starpu_mpi_datatype.c
  94. 0 39
      nmad/src/starpu_mpi_datatype.h
  95. 0 303
      nmad/src/starpu_mpi_fortran.c
  96. 0 157
      nmad/src/starpu_mpi_fxt.h
  97. 0 120
      nmad/src/starpu_mpi_helper.c
  98. 0 237
      nmad/src/starpu_mpi_init.c
  99. 0 35
      nmad/src/starpu_mpi_init.h
  100. 0 0
      nmad/src/starpu_mpi_private.c

+ 1 - 1
Makefile.am

@@ -40,7 +40,7 @@ SUBDIRS += mpi
 endif
 
 if USE_NMAD
-SUBDIRS += nmad
+SUBDIRS += mpi
 endif
 
 if USE_DSM

+ 0 - 9
configure.ac

@@ -3416,11 +3416,6 @@ AC_OUTPUT([
 	mpi/starpumpi-1.1.pc
 	mpi/starpumpi-1.2.pc
 	mpi/starpumpi-1.3.pc
-  nmad/libstarpumpi.pc
-  nmad/starpumpi-1.0.pc
-  nmad/starpumpi-1.1.pc
-  nmad/starpumpi-1.2.pc
-  nmad/starpumpi-1.3.pc
 	starpufft/Makefile
 	starpufft/src/Makefile
 	starpufft/tests/Makefile
@@ -3440,10 +3435,6 @@ AC_OUTPUT([
 	mpi/src/Makefile
 	mpi/tests/Makefile
 	mpi/examples/Makefile
-  nmad/Makefile
-  nmad/src/Makefile
-  nmad/tests/Makefile
-  nmad/examples/Makefile
 	starpu-top/StarPU-Top.pro
 	starpu-top/StarPU-Top-qwt-embed.pri
 	starpu-top/StarPU-Top-qwt-system.pri

+ 2 - 2
mpi/src/nmad/starpu_mpi_nmad.c

@@ -206,7 +206,7 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(30, "post MPI isend request %p type %s tag %d src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
+	_STARPU_MPI_DEBUG(30, "post NM isend request %p type %s tag %d src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
 
 	_starpu_mpi_comm_amounts_inc(req->node_tag.comm, req->node_tag.rank, req->datatype, req->count);
 
@@ -297,7 +297,7 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(20, "post MPI irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
+	_STARPU_MPI_DEBUG(20, "post NM irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 
 	_STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
 

+ 1 - 1
mpi/src/starpu_mpi_private.h

@@ -143,7 +143,7 @@ int _starpu_debug_rank;
                                              fprintf(stderr, "[%d][starpu_mpi][%s:%d] " fmt , _starpu_debug_rank, __starpu_func__ , __LINE__ ,## __VA_ARGS__); \
                                              fflush(stderr); } while(0);
 
-#ifdef STARPU_VERBOSE
+#ifdef xSTARPU_VERBOSE
 #  define _STARPU_MPI_LOG_IN()             do { if (!_starpu_silent) { \
                                                if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank);                        \
                                                fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] -->\n", (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__); \

+ 0 - 1
nmad/.gitignore

@@ -1 +0,0 @@
-/.deps

+ 0 - 41
nmad/Makefile.am

@@ -1,41 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2013, 2015  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
-# Copyright (C) 2016  Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-SUBDIRS=src tests examples
-
-pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = libstarpumpi.pc starpumpi-1.0.pc starpumpi-1.1.pc starpumpi-1.2.pc starpumpi-1.3.pc
-
-versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
-versinclude_HEADERS = 					\
-	include/starpu_mpi.h				\
-	include/starpu_mpi_lb.h				\
-	include/fstarpu_mpi_mod.f90
-
-recheck:
-	RET=0 ; \
-	for i in $(SUBDIRS) ; do \
-		make -C $$i recheck || RET=1 ; \
-	done ; \
-	exit $$RET
-
-showcheck:
-	RET=0 ; \
-	for i in $(SUBDIRS) ; do \
-		make -C $$i showcheck || RET=1 ; \
-	done ; \
-	exit $$RET

+ 0 - 396
nmad/examples/Makefile.am

@@ -1,396 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2013, 2015-2017  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
-# Copyright (C) 2016  Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-include $(top_srcdir)/starpu.mk
-
-if STARPU_SIMGRID
-STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling
-STARPU_HOSTNAME=mirage
-MALLOC_PERTURB_=0
-export STARPU_PERF_MODEL_DIR
-export STARPU_HOSTNAME
-export MALLOC_PERTURB_
-endif
-
-CC=$(MPICC)
-CCLD=$(MPICC)
-FC=$(MPIFORT)
-FCLD=$(MPIFORT)
-
-if STARPU_HAVE_WINDOWS
-LOADER_BIN		=
-else
-loader_CPPFLAGS 	= 	$(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
-if !STARPU_SIMGRID
-LOADER			=	loader
-LOADER_BIN		=	$(abs_top_builddir)/nmad/examples/$(LOADER)
-endif
-loader_SOURCES		=	../../tests/loader.c
-endif
-
-if STARPU_SIMGRID
-MPI			=	$(abs_top_builddir)/tools/starpu_smpirun -np 4 -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile
-else
-# we always test on 4 processes, the execution time is not that bigger
-if STARPU_QUICK_CHECK
-MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
-else
-MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
-endif
-endif
-
-if STARPU_HAVE_AM111
-TESTS_ENVIRONMENT	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
-LOG_COMPILER	 	=	$(MPI) $(LOADER_BIN)
-else
-TESTS_ENVIRONMENT 	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(MPI) $(LOADER_BIN)
-endif
-
-if STARPU_MPI_CHECK
-TESTS			=	$(starpu_mpi_EXAMPLES)
-endif
-
-check_PROGRAMS = $(LOADER) $(starpu_mpi_EXAMPLES)
-starpu_mpi_EXAMPLES =
-
-BUILT_SOURCES =
-
-CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log *.mod
-
-EXTRA_DIST = 				\
-	mpi_lu/mpi_lu-float.h		\
-	mpi_lu/mpi_lu-double.h		\
-	mpi_lu/plu_example.c		\
-	mpi_lu/plu_implicit_example.c	\
-	mpi_lu/plu_outofcore_example.c	\
-	mpi_lu/plu_solve.c		\
-	mpi_lu/pxlu.h			\
-	mpi_lu/pxlu.c			\
-	mpi_lu/pxlu_implicit.c		\
-	mpi_lu/pxlu_kernels.h		\
-	mpi_lu/pxlu_kernels.c		\
-	matrix_decomposition/mpi_cholesky.h 		\
-	matrix_decomposition/mpi_cholesky_codelets.h 	\
-	matrix_decomposition/mpi_cholesky_kernels.h	\
-	matrix_decomposition/mpi_cholesky_models.h 	\
-	matrix_decomposition/mpi_decomposition_params.h	\
-	matrix_decomposition/mpi_decomposition_matrix.h	\
-	user_datatype/my_interface.h			\
-	helper.h
-
-examplebindir = $(libdir)/starpu/mpi
-
-examplebin_PROGRAMS =
-
-if STARPU_USE_CUDA
-if STARPU_COVERITY
-include $(top_srcdir)/starpu-mynvcc.mk
-else
-NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -I$(top_builddir)/include/ $(HWLOC_CFLAGS)
-
-.cu.cubin:
-	$(MKDIR_P) `dirname $@`
-	$(NVCC) -cubin $< -o $@ $(NVCCFLAGS)
-
-.cu.o:
-	$(NVCC) $< -c -o $@ $(NVCCFLAGS)
-endif
-endif
-
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la -lm @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include
-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)
-
-###################
-# Stencil example #
-###################
-if BUILD_EXAMPLES
-examplebin_PROGRAMS +=		\
-	stencil/stencil5
-starpu_mpi_EXAMPLES	+=	\
-	stencil/stencil5
-
-if STARPU_USE_MPI_MPI
-examplebin_PROGRAMS +=		\
-	stencil/stencil5_lb
-starpu_mpi_EXAMPLES	+=	\
-	stencil/stencil5_lb
-endif
-
-endif
-
-##################
-# MPI LU example #
-##################
-
-if BUILD_EXAMPLES
-if !NO_BLAS_LIB
-
-examplebin_PROGRAMS += 			\
-	mpi_lu/plu_example_float	\
-	mpi_lu/plu_example_double	\
-	mpi_lu/plu_implicit_example_float	\
-	mpi_lu/plu_implicit_example_double	\
-	mpi_lu/plu_outofcore_example_float	\
-	mpi_lu/plu_outofcore_example_double
-
-mpi_lu_plu_example_float_LDADD =	\
-	$(STARPU_LIBNUMA_LDFLAGS)				\
-	$(STARPU_BLAS_LDFLAGS) -lm
-
-mpi_lu_plu_example_float_SOURCES =	\
-	mpi_lu/plu_example_float.c	\
-	mpi_lu/plu_solve_float.c	\
-	mpi_lu/pslu_kernels.c		\
-	mpi_lu/pslu.c			\
-	../../examples/common/blas.c
-
-mpi_lu_plu_example_double_LDADD =	\
-	$(STARPU_LIBNUMA_LDFLAGS)				\
-	$(STARPU_BLAS_LDFLAGS) -lm
-
-mpi_lu_plu_example_double_SOURCES =	\
-	mpi_lu/plu_example_double.c	\
-	mpi_lu/plu_solve_double.c  	\
-	mpi_lu/pdlu_kernels.c	    	\
-	mpi_lu/pdlu.c		    	\
-	../../examples/common/blas.c
-
-mpi_lu_plu_implicit_example_float_LDADD =	\
-	$(STARPU_LIBNUMA_LDFLAGS)				\
-	$(STARPU_BLAS_LDFLAGS) -lm
-
-mpi_lu_plu_implicit_example_float_SOURCES =	\
-	mpi_lu/plu_implicit_example_float.c	\
-	mpi_lu/plu_solve_float.c		\
-	mpi_lu/pslu_kernels.c			\
-	mpi_lu/pslu_implicit.c			\
-	../../examples/common/blas.c
-
-mpi_lu_plu_implicit_example_double_LDADD =	\
-	$(STARPU_LIBNUMA_LDFLAGS)				\
-	$(STARPU_BLAS_LDFLAGS) -lm
-
-mpi_lu_plu_implicit_example_double_SOURCES =	\
-	mpi_lu/plu_implicit_example_double.c	\
-	mpi_lu/plu_solve_double.c		\
-	mpi_lu/pdlu_kernels.c			\
-	mpi_lu/pdlu_implicit.c			\
-	../../examples/common/blas.c
-
-mpi_lu_plu_outofcore_example_float_LDADD =	\
-	$(STARPU_LIBNUMA_LDFLAGS)				\
-	$(STARPU_BLAS_LDFLAGS) -lm
-
-mpi_lu_plu_outofcore_example_float_SOURCES =	\
-	mpi_lu/plu_outofcore_example_float.c	\
-	mpi_lu/plu_solve_float.c		\
-	mpi_lu/pslu_kernels.c			\
-	mpi_lu/pslu_implicit.c			\
-	../../examples/common/blas.c
-
-mpi_lu_plu_outofcore_example_double_LDADD =	\
-	$(STARPU_LIBNUMA_LDFLAGS)				\
-	$(STARPU_BLAS_LDFLAGS) -lm
-
-mpi_lu_plu_outofcore_example_double_SOURCES =	\
-	mpi_lu/plu_outofcore_example_double.c	\
-	mpi_lu/plu_solve_double.c		\
-	mpi_lu/pdlu_kernels.c			\
-	mpi_lu/pdlu_implicit.c			\
-	../../examples/common/blas.c
-endif
-endif
-
-########################
-# MPI Cholesky example #
-########################
-
-if BUILD_EXAMPLES
-if !NO_BLAS_LIB
-examplebin_PROGRAMS +=		\
-	matrix_decomposition/mpi_cholesky			\
-	matrix_decomposition/mpi_cholesky_distributed
-
-matrix_decomposition_mpi_cholesky_SOURCES	=		\
-	matrix_decomposition/mpi_cholesky.c		\
-	matrix_decomposition/mpi_cholesky_models.c		\
-	matrix_decomposition/mpi_cholesky_kernels.c	\
-	matrix_decomposition/mpi_cholesky_codelets.c	\
-	matrix_decomposition/mpi_decomposition_params.c	\
-	matrix_decomposition/mpi_decomposition_matrix.c	\
-	../../examples/common/blas.c
-
-matrix_decomposition_mpi_cholesky_LDADD =			\
-	$(STARPU_BLAS_LDFLAGS) -lm
-
-matrix_decomposition_mpi_cholesky_distributed_SOURCES =	\
-	matrix_decomposition/mpi_cholesky_distributed.c	\
-	matrix_decomposition/mpi_cholesky_models.c		\
-	matrix_decomposition/mpi_cholesky_kernels.c	\
-	matrix_decomposition/mpi_cholesky_codelets.c	\
-	matrix_decomposition/mpi_decomposition_params.c	\
-	matrix_decomposition/mpi_decomposition_matrix.c	\
-	../../examples/common/blas.c
-
-matrix_decomposition_mpi_cholesky_distributed_LDADD =	\
-	$(STARPU_BLAS_LDFLAGS) -lm
-
-if !STARPU_SIMGRID
-starpu_mpi_EXAMPLES +=				\
-	matrix_decomposition/mpi_cholesky			\
-	matrix_decomposition/mpi_cholesky_distributed
-endif
-endif
-endif
-
-########################
-# MPI Matrix mult example #
-########################
-
-if BUILD_EXAMPLES
-examplebin_PROGRAMS +=		\
-	matrix_mult/mm
-
-matrix_mult_mm_SOURCES	=		\
-	matrix_mult/mm.c
-
-matrix_mult_mm_LDADD =			\
-	-lm
-
-if !STARPU_SIMGRID
-starpu_mpi_EXAMPLES +=				\
-	matrix_mult/mm
-endif
-endif
-
-##########################################
-# Native Fortran MPI Matrix mult example #
-##########################################
-
-if STARPU_HAVE_MPIFORT
-if BUILD_EXAMPLES
-if !STARPU_SANITIZE
-examplebin_PROGRAMS +=		\
-	native_fortran/nf_mm	\
-	native_fortran/nf_basic_ring
-
-native_fortran_nf_mm_SOURCES	=			\
-	native_fortran/nf_mm_cl.f90			\
-	$(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90	\
-	$(top_srcdir)/include/fstarpu_mod.f90		\
-	native_fortran/nf_mm.f90
-
-native_fortran_nf_mm_LDADD =					\
-	-lm
-
-native_fortran_nf_basic_ring_SOURCES	=			\
-	$(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90	\
-	$(top_srcdir)/include/fstarpu_mod.f90		\
-	native_fortran/nf_basic_ring.f90
-
-native_fortran_nf_basic_ring_LDADD =					\
-	-lm
-
-if !STARPU_SIMGRID
-starpu_mpi_EXAMPLES +=				\
-	native_fortran/nf_mm			\
-	native_fortran/nf_basic_ring
-endif
-endif
-endif
-endif
-
-###################
-# complex example #
-###################
-
-if BUILD_EXAMPLES
-examplebin_PROGRAMS +=			\
-	complex/mpi_complex
-
-complex_mpi_complex_SOURCES =		\
-	complex/mpi_complex.c		\
-	../../examples/interface/complex_interface.c
-
-starpu_mpi_EXAMPLES	+=			\
-	complex/mpi_complex
-endif
-
-#########################
-# user_datatype example #
-#########################
-
-if BUILD_EXAMPLES
-examplebin_PROGRAMS +=				\
-	user_datatype/user_datatype
-
-user_datatype_user_datatype_SOURCES =		\
-	user_datatype/user_datatype.c		\
-	user_datatype/my_interface.c
-
-if !STARPU_SIMGRID
-starpu_mpi_EXAMPLES	+=			\
-	user_datatype/user_datatype
-endif
-endif
-
-###################
-# comm example #
-###################
-
-if BUILD_EXAMPLES
-examplebin_PROGRAMS +=			\
-	comm/comm			\
-	comm/mix_comm
-
-if !STARPU_SIMGRID
-starpu_mpi_EXAMPLES	+=			\
-	comm/comm				\
-	comm/mix_comm
-endif
-endif
-
-if STARPU_HAVE_MPIFORT
-if BUILD_EXAMPLES
-if !STARPU_SANITIZE
-# Native Fortran example
-# - list explicit dependences to control proper module files generation
-# - the overriding rule fully disables the corresponing default rule, thus
-#   the default rule body must be copied entirely
-fstarpu_mod.mod: fstarpu_mod.o
-fstarpu_mpi_mod.mod: fstarpu_mpi_mod.o
-nf_mm_cl.mod: nf_mm_cl.o
-
-fstarpu_mod.o: $(top_srcdir)/include/fstarpu_mod.f90
-	$(AM_V_FC)$(FC) $(native_fortran_nf_mm_FCFLAGS) $(FCFLAGS) -c -o $@ '$(top_srcdir)/'include/fstarpu_mod.f90
-
-fstarpu_mpi_mod.o: $(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90 fstarpu_mod.mod
-	$(AM_V_FC)$(FC) $(native_fortran_nf_mm_FCFLAGS) $(FCFLAGS) -c -o $@ '$(top_srcdir)/'mpi/include/fstarpu_mpi_mod.f90
-
-nf_mm_cl.o: $(top_srcdir)/mpi/examples/native_fortran/nf_mm_cl.f90 fstarpu_mpi_mod.mod fstarpu_mod.mod
-	$(AM_V_FC)$(FC) $(native_fortran_nf_mm_FCFLAGS) $(FCFLAGS) -c -o $@ `test -f 'native_fortran/nf_mm_cl.f90' || echo '$(srcdir)/'`native_fortran/nf_mm_cl.f90
-
-nf_mm.o: $(top_srcdir)/mpi/examples/native_fortran/nf_mm.f90 nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod
-	$(AM_V_FC)$(FC) $(native_fortran_nf_mm_FCFLAGS) $(FCFLAGS) -c -o $@ `test -f 'native_fortran/nf_mm.f90' || echo '$(srcdir)/'`native_fortran/nf_mm.f90
-
-nf_basic_ring.o: $(top_srcdir)/mpi/examples/native_fortran/nf_basic_ring.f90 fstarpu_mpi_mod.mod fstarpu_mod.mod
-	$(AM_V_FC)$(FC) $(native_fortran_nf_basic_ring_FCFLAGS) $(FCFLAGS) -c -o $@ `test -f 'native_fortran/nf_basic_ring.f90' || echo '$(srcdir)/'`native_fortran/nf_basic_ring.f90
-endif
-endif
-endif

+ 0 - 150
nmad/examples/comm/comm.c

@@ -1,150 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*
- * This example splits the whole set of communicators in subgroups,
- * all communications take place within each subgroups
- */
-
-#include <starpu_mpi.h>
-#include "../helper.h"
-
-void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	int rank;
-
-	starpu_codelet_unpack_args(_args, &rank);
-	FPRINTF_MPI(stderr, "Executing codelet with value %d and rank %d\n", *value, rank);
-	STARPU_ASSERT_MSG(*value == rank, "Received value %d is not the expected value %d\n", *value, rank);
-}
-
-struct starpu_codelet mycodelet =
-{
-	.cpu_funcs = {func_cpu},
-	.nbuffers = 1,
-	.modes = {STARPU_RW}
-};
-
-int main(int argc, char **argv)
-{
-	int size, x=789;
-	int color;
-	MPI_Comm newcomm;
-	int rank, newrank;
-	int ret;
-	starpu_data_handle_t data[2];
-	int thread_support;
-
-	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS)
-	{
-		fprintf(stderr,"MPI_Init_thread failed\n");
-		exit(1);
-	}
-	if (thread_support == MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
-	if (thread_support < MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI does not have thread support!\n");
-
-        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-        MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-        if (size < 4)
-        {
-		FPRINTF(stderr, "We need at least 4 processes.\n");
-                MPI_Finalize();
-                return STARPU_TEST_SKIPPED;
-        }
-
-	color = rank%2;
-	MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm);
-	MPI_Comm_rank(newcomm, &newrank);
-	FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color);
-
-	if (newrank == 0)
-	{
-		FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank);
-		MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm);
-	}
-	else if (newrank == 1)
-	{
-		MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE);
-		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x);
-	}
-
-        ret = starpu_init(NULL);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-        ret = starpu_mpi_init_comm(NULL, NULL, 0, newcomm);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	if (newrank == 0)
-	{
-		starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
-		starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
-		starpu_mpi_data_register_comm(data[1], 22, 0, newcomm);
-	}
-	else
-		starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int));
-	starpu_mpi_data_register_comm(data[0], 12, 0, newcomm);
-
-	if (newrank == 0)
-	{
-		starpu_mpi_req req[2];
-		starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm);
-		starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm);
-		starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE);
-		starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE);
-	}
-	else if (newrank == 1)
-	{
-		int *xx;
-
-		starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE);
-		starpu_data_acquire(data[0], STARPU_RW);
-		xx = (int *)starpu_variable_get_local_ptr(data[0]);
-		starpu_data_release(data[0]);
-		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx);
-		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
-
-		starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int));
-		starpu_mpi_data_register_comm(data[1], 22, 0, newcomm);
-		starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE);
-		starpu_data_acquire(data[0], STARPU_RW);
-		xx = (int *)starpu_variable_get_local_ptr(data[0]);
-		starpu_data_release(data[0]);
-		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx);
-		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
-	}
-
-	if (newrank == 0 || newrank == 1)
-	{
-		starpu_mpi_task_insert(newcomm, &mycodelet,
-				       STARPU_RW, data[0],
-				       STARPU_VALUE, &x, sizeof(x),
-				       STARPU_EXECUTE_ON_NODE, 1,
-				       0);
-
-		starpu_task_wait_for_all();
-		starpu_data_unregister(data[0]);
-		starpu_data_unregister(data[1]);
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-	MPI_Comm_free(&newcomm);
-        MPI_Finalize();
-	return 0;
-}

+ 0 - 185
nmad/examples/comm/mix_comm.c

@@ -1,185 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*
- * This example splits the whole set of communicators in subgroups,
- * communications take place both within each subgroups and MPI_COMM_WORLD.
- */
-
-#include <starpu_mpi.h>
-#include "../helper.h"
-
-void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	int rank;
-
-	starpu_codelet_unpack_args(_args, &rank);
-	FPRINTF_MPI(stderr, "Executing codelet with value %d and rank %d\n", *value, rank);
-	STARPU_ASSERT_MSG(*value == rank, "Received value %d is not the expected value %d\n", *value, rank);
-}
-
-struct starpu_codelet mycodelet =
-{
-	.cpu_funcs = {func_cpu},
-	.nbuffers = 1,
-	.modes = {STARPU_RW}
-};
-
-int main(int argc, char **argv)
-{
-	int size, x;
-	int color;
-	MPI_Comm newcomm;
-	int rank, newrank;
-	int ret;
-	starpu_data_handle_t data[3];
-	int value = 90;
-	int thread_support;
-	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS)
-	{
-		fprintf(stderr,"MPI_Init_thread failed\n");
-		exit(1);
-	}
-	if (thread_support == MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
-	if (thread_support < MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI does not have thread support!\n");
-
-        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-        MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-        if (size < 4)
-        {
-		FPRINTF(stderr, "We need at least 4 processes.\n");
-                MPI_Finalize();
-                return STARPU_TEST_SKIPPED;
-        }
-
-	color = rank%2;
-	MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm);
-	MPI_Comm_rank(newcomm, &newrank);
-	FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color);
-
-	if (newrank == 0)
-	{
-		FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank);
-		MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm);
-	}
-	else if (newrank == 1)
-	{
-		MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE);
-		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x);
-	}
-
-        ret = starpu_init(NULL);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-        ret = starpu_mpi_init(NULL, NULL, 0);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	if (rank == 0)
-	{
-		starpu_variable_data_register(&data[2], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int));
-	}
-	else
-		starpu_variable_data_register(&data[2], -1, (uintptr_t)NULL, sizeof(int));
-	starpu_mpi_data_register_comm(data[2], 44, 0, MPI_COMM_WORLD);
-
-	if (newrank == 0)
-	{
-		starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
-		starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
-		starpu_mpi_data_register_comm(data[1], 22, 0, newcomm);
-	}
-	else
-		starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int));
-	starpu_mpi_data_register_comm(data[0], 12, 0, newcomm);
-
-	if (newrank == 0)
-	{
-		starpu_mpi_req req[2];
-		starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm);
-		starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm);
-		starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE);
-		starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE);
-	}
-	else if (newrank == 1)
-	{
-		int *xx;
-
-		starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE);
-		starpu_data_acquire(data[0], STARPU_RW);
-		xx = (int *)starpu_variable_get_local_ptr(data[0]);
-		starpu_data_release(data[0]);
-		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx);
-		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
-
-		starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int));
-		starpu_mpi_data_register_comm(data[1], 22, 0, newcomm);
-		starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE);
-		starpu_data_acquire(data[0], STARPU_RW);
-		xx = (int *)starpu_variable_get_local_ptr(data[0]);
-		starpu_data_release(data[0]);
-		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx);
-		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
-	}
-
-	if (rank == 0)
-	{
-		starpu_data_acquire(data[2], STARPU_RW);
-		int rvalue = *((int *)starpu_variable_get_local_ptr(data[2]));
-		starpu_data_release(data[2]);
-		FPRINTF_MPI(stderr, "sending value %d to %d and receiving from %d\n", rvalue, 1, size-1);
-		starpu_mpi_send(data[2], 1, 44, MPI_COMM_WORLD);
-		starpu_mpi_recv(data[2], size-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-		starpu_data_acquire(data[2], STARPU_RW);
-		int *xx = (int *)starpu_variable_get_local_ptr(data[2]);
-		starpu_data_release(data[2]);
-		FPRINTF_MPI(stderr, "Value back is %d\n", *xx);
-		STARPU_ASSERT_MSG(*xx == rvalue + (2*(size-1)), "Received value %d is incorrect (should be %d)\n", *xx, rvalue + (2*(size-1)));
-	}
-	else
-	{
-		int next = (rank == size-1) ? 0 : rank+1;
-		starpu_mpi_recv(data[2], rank-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-		starpu_data_acquire(data[2], STARPU_RW);
-		int *xx = (int *)starpu_variable_get_local_ptr(data[2]);
-		FPRINTF_MPI(stderr, "receiving %d from %d and sending %d to %d\n", *xx, rank-1, *xx+2, next);
-		*xx = *xx + 2;
-		starpu_data_release(data[2]);
-		starpu_mpi_send(data[2], next, 44, MPI_COMM_WORLD);
-	}
-
-	if (newrank == 0 || newrank == 1)
-	{
-		starpu_mpi_task_insert(newcomm, &mycodelet,
-				       STARPU_RW, data[0],
-				       STARPU_VALUE, &x, sizeof(x),
-				       STARPU_EXECUTE_ON_NODE, 1,
-				       0);
-
-		starpu_task_wait_for_all();
-		starpu_data_unregister(data[0]);
-		starpu_data_unregister(data[1]);
-	}
-	starpu_data_unregister(data[2]);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-	MPI_Comm_free(&newcomm);
-        MPI_Finalize();
-	return 0;
-}

+ 0 - 126
nmad/examples/complex/mpi_complex.c

@@ -1,126 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2012, 2013, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <interface/complex_interface.h>
-#include <interface/complex_codelet.h>
-
-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-
-void display_foo_codelet(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	int *foo = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	FPRINTF(stderr, "foo = %d\n", *foo);
-}
-
-/* Dumb performance model for simgrid */
-static double display_cost_function(struct starpu_task *task, unsigned nimpl)
-{
-	(void) task;
-	(void) nimpl;
-	return 0.000001;
-}
-
-static struct starpu_perfmodel display_model =
-{
-	.type = STARPU_COMMON,
-	.cost_function = display_cost_function,
-	.symbol = "display"
-};
-
-struct starpu_codelet foo_display =
-{
-	.cpu_funcs = {display_foo_codelet},
-	.nbuffers = 1,
-	.modes = {STARPU_R},
-	.model = &display_model
-};
-
-int main(int argc, char **argv)
-{
-	int rank, nodes;
-	int ret;
-	int compare=0;
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
-
-	if (nodes < 2 || (starpu_cpu_worker_get_count() == 0))
-	{
-		if (rank == 0)
-		{
-			if (nodes < 2)
-				fprintf(stderr, "We need at least 2 processes.\n");
-			else
-				fprintf(stderr, "We need at least 1 CPU.\n");
-		}
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return 77;
-	}
-
-	starpu_data_handle_t handle;
-	starpu_data_handle_t handle2;
-
-	double real[2] = {4.0, 2.0};
-	double imaginary[2] = {7.0, 9.0};
-
-	double real2[2] = {14.0, 12.0};
-	double imaginary2[2] = {17.0, 19.0};
-
-	if (rank == 1)
-	{
-		real[0] = 0.0;
-		real[1] = 0.0;
-		imaginary[0] = 0.0;
-		imaginary[1] = 0.0;
-	}
-
-	starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2);
-	starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2);
-
-	if (rank == 0)
-	{
-		int *compare_ptr = &compare;
-
-		starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0);
-		starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL);
-		starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL);
-
-		starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0);
-		starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0);
-	}
-	else if (rank == 1)
-	{
-		starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL);
-		starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0);
-		starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL);
-	}
-
-	starpu_task_wait_for_all();
-
-	starpu_data_unregister(handle);
-	starpu_data_unregister(handle2);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	return (rank == 0) ? !compare : 0;
-}

+ 0 - 27
nmad/examples/helper.h

@@ -1,27 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012, 2013, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <errno.h>
-#include <starpu_mpi.h>
-
-#define STARPU_TEST_SKIPPED 77
-
-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-#define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \
-    						int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank);       \
-                                                fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
-                                                fflush(ofile); }} while(0);
-

+ 0 - 86
nmad/examples/matrix_decomposition/mpi_cholesky.c

@@ -1,86 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2012, 2015  Université de Bordeaux
- * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_cholesky.h"
-#include "helper.h"
-
-int main(int argc, char **argv)
-{
-	/* create a simple definite positive symetric matrix example
-	 *
-	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
-	 * */
-
-	float ***bmat;
-	int rank, nodes, ret;
-	double timing, flops;
-#ifndef STARPU_SIMGRID
-	int correctness;
-#endif
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
-	starpu_cublas_init();
-
-	if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0)
-	{
-		if (rank == 0)
-		{
-			FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
-		}
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	parse_args(argc, argv, nodes);
-
-	matrix_init(&bmat, rank, nodes, 1);
-	matrix_display(bmat, rank);
-
-	dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops);
-
-	starpu_mpi_shutdown();
-
-#ifndef STARPU_SIMGRID
-	matrix_display(bmat, rank);
-
-	dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops);
-#endif
-
-	matrix_free(&bmat, rank, nodes, 1);
-	starpu_cublas_shutdown();
-	starpu_shutdown();
-
-#ifndef STARPU_SIMGRID
-	assert(correctness);
-#endif
-
-	if (rank == 0)
-	{
-		FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000);
-		FPRINTF(stdout, "Synthetic GFlops : %2.2f\n", (flops/timing/1000.0f));
-	}
-
-	return 0;
-}

+ 0 - 31
nmad/examples/matrix_decomposition/mpi_cholesky.h

@@ -1,31 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2013, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __MPI_CHOLESKY_H__
-#define __MPI_CHOLESKY_H__
-
-#include <starpu.h>
-#include <starpu_mpi.h>
-#include "mpi_cholesky_codelets.h"
-#include "mpi_cholesky_kernels.h"
-#include "mpi_cholesky_models.h"
-#include "mpi_decomposition_matrix.h"
-#include "mpi_decomposition_params.h"
-
-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-
-#endif // __MPI_CHOLESKY_H__

+ 0 - 267
nmad/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -1,267 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_cholesky.h"
-#include <common/blas.h>
-#include <sys/time.h>
-#include <limits.h>
-
-/*
- *	Create the codelets
- */
-
-static struct starpu_codelet cl11 =
-{
-	.cpu_funcs = {chol_cpu_codelet_update_u11},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {chol_cublas_codelet_update_u11},
-#elif defined(STARPU_SIMGRID)
-	.cuda_funcs = {(void*)1},
-#endif
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-	.model = &chol_model_11
-};
-
-static struct starpu_codelet cl21 =
-{
-	.cpu_funcs = {chol_cpu_codelet_update_u21},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {chol_cublas_codelet_update_u21},
-#elif defined(STARPU_SIMGRID)
-	.cuda_funcs = {(void*)1},
-#endif
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_RW},
-	.model = &chol_model_21
-};
-
-static struct starpu_codelet cl22 =
-{
-	.cpu_funcs = {chol_cpu_codelet_update_u22},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {chol_cublas_codelet_update_u22},
-#elif defined(STARPU_SIMGRID)
-	.cuda_funcs = {(void*)1},
-#endif
-	.nbuffers = 3,
-	.modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE},
-	.model = &chol_model_22
-};
-
-/*
- *	code to bootstrap the factorization
- *	and construct the DAG
- */
-void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing, double *flops)
-{
-	double start;
-	double end;
-	starpu_data_handle_t **data_handles;
-	unsigned x,y,i,j,k;
-
-	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
-
-	/* create all the DAG nodes */
-
-	data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *));
-	for(x=0 ; x<nblocks ; x++) data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle_t));
-
-	for(x = 0; x < nblocks ; x++)
-	{
-		for (y = 0; y < nblocks; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (mpi_rank == rank)
-			{
-				//fprintf(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-				starpu_matrix_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)matA[x][y],
-						ld, size/nblocks, size/nblocks, sizeof(float));
-			}
-#ifdef STARPU_DEVEL
-#warning TODO: make better test to only register what is needed
-#endif
-			else
-			{
-				/* I don't own that index, but will need it for my computations */
-				//fprintf(stderr, "[%d] Neighbour of data[%d][%d]\n", rank, x, y);
-				starpu_matrix_data_register(&data_handles[x][y], -1, (uintptr_t)NULL,
-						ld, size/nblocks, size/nblocks, sizeof(float));
-			}
-			if (data_handles[x][y])
-			{
-				starpu_data_set_coordinates(data_handles[x][y], 2, x, y);
-				starpu_mpi_data_register(data_handles[x][y], (y*nblocks)+x, mpi_rank);
-			}
-		}
-	}
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-	start = starpu_timing_now();
-
-	for (k = 0; k < nblocks; k++)
-	{
-		starpu_iteration_push(k);
-
-		starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
-				       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
-				       STARPU_RW, data_handles[k][k],
-				       0);
-
-		for (j = k+1; j<nblocks; j++)
-		{
-			starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
-					       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - j) : (j == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
-					       STARPU_R, data_handles[k][k],
-					       STARPU_RW, data_handles[k][j],
-					       0);
-
-			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[k][k]);
-			if (my_distrib(k, k, nodes) == rank)
-				starpu_data_wont_use(data_handles[k][k]);
-
-			for (i = k+1; i<nblocks; i++)
-			{
-				if (i <= j)
-				{
-					starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
-							       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - j - i) : ((i == k+1) && (j == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
-							       STARPU_R, data_handles[k][i],
-							       STARPU_R, data_handles[k][j],
-							       STARPU_RW | STARPU_COMMUTE, data_handles[i][j],
-							       0);
-				}
-			}
-
-			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[k][j]);
-			if (my_distrib(k, j, nodes) == rank)
-				starpu_data_wont_use(data_handles[k][j]);
-		}
-		starpu_iteration_pop();
-	}
-
-	starpu_task_wait_for_all();
-
-	for(x = 0; x < nblocks ; x++)
-	{
-		for (y = 0; y < nblocks; y++)
-		{
-			if (data_handles[x][y])
-				starpu_data_unregister(data_handles[x][y]);
-		}
-		free(data_handles[x]);
-	}
-	free(data_handles);
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-	end = starpu_timing_now();
-
-	if (rank == 0)
-	{
-		*timing = end - start;
-		*flops = (1.0f*size*size*size)/3.0f;
-	}
-}
-
-void dw_cholesky_check_computation(float ***matA, int rank, int nodes, int *correctness, double *flops)
-{
-	unsigned i,j,x,y;
-	float *rmat = malloc(size*size*sizeof(float));
-
-	for(x=0 ; x<nblocks ; x++)
-	{
-		for(y=0 ; y<nblocks ; y++)
-		{
-			for (i = 0; i < BLOCKSIZE; i++)
-			{
-				for (j = 0; j < BLOCKSIZE; j++)
-				{
-					rmat[j+(y*BLOCKSIZE)+(i+(x*BLOCKSIZE))*size] = matA[x][y][j +i*BLOCKSIZE];
-				}
-			}
-		}
-	}
-
-	FPRINTF(stderr, "[%d] compute explicit LLt ...\n", rank);
-	for (j = 0; j < size; j++)
-	{
-		for (i = 0; i < size; i++)
-		{
-			if (i > j)
-			{
-				rmat[j+i*size] = 0.0f; // debug
-			}
-		}
-	}
-	float *test_mat = malloc(size*size*sizeof(float));
-	STARPU_ASSERT(test_mat);
-
-	STARPU_SSYRK("L", "N", size, size, 1.0f,
-			rmat, size, 0.0f, test_mat, size);
-
-	FPRINTF(stderr, "[%d] comparing results ...\n", rank);
-	if (display)
-	{
-		for (j = 0; j < size; j++)
-		{
-			for (i = 0; i < size; i++)
-			{
-				if (i <= j)
-				{
-					printf("%2.2f\t", test_mat[j +i*size]);
-				}
-				else
-				{
-					printf(".\t");
-				}
-			}
-			printf("\n");
-		}
-	}
-
-	*correctness = 1;
-	for(x = 0; x < nblocks ; x++)
-	{
-		for (y = 0; y < nblocks; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (mpi_rank == rank)
-			{
-				for (i = (size/nblocks)*x ; i < (size/nblocks)*x+(size/nblocks); i++)
-				{
-					for (j = (size/nblocks)*y ; j < (size/nblocks)*y+(size/nblocks); j++)
-					{
-						if (i <= j)
-						{
-							float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
-							float err = abs(test_mat[j +i*size] - orig);
-							if (err > 0.00001)
-							{
-								FPRINTF(stderr, "[%d] Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", rank, i, j, test_mat[j +i*size], orig, err);
-								*correctness = 0;
-								*flops = 0;
-								break;
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-	free(rmat);
-	free(test_mat);
-}

+ 0 - 30
nmad/examples/matrix_decomposition/mpi_cholesky_codelets.h

@@ -1,30 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __MPI_CHOLESKY_CODELETS_H__
-#define __MPI_CHOLESKY_CODELETS_H__
-
-
-/*
- *	code to bootstrap the factorization
- *	and construct the DAG
- */
-void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing, double *flops);
-
-void dw_cholesky_check_computation(float ***matA, int rank, int nodes, int *correctness, double *flops);
-
-#endif /* __MPI_CHOLESKY_CODELETS_H__ */

+ 0 - 60
nmad/examples/matrix_decomposition/mpi_cholesky_distributed.c

@@ -1,60 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2011  Université de Bordeaux
- * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_cholesky.h"
-
-int main(int argc, char **argv)
-{
-	/* create a simple definite positive symetric matrix example
-	 *
-	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
-	 * */
-
-	float ***bmat;
-	int rank, nodes, ret;
-	double timing, flops;
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
-	starpu_cublas_init();
-
-	parse_args(argc, argv, nodes);
-
-	matrix_init(&bmat, rank, nodes, 0);
-
-	dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops);
-
-	starpu_mpi_shutdown();
-
-	matrix_free(&bmat, rank, nodes, 0);
-	starpu_cublas_shutdown();
-	starpu_shutdown();
-
-	if (rank == 0)
-	{
-		FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000);
-		FPRINTF(stdout, "Synthetic GFlops : %2.2f\n", (flops/timing/1000.0f));
-	}
-
-	return 0;
-}

+ 0 - 246
nmad/examples/matrix_decomposition/mpi_cholesky_kernels.c

@@ -1,246 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010, 2012-2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_cholesky.h"
-#include <math.h>
-#include "common/blas.h"
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <cublas.h>
-#ifdef STARPU_HAVE_MAGMA
-#include "magma.h"
-#include "magma_lapack.h"
-#endif
-#endif
-
-/*
- * U22
- */
-
-static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	//printf("22\n");
-	float *left 	= (float *)STARPU_MATRIX_GET_PTR(descr[0]);
-	float *right 	= (float *)STARPU_MATRIX_GET_PTR(descr[1]);
-	float *center 	= (float *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-	unsigned dx = STARPU_MATRIX_GET_NY(descr[2]);
-	unsigned dy = STARPU_MATRIX_GET_NX(descr[2]);
-	unsigned dz = STARPU_MATRIX_GET_NY(descr[0]);
-
-	unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]);
-	unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]);
-
-#ifdef STARPU_USE_CUDA
-	cublasStatus st;
-#endif
-
-	switch (s)
-	{
-		case 0:
-			STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21,
-				right, ld12, 1.0f, center, ld22);
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-#ifdef STARPU_HAVE_MAGMA
-			cublasSetKernelStream(starpu_cuda_get_local_stream());
-#endif
-			cublasSgemm('n', 't', dy, dx, dz,
-					-1.0f, left, ld21, right, ld12,
-					 1.0f, center, ld22);
-			st = cublasGetError();
-			if (STARPU_UNLIKELY(st != CUBLAS_STATUS_SUCCESS))
-				STARPU_CUBLAS_REPORT_ERROR(st);
-
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-}
-
-void chol_cpu_codelet_update_u22(void *descr[], void *_args)
-{
-	chol_common_cpu_codelet_update_u22(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-void chol_cublas_codelet_update_u22(void *descr[], void *_args)
-{
-	chol_common_cpu_codelet_update_u22(descr, 1, _args);
-}
-#endif// STARPU_USE_CUDA
-
-/*
- * U21
- */
-
-static inline void chol_common_codelet_update_u21(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-//	printf("21\n");
-	float *sub11;
-	float *sub21;
-
-	sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]);
-	sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-	unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]);
-
-	unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]);
-	unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]);
-
-	switch (s)
-	{
-		case 0:
-			STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-#ifdef STARPU_HAVE_MAGMA
-			cublasSetKernelStream(starpu_cuda_get_local_stream());
-#endif
-			cublasStrsm('R', 'L', 'T', 'N', nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-}
-
-void chol_cpu_codelet_update_u21(void *descr[], void *_args)
-{
-	 chol_common_codelet_update_u21(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-void chol_cublas_codelet_update_u21(void *descr[], void *_args)
-{
-	chol_common_codelet_update_u21(descr, 1, _args);
-}
-#endif
-
-/*
- *	U11
- */
-
-static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-//	printf("11\n");
-	float *sub11;
-
-	sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-	unsigned nx = STARPU_MATRIX_GET_NY(descr[0]);
-	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
-
-	unsigned z;
-
-	switch (s)
-	{
-		case 0:
-
-#ifdef STARPU_MKL
-			STARPU_SPOTRF("L", nx, sub11, ld);
-#else
-			/*
-			 *	- alpha 11 <- lambda 11 = sqrt(alpha11)
-			 *	- alpha 21 <- l 21	= alpha 21 / lambda 11
-			 *	- A22 <- A22 - l21 trans(l21)
-			 */
-
-			for (z = 0; z < nx; z++)
-			{
-				float lambda11;
-				lambda11 = sqrt(sub11[z+z*ld]);
-				sub11[z+z*ld] = lambda11;
-
-				STARPU_ASSERT(lambda11 != 0.0f);
-
-				STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
-
-				STARPU_SSYR("L", nx - z - 1, -1.0f,
-							&sub11[(z+1)+z*ld], 1,
-							&sub11[(z+1)+(z+1)*ld], ld);
-			}
-#endif
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-#ifdef STARPU_HAVE_MAGMA
-			{
-				int ret;
-				int info;
-				ret = magma_spotrf_gpu(MagmaLower, nx, sub11, ld, &info);
-				if (ret != MAGMA_SUCCESS)
-				{
-					fprintf(stderr, "Error in Magma: %d\n", ret);
-					STARPU_ABORT();
-				}
-				cudaError_t cures = cudaThreadSynchronize();
-				STARPU_ASSERT(!cures);
-			}
-#else
-			for (z = 0; z < nx; z++)
-			{
-				float lambda11;
-				cudaMemcpyAsync(&lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
-				cudaStreamSynchronize(starpu_cuda_get_local_stream());
-
-				STARPU_ASSERT(lambda11 != 0.0f);
-
-				lambda11 = sqrt(lambda11);
-
-				cublasSetVector(1, sizeof(float), &lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float));
-
-				cublasSscal(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
-
-				cublasSsyr('U', nx - z - 1, -1.0f,
-							&sub11[(z+1)+z*ld], 1,
-							&sub11[(z+1)+(z+1)*ld], ld);
-			}
-
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
-#endif
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-}
-
-
-void chol_cpu_codelet_update_u11(void *descr[], void *_args)
-{
-	chol_common_codelet_update_u11(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-void chol_cublas_codelet_update_u11(void *descr[], void *_args)
-{
-	chol_common_codelet_update_u11(descr, 1, _args);
-}
-#endif// STARPU_USE_CUDA

+ 0 - 33
nmad/examples/matrix_decomposition/mpi_cholesky_kernels.h

@@ -1,33 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __MPI_CHOLESKY_KERNELS_H__
-#define __MPI_CHOLESKY_KERNELS_H__
-
-#include <starpu.h>
-
-void chol_cpu_codelet_update_u11(void **, void *);
-void chol_cpu_codelet_update_u21(void **, void *);
-void chol_cpu_codelet_update_u22(void **, void *);
-
-#ifdef STARPU_USE_CUDA
-void chol_cublas_codelet_update_u11(void *descr[], void *_args);
-void chol_cublas_codelet_update_u21(void *descr[], void *_args);
-void chol_cublas_codelet_update_u22(void *descr[], void *_args);
-#endif
-
-#endif // __MPI_CHOLESKY_KERNELS_H__

+ 0 - 40
nmad/examples/matrix_decomposition/mpi_cholesky_models.c

@@ -1,40 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_cholesky.h"
-
-/*
- *	Number of flops of Gemm
- */
-
-struct starpu_perfmodel chol_model_11 =
-{
-	.type = STARPU_HISTORY_BASED,
-	.symbol = "chol_model_11"
-};
-
-struct starpu_perfmodel chol_model_21 =
-{
-	.type = STARPU_HISTORY_BASED,
-	.symbol = "chol_model_21"
-};
-
-struct starpu_perfmodel chol_model_22 =
-{
-	.type = STARPU_HISTORY_BASED,
-	.symbol = "chol_model_22"
-};

+ 0 - 25
nmad/examples/matrix_decomposition/mpi_cholesky_models.h

@@ -1,25 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2013, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __DW_CHOLESKY_MODELS_H__
-#define __DW_CHOLESKY_MODELS_H__
-
-extern struct starpu_perfmodel chol_model_11;
-extern struct starpu_perfmodel chol_model_21;
-extern struct starpu_perfmodel chol_model_22;
-
-#endif // __DW_CHOLESKY_MODELS_H__

+ 0 - 111
nmad/examples/matrix_decomposition/mpi_decomposition_matrix.c

@@ -1,111 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2012, 2015  Université de Bordeaux
- * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_cholesky.h"
-
-/* Returns the MPI node number where data indexes index is */
-int my_distrib(int x, int y, int nb_nodes)
-{
-	//return (x+y) % nb_nodes;
-	return (x%dblockx)+(y%dblocky)*dblockx;
-}
-
-
-void matrix_display(float ***bmat, int rank)
-{
-	if (display)
-	{
-		unsigned y;
-		printf("[%d] Input :\n", rank);
-
-		for(y=0 ; y<nblocks ; y++)
-		{
-			unsigned x;
-			for(x=0 ; x<nblocks ; x++)
-			{
-				unsigned j;
-				printf("Block %u,%u :\n", x, y);
-				for (j = 0; j < BLOCKSIZE; j++)
-				{
-					unsigned i;
-					for (i = 0; i < BLOCKSIZE; i++)
-					{
-						if (i <= j)
-						{
-							printf("%2.2f\t", bmat[y][x][j +i*BLOCKSIZE]);
-						}
-						else
-						{
-							printf(".\t");
-						}
-					}
-					printf("\n");
-				}
-			}
-		}
-	}
-}
-
-void matrix_init(float ****bmat, int rank, int nodes, int alloc_everywhere)
-{
-	unsigned i,j,x,y;
-
-	*bmat = malloc(nblocks * sizeof(float **));
-	for(x=0 ; x<nblocks ; x++)
-	{
-		(*bmat)[x] = malloc(nblocks * sizeof(float *));
-		for(y=0 ; y<nblocks ; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (alloc_everywhere || (mpi_rank == rank))
-			{
-				starpu_malloc((void **)&(*bmat)[x][y], BLOCKSIZE*BLOCKSIZE*sizeof(float));
-				for (i = 0; i < BLOCKSIZE; i++)
-				{
-					for (j = 0; j < BLOCKSIZE; j++)
-					{
-#ifndef STARPU_SIMGRID
-						(*bmat)[x][y][j +i*BLOCKSIZE] = (1.0f/(1.0f+(i+(x*BLOCKSIZE)+j+(y*BLOCKSIZE)))) + ((i+(x*BLOCKSIZE) == j+(y*BLOCKSIZE))?1.0f*size:0.0f);
-						//mat[j +i*size] = ((i == j)?1.0f*size:0.0f);
-#endif
-					}
-				}
-			}
-		}
-	}
-}
-
-void matrix_free(float ****bmat, int rank, int nodes, int alloc_everywhere)
-{
-	unsigned x, y;
-
-	for(x=0 ; x<nblocks ; x++)
-	{
-		for(y=0 ; y<nblocks ; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (alloc_everywhere || (mpi_rank == rank))
-			{
-				starpu_free((void *)(*bmat)[x][y]);
-			}
-		}
-		free((*bmat)[x]);
-	}
-	free(*bmat);
-}
-

+ 0 - 30
nmad/examples/matrix_decomposition/mpi_decomposition_matrix.h

@@ -1,30 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2012  Université de Bordeaux
- * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __MPI_CHOLESKY_MATRIX_H__
-#define __MPI_CHOLESKY_MATRIX_H__
-
-/* Returns the MPI rank that the block of coordinates (x, y) is mapped to. */
-int my_distrib(int x, int y, int nb_nodes);
-
-/* Prints the blocks of bmat, with rank in the header line, when display is enabled. */
-void matrix_display(float ***bmat, int rank);
-/* Allocates the grid of blocks into *bmat; a block is allocated when
- * alloc_everywhere is non-zero or when my_distrib() maps it to rank. */
-void matrix_init(float ****bmat, int rank, int nodes, int alloc_everywhere);
-/* Frees the grid *bmat, releasing the blocks selected by the same condition as matrix_init(). */
-void matrix_free(float ****bmat, int rank, int nodes, int alloc_everywhere);
-
-#endif /* __MPI_CHOLESKY_MATRIX_H__ */
-

+ 0 - 113
nmad/examples/matrix_decomposition/mpi_decomposition_params.c

@@ -1,113 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010, 2015-2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_cholesky.h"
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-
-/* Default values of the parameters, chosen at compile time from
- * STARPU_QUICK_CHECK / STARPU_LONG_CHECK. parse_args() may override them.
- * size and nblocks define BLOCKSIZE as size/nblocks.
- * NOTE(review): the role of nbigblocks is not visible in this file. */
-#ifdef STARPU_QUICK_CHECK
-unsigned size = 4*64;
-unsigned nblocks = 2;
-unsigned nbigblocks = 2;
-#elif !defined(STARPU_LONG_CHECK)
-unsigned size = 4*320;
-unsigned nblocks = 4;
-unsigned nbigblocks = 2;
-#else
-unsigned size = 16*320;
-unsigned nblocks = 16;
-unsigned nbigblocks = 2;
-#endif
-/* Set to 1 by the -no-prio option */
-unsigned noprio = 0;
-/* Set to 1 by the -display option */
-unsigned display = 0;
-/* Dimensions of the grid of ranks; -1 means "not given on the command line",
- * in which case parse_args() derives them from the number of nodes */
-int dblockx = -1;
-int dblocky = -1;
-
-/* Return the value that follows the option at argv[*pos] and move *pos onto
- * it. When the option is the last argument, print an error and return NULL
- * without moving *pos. */
-static const char *option_value(int argc, char **argv, int *pos)
-{
-	if (*pos + 1 >= argc)
-	{
-		fprintf(stderr, "option %s expects a value\n", argv[*pos]);
-		return NULL;
-	}
-	*pos += 1;
-	return argv[*pos];
-}
-
-/* Parse the command line into the global parameters (size, nblocks,
- * nbigblocks, dblockx, dblocky, noprio, display), then print the resulting
- * configuration.
- *
- * - An option given without its value is reported and ignored instead of
- *   reading past the end of argv.
- * - A value consumed by an option is never re-examined as an option.
- * - nblocks is clamped to [1, size] (to 1 when size is 0), because BLOCKSIZE
- *   divides by it.
- * - When dblockx or dblocky is missing or not strictly positive, both are
- *   recomputed from `nodes`: dblocky is the largest divisor of nodes that is
- *   at most sqrt(nodes), and dblockx is nodes/dblocky.
- *
- * -h / --help print the usage line; parsing then continues. */
-void parse_args(int argc, char **argv, int nodes)
-{
-	int i;
-	for (i = 1; i < argc; i++)
-	{
-		const char *opt = argv[i];
-		const char *value;
-
-		if (strcmp(opt, "-size") == 0)
-		{
-			if ((value = option_value(argc, argv, &i)) != NULL)
-				size = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-dblockx") == 0)
-		{
-			if ((value = option_value(argc, argv, &i)) != NULL)
-				dblockx = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-dblocky") == 0)
-		{
-			if ((value = option_value(argc, argv, &i)) != NULL)
-				dblocky = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-nblocks") == 0)
-		{
-			if ((value = option_value(argc, argv, &i)) != NULL)
-				nblocks = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-nbigblocks") == 0)
-		{
-			if ((value = option_value(argc, argv, &i)) != NULL)
-				nbigblocks = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-no-prio") == 0)
-		{
-			noprio = 1;
-		}
-		else if (strcmp(opt, "-display") == 0)
-		{
-			display = 1;
-		}
-		else if (strcmp(opt, "-h") == 0 || strcmp(opt, "--help") == 0)
-		{
-			printf("usage : %s [-display] [-no-prio] [-size size] [-nblocks nblocks] [-nbigblocks nbigblocks] [-dblockx dblockx] [-dblocky dblocky]\n", argv[0]);
-		}
-	}
-
-	if (nblocks > size)
-		nblocks = size;
-	/* BLOCKSIZE is size/nblocks: never let the divisor be zero */
-	if (nblocks == 0)
-		nblocks = 1;
-
-	/* my_distrib() takes values modulo dblockx and dblocky, so both must be positive */
-	if (dblockx <= 0 || dblocky <= 0)
-	{
-		int factor;
-		dblockx = nodes;
-		dblocky = 1;
-		for(factor=(int)sqrt((double)nodes) ; factor>1 ; factor--)
-		{
-			if (nodes % factor == 0)
-			{
-				dblockx = nodes/factor;
-				dblocky = factor;
-				break;
-			}
-		}
-	}
-	FPRINTF(stdout, "size: %u - nblocks: %u - dblocksx: %d - dblocksy: %d\n", size, nblocks, dblockx, dblocky);
-}
-

+ 0 - 34
nmad/examples/matrix_decomposition/mpi_decomposition_params.h

@@ -1,34 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __MPI_CHOLESKY_PARAMS_H__
-#define __MPI_CHOLESKY_PARAMS_H__
-
-/* Number of elements along one side of a block. Evaluated at each use from the
- * current values of size and nblocks; nblocks must not be zero. */
-#define BLOCKSIZE       (size/nblocks)
-
-/* Parameters defined in mpi_decomposition_params.c and set by parse_args() */
-extern unsigned size;
-extern unsigned nblocks;
-extern unsigned nbigblocks;
-extern unsigned noprio;
-extern unsigned display;
-extern int dblockx;
-extern int dblocky;
-
-/* Parses the command line into the variables above; nodes is used to choose
- * dblockx and dblocky when they are not given. */
-void parse_args(int argc, char **argv, int nodes);
-
-#endif // __MPI_CHOLESKY_PARAMS_H__
-

+ 0 - 30
nmad/examples/matrix_mult/Makefile

@@ -1,30 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2016  Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-
-# This makefile gives an example on how to build the testcase outside StarPU
-
-PRG	= mm
-
-CC	= mpicc
-CFLAGS	= $(shell pkg-config --cflags starpumpi-1.3) -g -Wall
-# Libraries belong in LDLIBS: the built-in link rule expands it after the
-# source/object files, whereas LDFLAGS is expanded before them.
-LDLIBS	= $(shell pkg-config --libs starpumpi-1.3) -lm
-
-# The special target name is case-sensitive: it must be spelled .PHONY
-.PHONY: all clean
-
-all: $(PRG)
-
-clean:
-	rm -f $(PRG) *.o starpu*.log

+ 0 - 25
nmad/examples/matrix_mult/environment

@@ -1,25 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2016  Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-
-# This script gives an example on how to set environment variables to build and run the testcase outside StarPU
-
-STARPU_INSTALL_DIR=/usr # set this to StarPU's installation directory
-
-# Append the previous value only when it is set and non-empty. A bare trailing
-# ':' would add an empty element, which the dynamic loader and the shell
-# interpret as the current directory.
-PATH="$STARPU_INSTALL_DIR/bin${PATH:+:$PATH}"
-PKG_CONFIG_PATH="$STARPU_INSTALL_DIR/lib/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
-LD_LIBRARY_PATH="$STARPU_INSTALL_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
-
-export PATH PKG_CONFIG_PATH LD_LIBRARY_PATH

+ 0 - 390
nmad/examples/matrix_mult/mm.c

@@ -1,390 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*
- * This example illustrates how to distribute a pre-existing data structure to
- * a set of computing nodes using StarPU-MPI routines.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-#include <starpu.h>
-#include <starpu_mpi.h>
-#include "helper.h"
-
-/* Set to 1 to print the parameters, the result matrix and a success message */
-#define VERBOSE 0
-
-static int N  = 16; /* Matrix size */
-static int BS =  4; /* Block size */
-
-/* Number of blocks per dimension; integer division, main() rejects sizes where N is not a multiple of BS */
-#define NB ((N)/(BS)) /* Number of blocks */
-
-/* Matrices. Will be allocated as regular, linearized C arrays */
-static double *A = NULL; /* A will be partitioned as BS rows x N  cols blocks */
-static double *B = NULL; /* B will be partitioned as N  rows x BS cols blocks */
-static double *C = NULL; /* C will be partitioned as BS rows x BS cols blocks */
-
-/* Arrays of data handles for managing matrix blocks:
- * NB handles for A, NB handles for B and NB*NB handles for C */
-static starpu_data_handle_t *A_h;
-static starpu_data_handle_t *B_h;
-static starpu_data_handle_t *C_h;
-
-static int comm_rank; /* mpi rank of the process */
-static int comm_size; /* size of the mpi session */
-
-/* Allocate the three N x N matrices A, B and C.
- *
- * Regular 'malloc' could be used instead; starpu_malloc is used so that the
- * area is allocated in suitably pinned memory to improve data transfers,
- * especially with CUDA. A failed allocation is reported through
- * STARPU_CHECK_RETURN_VALUE. */
-static void alloc_matrices(void)
-{
-	/* Do the multiplication in size_t: N*N overflows int arithmetic for large N */
-	const size_t matrix_bytes = (size_t)N * (size_t)N * sizeof(double);
-	int ret;
-
-	ret = starpu_malloc((void **)&A, matrix_bytes);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
-	ret = starpu_malloc((void **)&B, matrix_bytes);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
-	ret = starpu_malloc((void **)&C, matrix_bytes);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
-}
-
-/* Give the three matrices back to StarPU, in the order A, B, C. */
-static void free_matrices(void)
-{
-	double *const matrices[] = { A, B, C };
-	unsigned idx;
-
-	for (idx = 0; idx < sizeof(matrices)/sizeof(matrices[0]); idx++)
-	{
-		starpu_free(matrices[idx]);
-	}
-}
-
-/* Fill the matrices: A is twice the identity, B holds the linear index of
- * each cell (row*N+col) and C is zeroed. */
-static void init_matrices(void)
-{
-	int row;
-
-	for (row = 0; row < N; row++)
-	{
-		int col;
-
-		for (col = 0; col < N; col++)
-		{
-			const int idx = row*N+col;
-
-			if (row == col)
-				A[idx] = 2;
-			else
-				A[idx] = 0;
-			B[idx] = idx;
-			C[idx] = 0;
-		}
-	}
-}
-
-#if VERBOSE
-/* Print the N x N matrix m, one row per line, cells preceded by a tab. */
-static void disp_matrix(double *m)
-{
-	int row;
-
-	for (row = 0; row < N; row++)
-	{
-		const double *line = m + row*N;
-		int col;
-
-		for (col = 0; col < N; col++)
-			printf("\t%.2lf", line[col]);
-		printf("\n");
-	}
-}
-#endif
-
-/* Compare every cell of C with twice its linear index, which is the expected
- * product given how init_matrices() fills A and B. A difference above 1.0
- * prints an error and terminates the process with status 1. */
-static void check_result(void)
-{
-	int row;
-
-	for (row = 0; row < N; row++)
-	{
-		int col;
-
-		for (col = 0; col < N; col++)
-		{
-			const int idx = row*N+col;
-			const double expected = 2*idx;
-
-			if (fabs(C[idx] - expected) <= 1.0)
-				continue;
-
-			fprintf(stderr, "check failed\n");
-			exit(1);
-		}
-	}
-#if VERBOSE
-	printf("success\n");
-#endif
-}
-
-
-/* Register the matrix blocks to StarPU and to StarPU-MPI.
- *
- * Allocates the handle arrays (NB handles for A, NB for B, NB*NB for C) and
- * registers each block twice: once with StarPU (memory location and geometry)
- * and once with StarPU-MPI (tag and owner rank, always 0 here). Tags are
- * assigned consecutively starting from 0: A blocks, then B blocks, then C
- * blocks. The process exits with status 1 if a handle array cannot be
- * allocated. */
-static void register_matrices(void)
-{
-	A_h = calloc(NB, sizeof(starpu_data_handle_t));
-	B_h = calloc(NB, sizeof(starpu_data_handle_t));
-	/* Count computed in size_t so that NB*NB cannot overflow int arithmetic */
-	C_h = calloc((size_t)NB*NB, sizeof(starpu_data_handle_t));
-	if (A_h == NULL || B_h == NULL || C_h == NULL)
-	{
-		fprintf(stderr, "cannot allocate the data handle arrays\n");
-		exit(1);
-	}
-
-	/* Memory region, where the data being registered resides.
-	 * In this example, all blocks are allocated by node 0, thus
-	 * - node 0 specifies STARPU_MAIN_RAM to indicate that it owns the block in its main memory
-	 * - nodes !0 specify -1 to indicate that they don't have a copy of the block initially
-	 */
-	int mr = (comm_rank == 0) ? STARPU_MAIN_RAM : -1;
-
-	/* mpi tag used for the block */
-	int tag = 0;
-
-	int b_row,b_col;
-
-	for (b_row = 0; b_row < NB; b_row++)
-	{
-		/* Register a block to StarPU */
-		starpu_matrix_data_register(&A_h[b_row],
-				mr,
-				(comm_rank == 0)?(uintptr_t)(A+b_row*BS*N):0, N, N, BS,
-				sizeof(double));
-
-		/* Register a block to StarPU-MPI, specifying the mpi tag to use for transferring the block
-		 * and the rank of the owner node.
-		 *
-		 * Note: StarPU-MPI is an autonomous layer built on top of StarPU, hence the two separate
-		 * registration steps.
-		 */
-		starpu_data_set_coordinates(A_h[b_row], 2, 0, b_row);
-		starpu_mpi_data_register(A_h[b_row], tag++, 0);
-	}
-
-	for (b_col = 0; b_col < NB; b_col++)
-	{
-		starpu_matrix_data_register(&B_h[b_col],
-				mr,
-				(comm_rank == 0)?(uintptr_t)(B+b_col*BS):0, N, BS, N,
-				sizeof(double));
-		starpu_data_set_coordinates(B_h[b_col], 2, b_col, 0);
-		starpu_mpi_data_register(B_h[b_col], tag++, 0);
-	}
-
-	for (b_row = 0; b_row < NB; b_row++)
-	{
-		for (b_col = 0; b_col < NB; b_col++)
-		{
-			starpu_matrix_data_register(&C_h[b_row*NB+b_col],
-					mr,
-					(comm_rank == 0)?(uintptr_t)(C+b_row*BS*N+b_col*BS):0, N, BS, BS,
-					sizeof(double));
-			starpu_data_set_coordinates(C_h[b_row*NB+b_col], 2, b_col, b_row);
-			starpu_mpi_data_register(C_h[b_row*NB+b_col], tag++, 0);
-		}
-	}
-}
-
-/* Hand each block of C over to the rank (b_row+b_col) modulo comm_size.
- * Note: since C will be Write-accessed, this distribution implicitly defines
- * which node performs the task associated to a given block. */
-static void distribute_matrix_C(void)
-{
-	int b_row;
-
-	for (b_row = 0; b_row < NB; b_row++)
-	{
-		starpu_data_handle_t *row_handles = &C_h[b_row*NB];
-		int b_col;
-
-		for (b_col = 0; b_col < NB; b_col++)
-		{
-			/* Rank on which the block should be computed */
-			const int owner = (b_row+b_col)%comm_size;
-
-			/* Move the block to that rank */
-			starpu_mpi_data_migrate(MPI_COMM_WORLD, row_handles[b_col], owner);
-		}
-	}
-}
-
-/* Migrate every block of C back to node 0, for display purpose.
- * This is not mandatory. */
-static void undistribute_matrix_C(void)
-{
-	int b_row;
-
-	for (b_row = 0; b_row < NB; b_row++)
-	{
-		starpu_data_handle_t *row_handles = &C_h[b_row*NB];
-		int b_col;
-
-		for (b_col = 0; b_col < NB; b_col++)
-			starpu_mpi_data_migrate(MPI_COMM_WORLD, row_handles[b_col], 0);
-	}
-}
-
-/* Unregister all the blocks of A, B and C (in that order) from StarPU, then
- * free the three handle arrays. */
-static void unregister_matrices()
-{
-	int blk;
-	int b_row;
-
-	for (blk = 0; blk < NB; blk++)
-		starpu_data_unregister(A_h[blk]);
-
-	for (blk = 0; blk < NB; blk++)
-		starpu_data_unregister(B_h[blk]);
-
-	for (b_row = 0; b_row < NB; b_row++)
-	{
-		starpu_data_handle_t *row_handles = &C_h[b_row*NB];
-
-		for (blk = 0; blk < NB; blk++)
-			starpu_data_unregister(row_handles[blk]);
-	}
-
-	free(A_h);
-	free(B_h);
-	free(C_h);
-}
-
-/* CPU kernel: accumulate the product of the A block (handles[0]) and the B
- * block (handles[1]) into the C block (handles[2]), using the pointer, the
- * dimensions and the leading dimension reported by each matrix handle.
- * In a real-life case, this would rather call a BLAS 'gemm' routine. */
-static void cpu_mult(void *handles[], STARPU_ATTRIBUTE_UNUSED void *arg)
-{
-	double *a = (double *)STARPU_MATRIX_GET_PTR(handles[0]);
-	unsigned cols_a = STARPU_MATRIX_GET_NX(handles[0]);
-	unsigned rows_a = STARPU_MATRIX_GET_NY(handles[0]);
-	unsigned ld_a = STARPU_MATRIX_GET_LD(handles[0]);
-
-	double *b = (double *)STARPU_MATRIX_GET_PTR(handles[1]);
-	unsigned cols_b = STARPU_MATRIX_GET_NX(handles[1]);
-	unsigned rows_b = STARPU_MATRIX_GET_NY(handles[1]);
-	unsigned ld_b = STARPU_MATRIX_GET_LD(handles[1]);
-
-	double *c = (double *)STARPU_MATRIX_GET_PTR(handles[2]);
-	unsigned cols_c = STARPU_MATRIX_GET_NX(handles[2]);
-	unsigned rows_c = STARPU_MATRIX_GET_NY(handles[2]);
-	unsigned ld_c = STARPU_MATRIX_GET_LD(handles[2]);
-
-	/* Sanity check, not needed in real life case */
-	assert(cols_c == cols_b);
-	assert(rows_c == rows_a);
-	assert(cols_a == rows_b);
-
-	unsigned row, col, inner;
-	for (row = 0; row < rows_c; row++)
-	{
-		for (col = 0; col < cols_c; col++)
-		{
-			double *cell = &c[row*ld_c+col];
-
-			for (inner = 0; inner < cols_a; inner++)
-			{
-				*cell += a[row*ld_a+inner] * b[inner*ld_b+col];
-			}
-
-#if VERBOSE
-			/* For illustration purpose, shows which node computed
-			 * the block in the decimal part of the cell */
-			*cell += comm_rank / 100.0;
-#endif
-		}
-	}
-}
-
-/* Define a StarPU 'codelet' structure for the matrix multiply kernel above.
- * This structure makes it possible to specify several implementations of the
- * kernel (such as CUDA or OpenCL versions); only a CPU one is provided here.
- */
-static struct starpu_codelet gemm_cl =
-{
-	.cpu_funcs = {cpu_mult}, /* cpu implementation(s) of the routine */
-	.nbuffers = 3, /* number of data handles referenced by this routine */
-	.modes = {STARPU_R, STARPU_R, STARPU_RW} /* access modes for each data handle: A and B are read, C is read and written */
-};
-
-/* Entry point: usage is `mm [N [BS]]`, where N is the matrix size and BS the
- * block size (BS must be at least 1 and divide N).
- *
- * Initializes StarPU and StarPU-MPI, lets rank 0 allocate and fill the
- * matrices, registers and distributes the blocks, submits one gemm task per
- * block of C, then gathers C back on rank 0 where the result is checked.
- *
- * Returns 0 on success, STARPU_TEST_SKIPPED when no CPU worker is available,
- * and 1 on invalid arguments. Both early-exit paths shut StarPU-MPI and
- * StarPU down before returning. */
-int main(int argc, char *argv[])
-{
-	/* Initializes the StarPU core */
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	/* Initializes the StarPU-MPI layer */
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	if (starpu_cpu_worker_get_count() == 0)
-	{
-		FPRINTF(stderr, "We need at least 1 CPU worker.\n");
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	/* Parse the matrix size and block size optional args */
-	if (argc > 1)
-	{
-		const char *arg_error = NULL;
-
-		N = atoi(argv[1]);
-		if (N < 1)
-		{
-			arg_error = "invalid matrix size";
-		}
-		else
-		{
-			if (argc > 2)
-			{
-				BS = atoi(argv[2]);
-			}
-			if (BS < 1 || N % BS != 0)
-			{
-				arg_error = "invalid block size";
-			}
-		}
-
-		if (arg_error != NULL)
-		{
-			/* Leave through a clean shutdown rather than exit() with
-			 * the runtime and the MPI layer still initialized */
-			fprintf(stderr, "%s\n", arg_error);
-			starpu_mpi_shutdown();
-			starpu_shutdown();
-			return 1;
-		}
-	}
-
-	/* Get the process rank and session size */
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size);
-
-	if (comm_rank == 0)
-	{
-#if VERBOSE
-		printf("N = %d\n", N);
-		printf("BS = %d\n", BS);
-		printf("NB = %d\n", NB);
-		printf("comm_size = %d\n", comm_size);
-#endif
-		/* In this example, node rank 0 performs all the memory allocations and initializations,
-		 * and the blocks are later distributed on the other nodes.
-		 * This is not mandatory however, and blocks could be allocated on other nodes right
-		 * from the beginning, depending on the application needs (in particular for the case
-		 * where the session wide data footprint is larger than a single node available memory. */
-		alloc_matrices();
-		init_matrices();
-	}
-
-	/* Register matrices to StarPU and StarPU-MPI */
-	register_matrices();
-	/* Distribute C blocks */
-	distribute_matrix_C();
-
-	int b_row,b_col;
-
-	for (b_row = 0; b_row < NB; b_row++)
-	{
-		for (b_col = 0; b_col < NB; b_col++)
-		{
-			starpu_mpi_task_insert(MPI_COMM_WORLD, &gemm_cl,
-					STARPU_R,  A_h[b_row],
-					STARPU_R,  B_h[b_col],
-					STARPU_RW, C_h[b_row*NB+b_col],
-					0);
-		}
-	}
-
-	starpu_task_wait_for_all();
-
-	undistribute_matrix_C();
-	unregister_matrices();
-
-	if (comm_rank == 0)
-	{
-#if VERBOSE
-		disp_matrix(C);
-#endif
-		check_result();
-		free_matrices();
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-	return 0;
-}
-

+ 0 - 42
nmad/examples/mpi_lu/mpi_lu-double.h

@@ -1,42 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/* Double-precision configuration: element type and matching MPI datatype */
-#define TYPE double
-#define MPI_TYPE	MPI_DOUBLE
-
-/* Prefix exported symbols with starpu_pdlu_ */
-#define STARPU_PLU(name)       starpu_pdlu_##name
-
-/* Double-precision CUBLAS routines */
-#define CUBLAS_GEMM	cublasDgemm
-#define CUBLAS_TRSM	cublasDtrsm
-#define CUBLAS_SCAL	cublasDscal
-#define CUBLAS_GER	cublasDger
-#define CUBLAS_SWAP	cublasDswap
-#define CUBLAS_IAMAX	cublasIdamax
-
-/* Double-precision CPU BLAS wrappers */
-#define CPU_GEMM	STARPU_DGEMM
-#define CPU_GEMV	STARPU_DGEMV
-#define CPU_TRSM	STARPU_DTRSM
-#define CPU_SCAL	STARPU_DSCAL
-#define CPU_GER		STARPU_DGER
-#define CPU_SWAP	STARPU_DSWAP
-
-#define CPU_TRMM	STARPU_DTRMM
-#define CPU_AXPY	STARPU_DAXPY
-#define CPU_ASUM	STARPU_DASUM
-#define CPU_IAMAX	STARPU_IDAMAX
-
-/* Note: 10e-10 is 1e-9. NOTE(review): its use is not visible in this header */
-#define PIVOT_THRESHHOLD	10e-10

+ 0 - 42
nmad/examples/mpi_lu/mpi_lu-float.h

@@ -1,42 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/* Single-precision configuration: element type and matching MPI datatype */
-#define TYPE float
-#define MPI_TYPE	MPI_FLOAT
-
-/* Prefix exported symbols with starpu_pslu_ */
-#define STARPU_PLU(name)       starpu_pslu_##name
-
-/* Single-precision CUBLAS routines */
-#define CUBLAS_GEMM	cublasSgemm
-#define CUBLAS_TRSM	cublasStrsm
-#define CUBLAS_SCAL	cublasSscal
-#define CUBLAS_GER	cublasSger
-#define CUBLAS_SWAP	cublasSswap
-#define CUBLAS_IAMAX	cublasIsamax
-
-/* Single-precision CPU BLAS wrappers */
-#define CPU_GEMM	STARPU_SGEMM
-#define CPU_GEMV	STARPU_SGEMV
-#define CPU_TRSM	STARPU_STRSM
-#define CPU_SCAL	STARPU_SSCAL
-#define CPU_GER		STARPU_SGER
-#define CPU_SWAP	STARPU_SSWAP
-
-#define CPU_TRMM	STARPU_STRMM
-#define CPU_AXPY	STARPU_SAXPY
-#define CPU_ASUM	STARPU_SASUM
-#define CPU_IAMAX	STARPU_ISAMAX
-
-/* Note: 10e-5 is 1e-4. NOTE(review): its use is not visible in this header */
-#define PIVOT_THRESHHOLD	10e-5

+ 0 - 19
nmad/examples/mpi_lu/pdlu.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "pxlu.c"

+ 0 - 19
nmad/examples/mpi_lu/pdlu_implicit.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2013  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "pxlu_implicit.c"

+ 0 - 19
nmad/examples/mpi_lu/pdlu_kernels.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "pxlu_kernels.c"

+ 0 - 597
nmad/examples/mpi_lu/plu_example.c

@@ -1,597 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2011, 2013, 2015, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include <math.h>
-#include <starpu.h>
-
-#include "pxlu.h"
-//#include "pxlu_kernels.h"
-
-#ifdef STARPU_HAVE_LIBNUMA
-#include <numaif.h>
-#endif
-
-/* Command-line parameters, see parse_args() */
-static unsigned long size = 4096; /* order of the matrix: blocks are (size/nblocks) x (size/nblocks) */
-static unsigned nblocks = 16; /* number of blocks per dimension */
-static unsigned check = 0; /* set by -check */
-static int p = 1; /* set by -p; p * q must equal the number of MPI nodes */
-static int q = 1; /* set by -q */
-static unsigned display = 0; /* set by -display */
-
-#ifdef STARPU_HAVE_LIBNUMA
-static unsigned numa = 0; /* set by -numa */
-#endif
-
-/* Byte counters: matrix blocks on one side, handle arrays and temporary buffers on the other */
-static size_t allocated_memory = 0;
-static size_t allocated_memory_extra = 0;
-
-/* Grid of nblocks*nblocks handles and block pointers */
-static starpu_data_handle_t *dataA_handles;
-static TYPE **dataA;
-
-/* In order to implement the distributed LU decomposition, we allocate
- * temporary buffers */
-#ifdef SINGLE_TMP11
-static starpu_data_handle_t tmp_11_block_handle;
-static TYPE *tmp_11_block;
-#else
-static starpu_data_handle_t *tmp_11_block_handles;
-static TYPE **tmp_11_block;
-#endif
-#ifdef SINGLE_TMP1221
-static starpu_data_handle_t *tmp_12_block_handles;
-static TYPE **tmp_12_block;
-static starpu_data_handle_t *tmp_21_block_handles;
-static TYPE **tmp_21_block;
-#else
-/* Two sets of buffers, selected by k%2 in the accessors */
-static starpu_data_handle_t *(tmp_12_block_handles[2]);
-static TYPE **(tmp_12_block[2]);
-static starpu_data_handle_t *(tmp_21_block_handles[2]);
-static TYPE **(tmp_21_block[2]);
-#endif
-
-/* Return the value that follows the option at argv[*pos] and move *pos onto
- * it. When the option is the last argument, print an error and return NULL
- * without moving *pos. */
-static const char *next_option_value(int argc, char **argv, int *pos)
-{
-	if (*pos + 1 >= argc)
-	{
-		fprintf(stderr, "option %s expects a value\n", argv[*pos]);
-		return NULL;
-	}
-	*pos += 1;
-	return argv[*pos];
-}
-
-/* Parse the command line into the file-scope parameters (size, nblocks,
- * check, display, numa, p, q).
- *
- * An option given without its value is reported and ignored instead of
- * reading past the end of argv, and a value consumed by an option is never
- * re-examined as an option. rank is only used to print the libnuma warning
- * once, on rank 0. -h, -help and --help print the usage and exit with
- * status 0. */
-static void parse_args(int rank, int argc, char **argv)
-{
-	int i;
-
-	/* rank is not referenced when libnuma is available */
-	(void)rank;
-
-	for (i = 1; i < argc; i++)
-	{
-		const char *opt = argv[i];
-		const char *value;
-
-		if (strcmp(opt, "-size") == 0)
-		{
-			if ((value = next_option_value(argc, argv, &i)) != NULL)
-				size = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-nblocks") == 0)
-		{
-			if ((value = next_option_value(argc, argv, &i)) != NULL)
-				nblocks = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-check") == 0)
-		{
-			check = 1;
-		}
-		else if (strcmp(opt, "-display") == 0)
-		{
-			display = 1;
-		}
-		else if (strcmp(opt, "-numa") == 0)
-		{
-#ifdef STARPU_HAVE_LIBNUMA
-			numa = 1;
-#else
-			if (rank == 0)
-				fprintf(stderr, "Warning: libnuma is not available\n");
-#endif
-		}
-		else if (strcmp(opt, "-p") == 0)
-		{
-			if ((value = next_option_value(argc, argv, &i)) != NULL)
-				p = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-q") == 0)
-		{
-			if ((value = next_option_value(argc, argv, &i)) != NULL)
-				q = strtol(value, NULL, 10);
-		}
-		else if (strcmp(opt, "-h") == 0 || strcmp(opt, "-help") == 0 || strcmp(opt, "--help") == 0)
-		{
-			fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q]\n", argv[0]);
-			fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n");
-			exit(0);
-		}
-	}
-}
-
-/* Report the value of the display flag set by the -display option. */
-unsigned STARPU_PLU(display_flag)(void)
-{
-	const unsigned flag = display;
-
-	return flag;
-}
-
-/* Fill a square block of side psize/pnblocks with values drawn, in storage
- * order, from starpu_drand48(). */
-static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks)
-{
-	const unsigned block_size = (psize/pnblocks);
-	unsigned line;
-
-	for (line = 0; line < block_size; line++)
-	{
-		unsigned col;
-
-		for (col = 0; col < block_size; col++)
-		{
-			blockptr[col+line*block_size] = (TYPE)starpu_drand48();
-		}
-	}
-}
-
-/* Accessor for the handle of the temporary "11" block: a single handle when
- * SINGLE_TMP11 is defined, otherwise the k-th entry of the handle array. */
-#ifdef SINGLE_TMP11
-starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(void)
-{
-	starpu_data_handle_t handle = tmp_11_block_handle;
-
-	return handle;
-}
-#else
-starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(unsigned k)
-{
-	starpu_data_handle_t handle = tmp_11_block_handles[k];
-
-	return handle;
-}
-#endif
-
-/* Accessors for the handles of the temporary "12" and "21" blocks. When
- * SINGLE_TMP1221 is defined there is one array of each kind; otherwise there
- * are two of each and k%2 selects which one is read. */
-#ifdef SINGLE_TMP1221
-starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j)
-{
-	starpu_data_handle_t handle = tmp_12_block_handles[j];
-
-	return handle;
-}
-
-starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i)
-{
-	starpu_data_handle_t handle = tmp_21_block_handles[i];
-
-	return handle;
-}
-#else
-starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k)
-{
-	const unsigned slot = k%2;
-
-	return tmp_12_block_handles[slot][j];
-}
-
-starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k)
-{
-	const unsigned slot = k%2;
-
-	return tmp_21_block_handles[slot][i];
-}
-#endif
-
-/* The temporary "11" block is always reported as needed; none of the
- * arguments is consulted. */
-static unsigned tmp_11_block_is_needed(int rank, unsigned pnblocks, unsigned k)
-{
-	const unsigned needed = 1;
-
-	return needed;
-}
-
-/* Return 1 when get_block_rank(i, j) equals rank for at least one i in
- * [1, pnblocks), 0 otherwise. */
-static unsigned tmp_12_block_is_needed(int rank, unsigned pnblocks, unsigned j)
-{
-	unsigned needed = 0;
-	unsigned i = 1;
-
-	while (!needed && i < pnblocks)
-	{
-		if (get_block_rank(i, j) == rank)
-			needed = 1;
-		i++;
-	}
-
-	return needed;
-}
-
-/* Return 1 when get_block_rank(i, j) equals rank for at least one j in
- * [1, pnblocks), 0 otherwise. */
-static unsigned tmp_21_block_is_needed(int rank, unsigned pnblocks, unsigned i)
-{
-	unsigned needed = 0;
-	unsigned j = 1;
-
-	while (!needed && j < pnblocks)
-	{
-		if (get_block_rank(i, j) == rank)
-			needed = 1;
-		j++;
-	}
-
-	return needed;
-}
-
-static void init_matrix(int rank)
-{
-#ifdef STARPU_HAVE_LIBNUMA
-	if (numa)
-	{
-		fprintf(stderr, "Using INTERLEAVE policy\n");
-		unsigned long nodemask = ((1<<0)|(1<<1));
-		int ret = set_mempolicy(MPOL_INTERLEAVE, &nodemask, 3);
-		if (ret)
-			perror("set_mempolicy failed");
-	}
-#endif
-
-	/* Allocate a grid of data handles, not all of them have to be allocated later on */
-	dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t));
-	dataA = calloc(nblocks*nblocks, sizeof(TYPE *));
-	allocated_memory_extra += nblocks*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-
-	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
-
-	/* Allocate all the blocks that belong to this mpi node */
-	unsigned long i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < nblocks; i++)
-		{
-			TYPE **blockptr = &dataA[j+i*nblocks];
-//			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
-			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
-
-			if (get_block_rank(i, j) == rank)
-			{
-				/* This blocks should be treated by the current MPI process */
-				/* Allocate and fill it */
-				starpu_malloc((void **)blockptr, blocksize);
-				allocated_memory += blocksize;
-
-				//fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j);
-				fill_block_with_random(*blockptr, size, nblocks);
-				//fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j);
-				if (i == j)
-				{
-					unsigned tmp;
-					for (tmp = 0; tmp < size/nblocks; tmp++)
-					{
-						(*blockptr)[tmp*((size/nblocks)+1)] += (TYPE)10*nblocks;
-					}
-				}
-
-				/* Register it to StarPU */
-				starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM,
-					(uintptr_t)*blockptr, size/nblocks,
-					size/nblocks, size/nblocks, sizeof(TYPE));
-				starpu_data_set_coordinates(*handleptr, 2, j, i);
-			}
-			else
-			{
-				*blockptr = STARPU_POISON_PTR;
-				*handleptr = STARPU_POISON_PTR;
-			}
-		}
-	}
-
-	/* Allocate the temporary buffers required for the distributed algorithm */
-
-	unsigned k;
-
-	/* tmp buffer 11 */
-#ifdef SINGLE_TMP11
-	starpu_malloc((void **)&tmp_11_block, blocksize);
-	allocated_memory_extra += blocksize;
-	starpu_matrix_data_register(&tmp_11_block_handle, STARPU_MAIN_RAM, (uintptr_t)tmp_11_block,
-			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-#else
-	tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
-	tmp_11_block = calloc(nblocks, sizeof(TYPE *));
-	allocated_memory_extra += nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-
-	for (k = 0; k < nblocks; k++)
-	{
-		if (tmp_11_block_is_needed(rank, nblocks, k))
-		{
-			starpu_malloc((void **)&tmp_11_block[k], blocksize);
-			allocated_memory_extra += blocksize;
-			STARPU_ASSERT(tmp_11_block[k]);
-
-			starpu_matrix_data_register(&tmp_11_block_handles[k], STARPU_MAIN_RAM,
-				(uintptr_t)tmp_11_block[k],
-				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-		}
-	}
-#endif
-
-	/* tmp buffers 12 and 21 */
-#ifdef SINGLE_TMP1221
-	tmp_12_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
-	tmp_21_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
-	tmp_12_block = calloc(nblocks, sizeof(TYPE *));
-	tmp_21_block = calloc(nblocks, sizeof(TYPE *));
-
-	allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-#else
-	for (i = 0; i < 2; i++)
-	{
-		tmp_12_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle_t));
-		tmp_21_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle_t));
-		tmp_12_block[i] = calloc(nblocks, sizeof(TYPE *));
-		tmp_21_block[i] = calloc(nblocks, sizeof(TYPE *));
-
-		allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-	}
-#endif
-
-	for (k = 0; k < nblocks; k++)
-	{
-#ifdef SINGLE_TMP1221
-		if (tmp_12_block_is_needed(rank, nblocks, k))
-		{
-			starpu_malloc((void **)&tmp_12_block[k], blocksize);
-			allocated_memory_extra += blocksize;
-			STARPU_ASSERT(tmp_12_block[k]);
-
-			starpu_matrix_data_register(&tmp_12_block_handles[k], STARPU_MAIN_RAM,
-				(uintptr_t)tmp_12_block[k],
-				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-		}
-
-		if (tmp_21_block_is_needed(rank, nblocks, k))
-		{
-			starpu_malloc((void **)&tmp_21_block[k], blocksize);
-			allocated_memory_extra += blocksize;
-			STARPU_ASSERT(tmp_21_block[k]);
-
-			starpu_matrix_data_register(&tmp_21_block_handles[k], STARPU_MAIN_RAM,
-				(uintptr_t)tmp_21_block[k],
-				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-		}
-#else
-	for (i = 0; i < 2; i++)
-	{
-		if (tmp_12_block_is_needed(rank, nblocks, k))
-		{
-			starpu_malloc((void **)&tmp_12_block[i][k], blocksize);
-			allocated_memory_extra += blocksize;
-			STARPU_ASSERT(tmp_12_block[i][k]);
-
-			starpu_matrix_data_register(&tmp_12_block_handles[i][k], STARPU_MAIN_RAM,
-				(uintptr_t)tmp_12_block[i][k],
-				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-		}
-
-		if (tmp_21_block_is_needed(rank, nblocks, k))
-		{
-			starpu_malloc((void **)&tmp_21_block[i][k], blocksize);
-			allocated_memory_extra += blocksize;
-			STARPU_ASSERT(tmp_21_block[i][k]);
-
-			starpu_matrix_data_register(&tmp_21_block_handles[i][k], STARPU_MAIN_RAM,
-				(uintptr_t)tmp_21_block[i][k],
-				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-		}
-	}
-#endif
-	}
-
-	//display_all_blocks(nblocks, size/nblocks);
-}
-
-TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j)
-{
-	return dataA[j+i*nblocks];
-}
-
-int get_block_rank(unsigned i, unsigned j)
-{
-	/* Take a 2D block cyclic distribution */
-	/* NB: p (resp. q) is for "direction" i (resp. j) */
-	return (j % q) * p + (i % p);
-}
-
-starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j)
-{
-	return dataA_handles[j+i*nblocks];
-}
-
-static void display_grid(int rank, unsigned pnblocks)
-{
-	if (!display)
-		return;
-
-	//if (rank == 0)
-	{
-		fprintf(stderr, "2D grid layout (Rank %d): \n", rank);
-
-		unsigned i, j;
-		for (j = 0; j < pnblocks; j++)
-		{
-			for (i = 0; i < pnblocks; i++)
-			{
-				TYPE *blockptr = STARPU_PLU(get_block)(i, j);
-				starpu_data_handle_t handle = STARPU_PLU(get_block_handle)(i, j);
-
-				fprintf(stderr, "%d (data %p handle %p)", get_block_rank(i, j), blockptr, handle);
-			}
-			fprintf(stderr, "\n");
-		}
-	}
-}
-
-int main(int argc, char **argv)
-{
-	int rank;
-	int world_size;
-
-	/*
-	 *	Initialization
-	 */
-	int thread_support;
-	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS)
-	{
-		fprintf(stderr,"MPI_Init_thread failed\n");
-		exit(1);
-	}
-	if (thread_support == MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
-	if (thread_support < MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI does not have thread support!\n");
-
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
-
-	starpu_srand48((long int)time(NULL));
-
-	parse_args(rank, argc, argv);
-
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	/* We disable sequential consistency in this example */
-	starpu_data_set_default_sequential_consistency_flag(0);
-
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	STARPU_ASSERT(p*q == world_size);
-
-	starpu_cublas_init();
-
-	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	/*
-	 * 	Problem Init
-	 */
-
-	init_matrix(rank);
-
-	fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank,
-                        (int)(allocated_memory/(1024*1024)),
-			(int)(allocated_memory_extra/(1024*1024)),
-                        (int)((allocated_memory+allocated_memory_extra)/(1024*1024)));
-
-	display_grid(rank, nblocks);
-
-	TYPE *a_r = NULL;
-//	STARPU_PLU(display_data_content)(a_r, size);
-
-	if (check)
-	{
-		TYPE *x, *y;
-
-		x = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(x);
-
-		y = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(y);
-
-		if (rank == 0)
-		{
-			unsigned ind;
-			for (ind = 0; ind < size; ind++)
-				x[ind] = (TYPE)starpu_drand48();
-		}
-
-		a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
-
-		if (rank == 0)
-			STARPU_PLU(display_data_content)(a_r, size);
-
-//		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
-
-		free(x);
-		free(y);
-	}
-
-	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
-
-	/*
-	 * 	Report performance
-	 */
-
-	int reduce_ret;
-	double min_timing = timing;
-	double max_timing = timing;
-	double sum_timing = timing;
-
-	reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-
-	reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-
-	reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-
-	if (rank == 0)
-	{
-		fprintf(stderr, "Computation took: %f ms\n", max_timing/1000);
-		fprintf(stderr, "\tMIN : %f ms\n", min_timing/1000);
-		fprintf(stderr, "\tMAX : %f ms\n", max_timing/1000);
-		fprintf(stderr, "\tAVG : %f ms\n", sum_timing/(world_size*1000));
-
-		unsigned n = size;
-		double flop = (2.0f*n*n*n)/3.0f;
-		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/max_timing/1000.0f));
-	}
-
-	/*
-	 *	Test Result Correctness
-	 */
-
-	if (check)
-	{
-		/*
-		 *	Compute || A - LU ||
-		 */
-
-		STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r);
-
-#if 0
-		/*
-		 *	Compute || Ax - LUx ||
-		 */
-
-		unsigned ind;
-
-		y2 = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(y);
-
-		if (rank == 0)
-		{
-			for (ind = 0; ind < size; ind++)
-			{
-				y2[ind] = (TYPE)0.0;
-			}
-		}
-
-		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);
-
-		/* Compute y2 = y2 - y */
-		CPU_AXPY(size, -1.0, y, 1, y2, 1);
-
-		TYPE err = CPU_ASUM(size, y2, 1);
-		int max = CPU_IAMAX(size, y2, 1);
-
-		fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
-		fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
-#endif
-	}
-
-	/*
-	 * 	Termination
-	 */
-
-	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	starpu_cublas_shutdown();
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-#if 0
-	MPI_Finalize();
-#endif
-
-	return 0;
-}

+ 0 - 19
nmad/examples/mpi_lu/plu_example_double.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "plu_example.c"

+ 0 - 19
nmad/examples/mpi_lu/plu_example_float.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "plu_example.c"

+ 0 - 369
nmad/examples/mpi_lu/plu_implicit_example.c

@@ -1,369 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2011, 2013, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include <math.h>
-#include <starpu.h>
-
-#include "pxlu.h"
-//#include "pxlu_kernels.h"
-
-#ifdef STARPU_HAVE_LIBNUMA
-#include <numaif.h>
-#endif
-
-static unsigned long size = 4096;
-static unsigned nblocks = 16;
-static unsigned check = 0;
-static int p = 1;
-static int q = 1;
-static unsigned display = 0;
-
-#ifdef STARPU_HAVE_LIBNUMA
-static unsigned numa = 0;
-#endif
-
-static size_t allocated_memory = 0;
-static size_t allocated_memory_extra = 0;
-
-static starpu_data_handle_t *dataA_handles;
-static TYPE **dataA;
-
-int get_block_rank(unsigned i, unsigned j);
-
-static void parse_args(int argc, char **argv)
-{
-	int i;
-	for (i = 1; i < argc; i++)
-	{
-		if (strcmp(argv[i], "-size") == 0)
-		{
-			char *argptr;
-			size = strtol(argv[++i], &argptr, 10);
-		}
-
-		if (strcmp(argv[i], "-nblocks") == 0)
-		{
-			char *argptr;
-			nblocks = strtol(argv[++i], &argptr, 10);
-		}
-
-		if (strcmp(argv[i], "-check") == 0)
-		{
-			check = 1;
-		}
-
-		if (strcmp(argv[i], "-display") == 0)
-		{
-			display = 1;
-		}
-
-		if (strcmp(argv[i], "-numa") == 0)
-		{
-#ifdef STARPU_HAVE_LIBNUMA
-			numa = 1;
-#else
-			fprintf(stderr, "Warning: libnuma is not available\n");
-#endif
-		}
-
-		if (strcmp(argv[i], "-p") == 0)
-		{
-			char *argptr;
-			p = strtol(argv[++i], &argptr, 10);
-		}
-
-		if (strcmp(argv[i], "-q") == 0)
-		{
-			char *argptr;
-			q = strtol(argv[++i], &argptr, 10);
-		}
-
-		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0)
-		{
-			fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q]\n", argv[0]);
-			fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n");
-			exit(0);
-		}
-	}
-}
-
-unsigned STARPU_PLU(display_flag)(void)
-{
-	return display;
-}
-
-static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks)
-{
-	const unsigned block_size = (psize/pnblocks);
-
-	unsigned i, j;
-	for (i = 0; i < block_size; i++)
-	     for (j = 0; j < block_size; j++)
-	     {
-		  blockptr[j+i*block_size] = (TYPE)starpu_drand48();
-	     }
-}
-
-static void init_matrix(int rank)
-{
-#ifdef STARPU_HAVE_LIBNUMA
-	if (numa)
-	{
-		fprintf(stderr, "Using INTERLEAVE policy\n");
-		unsigned long nodemask = ((1<<0)|(1<<1));
-		int ret = set_mempolicy(MPOL_INTERLEAVE, &nodemask, 3);
-		if (ret)
-			perror("set_mempolicy failed");
-	}
-#endif
-
-	/* Allocate a grid of data handles, not all of them have to be allocated later on */
-	dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t));
-	dataA = calloc(nblocks*nblocks, sizeof(TYPE *));
-	allocated_memory_extra += nblocks*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-
-	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
-
-	/* Allocate all the blocks that belong to this mpi node */
-	unsigned long i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < nblocks; i++)
-		{
-			int block_rank = get_block_rank(i, j);
-			TYPE **blockptr = &dataA[j+i*nblocks];
-//			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
-			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
-
-			if (block_rank == rank)
-			{
-				/* This blocks should be treated by the current MPI process */
-				/* Allocate and fill it */
-				starpu_malloc((void **)blockptr, blocksize);
-				allocated_memory += blocksize;
-
-				//fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j);
-				fill_block_with_random(*blockptr, size, nblocks);
-				//fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j);
-				if (i == j)
-				{
-					unsigned tmp;
-					for (tmp = 0; tmp < size/nblocks; tmp++)
-					{
-						(*blockptr)[tmp*((size/nblocks)+1)] += (TYPE)10*nblocks;
-					}
-				}
-
-				/* Register it to StarPU */
-				starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM,
-					(uintptr_t)*blockptr, size/nblocks,
-					size/nblocks, size/nblocks, sizeof(TYPE));
-			}
-			else
-			{
-				starpu_matrix_data_register(handleptr, -1,
-					0, size/nblocks,
-					size/nblocks, size/nblocks, sizeof(TYPE));
-				*blockptr = STARPU_POISON_PTR;
-			}
-			starpu_data_set_coordinates(*handleptr, 2, j, i);
-			starpu_mpi_data_register(*handleptr, j+i*nblocks, block_rank);
-		}
-	}
-
-	//display_all_blocks(nblocks, size/nblocks);
-}
-
-TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j)
-{
-	return dataA[j+i*nblocks];
-}
-
-int get_block_rank(unsigned i, unsigned j)
-{
-	/* Take a 2D block cyclic distribution */
-	/* NB: p (resp. q) is for "direction" i (resp. j) */
-	return (j % q) * p + (i % p);
-}
-
-starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j)
-{
-	return dataA_handles[j+i*nblocks];
-}
-
-static void display_grid(int rank, unsigned pnblocks)
-{
-	if (!display)
-		return;
-
-	//if (rank == 0)
-	{
-		fprintf(stderr, "2D grid layout (Rank %d): \n", rank);
-
-		unsigned i, j;
-		for (j = 0; j < pnblocks; j++)
-		{
-			for (i = 0; i < pnblocks; i++)
-			{
-				TYPE *blockptr = STARPU_PLU(get_block)(i, j);
-				starpu_data_handle_t handle = STARPU_PLU(get_block_handle)(i, j);
-
-				fprintf(stderr, "%d (data %p handle %p)", get_block_rank(i, j), blockptr, handle);
-			}
-			fprintf(stderr, "\n");
-		}
-	}
-}
-
-int main(int argc, char **argv)
-{
-	int rank;
-	int world_size;
-
-	starpu_srand48((long int)time(NULL));
-
-	parse_args(argc, argv);
-
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
-
-	STARPU_ASSERT(p*q == world_size);
-
-	starpu_cublas_init();
-
-	/*
-	 * 	Problem Init
-	 */
-
-	init_matrix(rank);
-
-	fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank,
-                        (int)(allocated_memory/(1024*1024)),
-			(int)(allocated_memory_extra/(1024*1024)),
-                        (int)((allocated_memory+allocated_memory_extra)/(1024*1024)));
-
-	display_grid(rank, nblocks);
-
-	TYPE *a_r = NULL;
-//	STARPU_PLU(display_data_content)(a_r, size);
-
-	if (check)
-	{
-		TYPE *x, *y;
-
-		x = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(x);
-
-		y = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(y);
-
-		if (rank == 0)
-		{
-			unsigned ind;
-			for (ind = 0; ind < size; ind++)
-				x[ind] = (TYPE)starpu_drand48();
-		}
-
-		a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
-
-		if (rank == 0)
-			STARPU_PLU(display_data_content)(a_r, size);
-
-//		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
-
-		free(x);
-		free(y);
-	}
-
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
-
-	/*
-	 * 	Report performance
-	 */
-
-	if (rank == 0)
-	{
-		fprintf(stderr, "Computation took: %f ms\n", timing/1000);
-
-		unsigned n = size;
-		double flop = (2.0f*n*n*n)/3.0f;
-		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
-	}
-
-	/*
-	 *	Test Result Correctness
-	 */
-
-	if (check)
-	{
-		/*
-		 *	Compute || A - LU ||
-		 */
-
-		STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r);
-
-#if 0
-		/*
-		 *	Compute || Ax - LUx ||
-		 */
-
-		unsigned ind;
-
-		y2 = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(y);
-
-		if (rank == 0)
-		{
-			for (ind = 0; ind < size; ind++)
-			{
-				y2[ind] = (TYPE)0.0;
-			}
-		}
-
-		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);
-
-		/* Compute y2 = y2 - y */
-		CPU_AXPY(size, -1.0, y, 1, y2, 1);
-
-		TYPE err = CPU_ASUM(size, y2, 1);
-		int max = CPU_IAMAX(size, y2, 1);
-
-		fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
-		fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
-#endif
-	}
-
-	/*
-	 * 	Termination
-	 */
-
-	starpu_cublas_shutdown();
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	return 0;
-}

+ 0 - 19
nmad/examples/mpi_lu/plu_implicit_example_double.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2013  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "plu_implicit_example.c"

+ 0 - 19
nmad/examples/mpi_lu/plu_implicit_example_float.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2013  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "plu_implicit_example.c"

+ 0 - 402
nmad/examples/mpi_lu/plu_outofcore_example.c

@@ -1,402 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2011, 2013-2014, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <time.h>
-#include <math.h>
-#include <starpu.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-
-#include "pxlu.h"
-//#include "pxlu_kernels.h"
-
-#ifdef STARPU_HAVE_LIBNUMA
-#include <numaif.h>
-#endif
-
-static unsigned long size = 4096;
-static unsigned nblocks = 16;
-static unsigned check = 0;
-static int p = 1;
-static int q = 1;
-static unsigned display = 0;
-static char *path = "./starpu-ooc-files";
-
-#ifdef STARPU_HAVE_LIBNUMA
-static unsigned numa = 0;
-#endif
-
-static size_t allocated_memory = 0;
-
-static starpu_data_handle_t *dataA_handles;
-
-int get_block_rank(unsigned i, unsigned j);
-
-static void parse_args(int argc, char **argv)
-{
-	int i;
-	for (i = 1; i < argc; i++)
-	{
-		if (strcmp(argv[i], "-size") == 0)
-		{
-			char *argptr;
-			size = strtol(argv[++i], &argptr, 10);
-		}
-
-		if (strcmp(argv[i], "-nblocks") == 0)
-		{
-			char *argptr;
-			nblocks = strtol(argv[++i], &argptr, 10);
-		}
-
-		if (strcmp(argv[i], "-check") == 0)
-		{
-			check = 1;
-		}
-
-		if (strcmp(argv[i], "-display") == 0)
-		{
-			display = 1;
-		}
-
-		if (strcmp(argv[i], "-numa") == 0)
-		{
-#ifdef STARPU_HAVE_LIBNUMA
-			numa = 1;
-#else
-			fprintf(stderr, "Warning: libnuma is not available\n");
-#endif
-		}
-
-		if (strcmp(argv[i], "-p") == 0)
-		{
-			char *argptr;
-			p = strtol(argv[++i], &argptr, 10);
-		}
-
-		if (strcmp(argv[i], "-q") == 0)
-		{
-			char *argptr;
-			q = strtol(argv[++i], &argptr, 10);
-		}
-
-		if (strcmp(argv[i], "-path") == 0)
-		{
-			path = argv[++i];
-		}
-
-		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0)
-		{
-			fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q] [-path PATH]\n", argv[0]);
-			fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n");
-			exit(0);
-		}
-	}
-}
-
-unsigned STARPU_PLU(display_flag)(void)
-{
-	return display;
-}
-
-static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks)
-{
-	const unsigned block_size = (psize/pnblocks);
-
-	unsigned i, j;
-	for (i = 0; i < block_size; i++)
-	     for (j = 0; j < block_size; j++)
-	     {
-		  blockptr[j+i*block_size] = (TYPE)starpu_drand48();
-	     }
-}
-
-static void create_matrix()
-{
-	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
-	TYPE *blockptr = malloc(blocksize);
-	int fd;
-	char *filename;
-	unsigned filename_length = strlen(path) + 1 + sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1;
-
-	filename = malloc(filename_length);
-
-	allocated_memory += nblocks*nblocks*blocksize;
-
-	/* Create the whole matrix on the disk */
-	unsigned i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < nblocks; i++)
-		{
-			fill_block_with_random(blockptr, size, nblocks);
-			if (i == j)
-			{
-				unsigned tmp;
-				for (tmp = 0; tmp < size/nblocks; tmp++)
-				{
-					blockptr[tmp*((size/nblocks)+1)] += (TYPE)10*nblocks;
-				}
-			}
-			snprintf(filename, filename_length, "%s/%u,%u", path, i, j);
-			fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0777);
-			if (fd < 0)
-			{
-				perror("open");
-				exit(1);
-			}
-			if (write(fd, blockptr, blocksize) != (starpu_ssize_t) blocksize)
-			{
-				fprintf(stderr,"short write");
-				exit(1);
-			}
-			if (close(fd) < 0)
-			{
-				perror("close");
-				exit(1);
-			}
-		}
-	}
-
-	free(blockptr);
-	free(filename);
-}
-
-static void init_matrix(int rank)
-{
-	/* Allocate a grid of data handles, not all of them have to be allocated later on */
-	dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t));
-
-	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
-
-	int disk_node = starpu_disk_register(&starpu_disk_unistd_ops, path, STARPU_MAX(1024*1024, size*size*sizeof(TYPE)));
-	assert(disk_node >= 0);
-
-	char filename[sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1];
-
-	/* Allocate all the blocks that belong to this mpi node */
-	unsigned i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < nblocks; i++)
-		{
-			int block_rank = get_block_rank(i, j);
-//			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
-			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
-
-			if (block_rank == rank)
-			{
-				void *disk_obj;
-				snprintf(filename, sizeof(filename), "%u,%u", i, j);
-				/* Register it to StarPU */
-				disk_obj = starpu_disk_open(disk_node, filename, blocksize);
-				if (!disk_obj)
-				{
-					fprintf(stderr,"could not open %s\n", filename);
-					exit(1);
-				}
-				starpu_matrix_data_register(handleptr, disk_node,
-					(uintptr_t) disk_obj, size/nblocks,
-					size/nblocks, size/nblocks, sizeof(TYPE));
-			}
-			else
-			{
-				starpu_matrix_data_register(handleptr, -1,
-					0, size/nblocks,
-					size/nblocks, size/nblocks, sizeof(TYPE));
-			}
-			starpu_data_set_coordinates(*handleptr, 2, j, i);
-			starpu_mpi_data_register(*handleptr, j+i*nblocks, block_rank);
-		}
-	}
-
-	//display_all_blocks(nblocks, size/nblocks);
-}
-
-TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j)
-{
-	/* This does not really make sense in out of core */
-	assert(0);
-}
-
-int get_block_rank(unsigned i, unsigned j)
-{
-	/* Take a 2D block cyclic distribution */
-	/* NB: p (resp. q) is for "direction" i (resp. j) */
-	return (j % q) * p + (i % p);
-}
-
-starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j)
-{
-	return dataA_handles[j+i*nblocks];
-}
-
-int main(int argc, char **argv)
-{
-	int rank;
-	int world_size;
-	int ret;
-	unsigned i, j;
-
-	starpu_srand48((long int)time(NULL));
-
-	parse_args(argc, argv);
-
-	ret = mkdir(path, 0777);
-	if (ret != 0 && errno != EEXIST)
-	{
-		fprintf(stderr,"%s does not exist\n", path);
-		exit(1);
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
-
-	STARPU_ASSERT(p*q == world_size);
-
-	starpu_cublas_init();
-
-	/*
-	 * 	Problem Init
-	 */
-
-	if (rank == 0)
-		create_matrix();
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-
-	init_matrix(rank);
-
-	if (rank == 0)
-		fprintf(stderr, "%dMB on disk\n", (int)(allocated_memory/(1024*1024)));
-
-	TYPE *a_r = NULL;
-//	STARPU_PLU(display_data_content)(a_r, size);
-
-	if (check)
-	{
-		TYPE *x, *y;
-
-		x = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(x);
-
-		y = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(y);
-
-		if (rank == 0)
-		{
-			unsigned ind;
-			for (ind = 0; ind < size; ind++)
-				x[ind] = (TYPE)starpu_drand48();
-		}
-
-		a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
-
-		if (rank == 0)
-			STARPU_PLU(display_data_content)(a_r, size);
-
-//		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
-
-		free(x);
-		free(y);
-	}
-
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
-
-	/*
-	 * 	Report performance
-	 */
-
-	if (rank == 0)
-	{
-		fprintf(stderr, "Computation took: %f ms\n", timing/1000);
-
-		unsigned n = size;
-		double flop = (2.0f*n*n*n)/3.0f;
-		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
-	}
-
-	/*
-	 *	Test Result Correctness
-	 */
-
-	if (check)
-	{
-		/*
-		 *	Compute || A - LU ||
-		 */
-
-		STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r);
-
-#if 0
-		/*
-		 *	Compute || Ax - LUx ||
-		 */
-
-		unsigned ind;
-
-		y2 = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(y);
-
-		if (rank == 0)
-		{
-			for (ind = 0; ind < size; ind++)
-			{
-				y2[ind] = (TYPE)0.0;
-			}
-		}
-
-		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);
-
-		/* Compute y2 = y2 - y */
-		CPU_AXPY(size, -1.0, y, 1, y2, 1);
-
-		TYPE err = CPU_ASUM(size, y2, 1);
-		int max = CPU_IAMAX(size, y2, 1);
-
-		fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
-		fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
-#endif
-	}
-
-	/*
-	 * 	Termination
-	 */
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < nblocks; i++)
-		{
-			starpu_data_unregister(dataA_handles[j+nblocks*i]);
-		}
-	}
-
-	starpu_cublas_shutdown();
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	return 0;
-}

+ 0 - 19
nmad/examples/mpi_lu/plu_outofcore_example_double.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2013  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "plu_outofcore_example.c"

+ 0 - 19
nmad/examples/mpi_lu/plu_outofcore_example_float.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2013  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "plu_outofcore_example.c"

+ 0 - 397
nmad/examples/mpi_lu/plu_solve.c

@@ -1,397 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <math.h>
-#include "pxlu.h"
-
-/*
- *	Various useful functions
- */
-
-/*
- * Return sqrt(Sum(|v[k]|^2)) over the n*n elements of v, i.e. the Frobenius
- * norm of the n x n matrix stored contiguously in v.
- */
-static double frobenius_norm(TYPE *v, unsigned n)
-{
-	double sum2 = 0.0;
-
-	/* size_t indices: i+n*j would wrap around in unsigned arithmetic once
-	 * n*n exceeds UINT_MAX */
-	size_t i,j;
-	for (j = 0; j < n; j++)
-		for (i = 0; i < n; i++)
-		{
-			/* fabs() is the double variant; fabsl() is the long double
-			 * one and only worked here through implicit conversions */
-			double a = fabs((double)v[i+n*j]);
-			sum2 += a*a;
-		}
-
-	return sqrt(sum2);
-}
-
-/* Dump a blocksize x blocksize block on stderr, framed by "DISPLAY BLOCK" and
- * "****"; nothing is printed unless the display flag is set. */
-void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize)
-{
-	if (STARPU_PLU(display_flag)())
-	{
-		unsigned line, pos;
-
-		fprintf(stderr, "DISPLAY BLOCK\n");
-
-		for (line = 0; line < blocksize; line++)
-		{
-			/* one output line per value of the fast (unit-stride) index */
-			for (pos = 0; pos < blocksize; pos++)
-			{
-				TYPE *entry = &data[line + pos*blocksize];
-				fprintf(stderr, "%f ", *entry);
-			}
-			fprintf(stderr, "\n");
-		}
-
-		fprintf(stderr, "****\n");
-	}
-}
-
-/* Copy into outblock the entries inblock[lj + li*block_size] with li > lj and
- * store 1 on the diagonal; the other entries of outblock are not written. */
-void STARPU_PLU(extract_upper)(unsigned block_size, TYPE *inblock, TYPE *outblock)
-{
-	unsigned d;
-	for (d = 0; d < block_size; d++)
-	{
-		unsigned other = d + 1;
-
-		/* Upper block diag is 1 */
-		outblock[d + d*block_size] = (TYPE)1.0;
-
-		while (other < block_size)
-		{
-			unsigned pos = d + other*block_size;
-			outblock[pos] = inblock[pos];
-			other++;
-		}
-	}
-}
-
-/* Copy into outblock the entries inblock[lj + li*block_size] with li <= lj
- * (diagonal included); the other entries of outblock are not written. */
-void STARPU_PLU(extract_lower)(unsigned block_size, TYPE *inblock, TYPE *outblock)
-{
-	unsigned fast;
-	for (fast = 0; fast < block_size; fast++)
-	{
-		unsigned slow = 0;
-
-		while (slow <= fast)
-		{
-			unsigned pos = fast + slow*block_size;
-			outblock[pos] = inblock[pos];
-			slow++;
-		}
-	}
-}
-
-/*
- *	Compute Ax = y
- */
-
-/* Print the three pointers on stderr, then call CPU_GEMV on a square block
- * with both scalar factors set to 1.0 (presumably sub_y += block_data * sub_x,
- * following the BLAS GEMV convention -- confirm against the CPU_GEMV macro). */
-static void STARPU_PLU(compute_ax_block)(unsigned block_size, TYPE *block_data, TYPE *sub_x, TYPE *sub_y)
-{
-	const unsigned n = block_size;
-
-	fprintf(stderr, "block data %p sub x %p sub y %p\n", block_data, sub_x, sub_y);
-
-	CPU_GEMV("N", n, n, 1.0, block_data, n, sub_x, 1, 1.0, sub_y, 1);
-}
-
-/* Run compute_ax_block() on a temporary copy of a diagonal block reduced by
- * extract_upper(); blocks hold size/nblocks elements per dimension. */
-static void STARPU_PLU(compute_ax_block_upper)(unsigned size, unsigned nblocks,
-				 TYPE *block_data, TYPE *sub_x, TYPE *sub_y)
-{
-	const unsigned bs = size/nblocks;
-	TYPE *scratch;
-
-	/* Zero-filled scratch block: entries not written by extract_upper stay 0 */
-	scratch = calloc(bs*bs, sizeof(TYPE));
-	STARPU_PLU(extract_upper)(bs, block_data, scratch);
-
-	STARPU_PLU(compute_ax_block)(bs, scratch, sub_x, sub_y);
-
-	free(scratch);
-}
-
-/* Run compute_ax_block() on a temporary copy of a diagonal block reduced by
- * extract_lower(); blocks hold size/nblocks elements per dimension. */
-static void STARPU_PLU(compute_ax_block_lower)(unsigned size, unsigned nblocks,
-				 TYPE *block_data, TYPE *sub_x, TYPE *sub_y)
-{
-	const unsigned bs = size/nblocks;
-	TYPE *scratch;
-
-	/* Zero-filled scratch block: entries not written by extract_lower stay 0 */
-	scratch = calloc(bs*bs, sizeof(TYPE));
-	STARPU_PLU(extract_lower)(bs, block_data, scratch);
-
-	STARPU_PLU(compute_ax_block)(size/nblocks, scratch, sub_x, sub_y);
-
-	free(scratch);
-}
-
-/*
- * Compute y = L(Ux) from the distributed LU factors.
- *
- * Each process accumulates in a private buffer the contribution of the blocks
- * it owns (get_block_rank(...) == rank); the contributions are then summed on
- * rank 0 with MPI_Reduce.
- *
- * size:    number of elements of x and y
- * x:       input vector; overwritten with Ux (reduced on rank 0, then
- *          broadcast so that every process holds it for the second pass)
- * y:       output vector, receives the final sum on rank 0
- * nblocks: number of blocks per dimension; a block holds size/nblocks
- *          elements per dimension
- * rank:    rank of the calling process
- */
-void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank)
-{
-	/* Create temporary buffers where all MPI processes are going to
-	 * compute Ui x = yi where Ui is the matrix containing the blocks of U
-	 * affected to process i, and 0 everywhere else. We then have y as the
-	 * sum of all yi. */
-	TYPE *yi = calloc(size, sizeof(TYPE));
-
-	fprintf(stderr, "Compute LU\n");
-
-	unsigned block_size = size/nblocks;
-
-	/* Compute UiX = Yi */
-	unsigned long i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		if (get_block_rank(j, j) == rank)
-		{
-			TYPE *block_data = STARPU_PLU(get_block)(j, j);
-			TYPE *sub_x = &x[j*(block_size)];
-			TYPE *sub_yi = &yi[j*(block_size)];
-
-			STARPU_PLU(compute_ax_block_upper)(size, nblocks, block_data, sub_x, sub_yi);
-		}
-
-		for (i = j + 1; i < nblocks; i++)
-		{
-			if (get_block_rank(i, j) == rank)
-			{
-				/* That block belongs to the current MPI process */
-				TYPE *block_data = STARPU_PLU(get_block)(i, j);
-				TYPE *sub_x = &x[i*(block_size)];
-				TYPE *sub_yi = &yi[j*(block_size)];
-
-				STARPU_PLU(compute_ax_block)(size/nblocks, block_data, sub_x, sub_yi);
-			}
-		}
-	}
-
-	/* Grab Sum Yi in X */
-	MPI_Reduce(yi, x, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
-	/* yi is reused as the accumulator of the second pass */
-	memset(yi, 0, size*sizeof(TYPE));
-
-//	unsigned ind;
-//	if (rank == 0)
-//	{
-//		fprintf(stderr, "INTERMEDIATE\n");
-//		for (ind = 0; ind < STARPU_MIN(10, size); ind++)
-//		{
-//			fprintf(stderr, "x[%d] = %f\n", ind, (float)x[ind]);
-//		}
-//		fprintf(stderr, "****\n");
-//	}
-
-	/* Everyone needs x. x is already the address of the buffer: passing &x
-	 * would broadcast over the pointer variable itself and the memory
-	 * following it instead of the vector. */
-	int bcst_ret;
-	bcst_ret = MPI_Bcast(x, size, MPI_TYPE, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(bcst_ret == MPI_SUCCESS);
-
-	/* Compute LiX = Yi (with X = UX) */
-	for (j = 0; j < nblocks; j++)
-	{
-		if (j > 0)
-		for (i = 0; i < j; i++)
-		{
-			if (get_block_rank(i, j) == rank)
-			{
-				/* That block belongs to the current MPI process */
-				TYPE *block_data = STARPU_PLU(get_block)(i, j);
-				TYPE *sub_x = &x[i*(block_size)];
-				TYPE *sub_yi = &yi[j*(block_size)];
-
-				STARPU_PLU(compute_ax_block)(size/nblocks, block_data, sub_x, sub_yi);
-			}
-		}
-
-		if (get_block_rank(j, j) == rank)
-		{
-			TYPE *block_data = STARPU_PLU(get_block)(j, j);
-			TYPE *sub_x = &x[j*(block_size)];
-			TYPE *sub_yi = &yi[j*(block_size)];
-
-			STARPU_PLU(compute_ax_block_lower)(size, nblocks, block_data, sub_x, sub_yi);
-		}
-	}
-
-	/* Grab Sum Yi in Y */
-	MPI_Reduce(yi, y, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
-
-	free(yi);
-}
-
-
-
-/*
- *	Allocate a contiguous matrix on node 0 and fill it with the whole
- *	content of the matrix distributed across all nodes.
- */
-
-/*
- * Return a newly allocated, zero-initialized size x size matrix. On rank 0 it
- * is filled with every block of the distributed matrix: blocks owned by rank 0
- * are read through get_block(), the others are sent by their owner with
- * MPI_Send and received here with MPI_Recv. The buffer is allocated on every
- * process; the caller frees it.
- *
- * size:    matrix dimension
- * nblocks: number of blocks per dimension
- */
-TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks)
-{
-//	fprintf(stderr, "RECONSTRUCT MATRIX size %d nblocks %d\n", size, nblocks);
-
-	/* Element counts and offsets are computed in size_t: size*size and the
-	 * indices below wrap around in unsigned arithmetic for large matrices */
-	TYPE *bigmatrix = calloc((size_t)size*size, sizeof(TYPE));
-
-	size_t block_size = size/nblocks;
-	size_t block_elems = block_size*block_size;
-
-	int rank;
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-
-	unsigned bi, bj;
-	for (bj = 0; bj < nblocks; bj++)
-	for (bi = 0; bi < nblocks; bi++)
-	{
-		TYPE *block = NULL;
-
-		int block_rank = get_block_rank(bi, bj);
-
-		if (block_rank == 0)
-		{
-			block = STARPU_PLU(get_block)(bi, bj);
-		}
-		else
-		{
-			MPI_Status status;
-
-			if (rank == 0)
-			{
-				/* temporary landing buffer, released once copied */
-				block = calloc(block_elems, sizeof(TYPE));
-
-				int ret = MPI_Recv(block, (int)block_elems, MPI_TYPE, block_rank, 0, MPI_COMM_WORLD, &status);
-				STARPU_ASSERT(ret == MPI_SUCCESS);
-			}
-			else if (rank == block_rank)
-			{
-				block = STARPU_PLU(get_block)(bi, bj);
-				int ret = MPI_Send(block, (int)block_elems, MPI_TYPE, 0, 0, MPI_COMM_WORLD);
-				STARPU_ASSERT(ret == MPI_SUCCESS);
-			}
-		}
-
-		if (rank == 0)
-		{
-			size_t j, i;
-			for (j = 0; j < block_size; j++)
-			for (i = 0; i < block_size; i++)
-			{
-				bigmatrix[(j + bj*block_size)+(i+bi*block_size)*size] =
-									block[j+i*block_size];
-			}
-
-			if (block_rank != 0)
-				free(block);
-		}
-	}
-
-	return bigmatrix;
-}
-
-/*
- * Compute y = Ax with the distributed matrix A.
- *
- * x and y must be valid (at least) on 0: x is broadcast from rank 0 into the
- * x buffer of every process, and the per-process partial results are summed
- * into y on rank 0.
- *
- * size:    number of elements of x and y
- * nblocks: number of blocks per dimension
- * rank:    rank of the calling process
- *
- * NOTE(review): y[0] is printed on every rank although MPI_Reduce only fills y
- * on the root -- confirm that callers pass a readable y everywhere.
- */
-void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank)
-{
-	unsigned block_size = size/nblocks;
-
-	/* Send x to everyone. x is already the address of the buffer: passing
-	 * &x would broadcast over the pointer variable itself and the memory
-	 * following it instead of the vector. */
-	int bcst_ret;
-	bcst_ret = MPI_Bcast(x, size, MPI_TYPE, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(bcst_ret == MPI_SUCCESS);
-
-	/* Create temporary buffers where all MPI processes are going to
-	 * compute Ai x = yi where Ai is the matrix containing the blocks of A
-	 * affected to process i, and 0 everywhere else. We then have y as the
-	 * sum of all yi. */
-	TYPE *yi = calloc(size, sizeof(TYPE));
-
-	/* Compute Aix = yi */
-	unsigned long i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < nblocks; i++)
-		{
-			if (get_block_rank(i, j) == rank)
-			{
-				/* That block belongs to the current MPI process */
-				TYPE *block_data = STARPU_PLU(get_block)(i, j);
-				TYPE *sub_x = &x[i*block_size];
-				TYPE *sub_yi = &yi[j*block_size];
-
-				STARPU_PLU(compute_ax_block)(block_size, block_data, sub_x, sub_yi);
-			}
-		}
-	}
-
-	/* Compute the Sum of all yi = y */
-	MPI_Reduce(yi, y, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
-
-	fprintf(stderr, "RANK %d - FOO 1 y[0] %f\n", rank, y[0]);
-
-	free(yi);
-}
-
-/*
- * Rebuild the whole factorized matrix with reconstruct_matrix(), then, on
- * rank 0, split it into L and U (unit diagonal), compute L*U and print on
- * stderr how far it is from Asaved (average and max error, and the residual
- * ||A-LU|| / (||A||*N)).
- *
- * size:    matrix dimension
- * nblocks: number of blocks per dimension
- * Asaved:  copy of the original matrix; only read on rank 0
- */
-void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved)
-{
-	TYPE *all_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
-
-	unsigned display = STARPU_PLU(display_flag)();
-
-	int rank;
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-
-	if (rank == 0)
-	{
-		/* calloc() zero-fills and takes the element count in size_t; the
-		 * former memset(size*size*sizeof(TYPE)) computed size*size in
-		 * unsigned arithmetic, unlike the matching malloc() */
-		TYPE *L = calloc((size_t)size*size, sizeof(TYPE));
-		TYPE *U = calloc((size_t)size*size, sizeof(TYPE));
-
-		/* only keep the lower part */
-		size_t i, j;
-		for (j = 0; j < size; j++)
-		{
-			for (i = 0; i < j; i++)
-			{
-				L[j+i*size] = all_r[j+i*size];
-			}
-
-			/* diag i = j */
-			L[j+j*size] = all_r[j+j*size];
-			U[j+j*size] = 1.0;
-
-			for (i = j+1; i < size; i++)
-			{
-				U[j+i*size] = all_r[j+i*size];
-			}
-		}
-
-		STARPU_PLU(display_data_content)(L, size);
-		STARPU_PLU(display_data_content)(U, size);
-
-		/* now A_err = L, compute L*U */
-		CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
-
-		if (display)
-			fprintf(stderr, "\nLU\n");
-
-		STARPU_PLU(display_data_content)(L, size);
-
-		/* compute "LU - A" in L*/
-		CPU_AXPY(size*size, -1.0, Asaved, 1, L, 1);
-
-		TYPE err = CPU_ASUM(size*size, L, 1);
-		int max = CPU_IAMAX(size*size, L, 1);
-
-		if (display)
-			fprintf(stderr, "DISPLAY ERROR\n");
-
-		STARPU_PLU(display_data_content)(L, size);
-
-		fprintf(stderr, "(A - LU) Avg error : %e\n", err/(size*size));
-		fprintf(stderr, "(A - LU) Max error : %e\n", L[max]);
-
-		double residual = frobenius_norm(L, size);
-		double matnorm = frobenius_norm(Asaved, size);
-
-		fprintf(stderr, "||A-LU|| / (||A||*N) : %e\n", residual/(matnorm*size));
-
-		/* L and U were leaked by the original code */
-		free(L);
-		free(U);
-	}
-
-	free(all_r);
-}

+ 0 - 19
nmad/examples/mpi_lu/plu_solve_double.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "plu_solve.c"

+ 0 - 19
nmad/examples/mpi_lu/plu_solve_float.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "plu_solve.c"

+ 0 - 19
nmad/examples/mpi_lu/pslu.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "pxlu.c"

+ 0 - 19
nmad/examples/mpi_lu/pslu_implicit.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2013  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "pxlu_implicit.c"

+ 0 - 19
nmad/examples/mpi_lu/pslu_kernels.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "pxlu_kernels.c"

+ 0 - 919
nmad/examples/mpi_lu/pxlu.c

@@ -1,919 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2011, 2014, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2012, 2013, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "pxlu.h"
-#include "pxlu_kernels.h"
-#include <sys/time.h>
-
-#define MPI_TAG11(k)	((1U << 16) | (k))
-#define MPI_TAG12(k, j)	((2U << 16) | (k)<<8 | (j))
-#define MPI_TAG21(k, i)	((3U << 16) | (i)<<8 | (k))
-/* MPI message tags (above): a kind code (1, 2 or 3) at bit 16, OR-ed with the
- * block indices, one of which is shifted by 8 bits for the 12 and 21 kinds.
- * The indices are not masked, so an unshifted index >= 256 overlaps the
- * shifted one. */
-// 11 21
-// 12 22
-/* StarPU tags (below): a kind code (1 to 11) at bit 50; TAG11-like macros put
- * k in the low bits, two-index macros put k at bit 32 and the other index in
- * the low bits, and TAG22 puts k at bit 32, i at bit 16 and j in the low bits. */
-#define TAG11(k)	((starpu_tag_t)( (1ULL<<50) | (unsigned long long)(k)))
-#define TAG12(k,j)	((starpu_tag_t)(((2ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG21(k,i)	((starpu_tag_t)(((3ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-#define TAG22(k,i,j)	((starpu_tag_t)(((4ULL<<50) | ((unsigned long long)(k)<<32) 	\
-					| ((unsigned long long)(i)<<16)	\
-					| (unsigned long long)(j))))
-#define TAG11_SAVE(k)	((starpu_tag_t)( (5ULL<<50) | (unsigned long long)(k)))
-#define TAG12_SAVE(k,j)	((starpu_tag_t)(((6ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG21_SAVE(k,i)	((starpu_tag_t)(((7ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-/* The *_SAVE_PARTIAL tags are the ones given to the synchronization task
- * created by receive_when_deps_are_done() */
-#define TAG11_SAVE_PARTIAL(k)	((starpu_tag_t)( (8ULL<<50) | (unsigned long long)(k)))
-#define TAG12_SAVE_PARTIAL(k,j)	((starpu_tag_t)(((9ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG21_SAVE_PARTIAL(k,i)	((starpu_tag_t)(((10ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-/* Tag the first diagonal task, TAG11(0), is declared to depend on */
-#define STARPU_TAG_INIT	((starpu_tag_t)(11ULL<<50))
-
-//#define VERBOSE_INIT	1
-
-//#define DEBUG	1
-
-static unsigned no_prio = 0;	/* when non-zero, no task is given STARPU_MAX_PRIO */
-
-static unsigned nblocks = 0;	/* upper bound of the block indices used below */
-static int rank = -1;	/* compared with get_block_rank() to tell local blocks apart */
-static int world_size = -1;	/* number of entries of the per-node arrays (rank masks, ...) */
-/* NOTE(review): the three variables above are presumably set when the
- * factorization starts; the assignment is outside this excerpt. */
-struct callback_arg	/* block indices handed to the task completion callbacks */
-{
-	unsigned i, j, k;	/* only the fields read by the callback are filled in */
-};
-
-/*
- *	Various
- */
-
-/* Return a malloc'ed debug_info record holding the three indices; the
- * allocation result is not checked. */
-static struct debug_info *create_debug_info(unsigned i, unsigned j, unsigned k)
-{
-	struct debug_info *dbg = malloc(sizeof(*dbg));
-
-	dbg->k = k;
-	dbg->j = j;
-	dbg->i = i;
-
-	return dbg;
-}
-
-/* Create a StarPU task identified by tag 'id', with no codelet argument yet */
-static struct starpu_task *create_task(starpu_tag_t id)
-{
-	struct starpu_task *t = starpu_task_create();
-
-	t->tag_id = id;
-	t->use_tag = 1;
-	t->cl_arg = NULL;
-
-	return t;
-}
-
-/* Post a detached send of 'handle' towards every node r with rank_mask[r] set
- * (world_size entries), all with the same MPI tag, and have 'tag' unlocked
- * when the transfers complete. With an empty mask, 'tag' is notified at once. */
-static void send_data_to_mask(starpu_data_handle_t handle, int *rank_mask, int mpi_tag, starpu_tag_t tag)
-{
-	STARPU_ASSERT(handle != STARPU_POISON_PTR);
-
-	/* One slot per possible destination; only the first ndests are used */
-	int dests[world_size];
-	MPI_Comm comms[world_size];
-	int tags[world_size];
-	starpu_data_handle_t handles[world_size];
-	unsigned ndests = 0;
-	int node;
-
-	for (node = 0; node < world_size; node++)
-	{
-		if (!rank_mask[node])
-			continue;
-
-		dests[ndests] = node;
-		comms[ndests] = MPI_COMM_WORLD;
-		tags[ndests] = mpi_tag;
-		handles[ndests] = handle;
-		ndests++;
-	}
-
-	if (ndests > 0)
-	{
-		starpu_mpi_isend_array_detached_unlock_tag(ndests, handles,
-				dests, tags, comms, tag);
-		return;
-	}
-
-	/* No message to send: release the tag at once */
-	starpu_tag_notify_from_apps(tag);
-}
-
-/* Initiate a receive request once all dependencies are fulfilled and unlock
- * tag 'unlocked_tag' once it's done. */
-/* Parameters of that receive, filled in by receive_when_deps_are_done() and
- * freed by callback_receive_when_done(). */
-struct recv_when_done_callback_arg
-{
-	int source;	/* rank the block is received from */
-	int mpi_tag;	/* MPI tag of the expected message */
-	starpu_data_handle_t handle;	/* handle the data is received into */
-	starpu_tag_t unlocked_tag;	/* StarPU tag passed to the receive for unlocking */
-};
-
-/* Post the detached receive described by _arg (a struct
- * recv_when_done_callback_arg) on MPI_COMM_WORLD, then free the descriptor. */
-static void callback_receive_when_done(void *_arg)
-{
-	struct recv_when_done_callback_arg *req = _arg;
-	starpu_data_handle_t dst = req->handle;
-	int from = req->source;
-	int msg_tag = req->mpi_tag;
-	starpu_tag_t to_unlock = req->unlocked_tag;
-
-	starpu_mpi_irecv_detached_unlock_tag(dst, from, msg_tag, MPI_COMM_WORLD, to_unlock);
-
-	free(req);
-}
-
-/* Arrange for 'handle' to be received from 'source' with MPI tag 'mpi_tag',
- * 'unlocked_tag' being unlocked by the receive. With ndeps == 0 the receive is
- * posted immediately; otherwise it is posted from the callback of a
- * synchronization task tagged 'partial_tag' depending on the ndeps tags of
- * deps_tags. */
-static void receive_when_deps_are_done(unsigned ndeps, starpu_tag_t *deps_tags,
-				int source, int mpi_tag,
-				starpu_data_handle_t handle,
-				starpu_tag_t partial_tag,
-				starpu_tag_t unlocked_tag)
-{
-	STARPU_ASSERT(handle != STARPU_POISON_PTR);
-
-	/* Ownership of the descriptor goes to callback_receive_when_done() */
-	struct recv_when_done_callback_arg *req = malloc(sizeof(*req));
-
-	req->unlocked_tag = unlocked_tag;
-	req->handle = handle;
-	req->mpi_tag = mpi_tag;
-	req->source = source;
-
-	if (ndeps != 0)
-		starpu_create_sync_task(partial_tag, ndeps, deps_tags,
-						callback_receive_when_done, req);
-	else
-		callback_receive_when_done(req);
-}
-
-/*
- *	Task 11 (diagonal factorization)
- */
-
-static void create_task_11_recv(unsigned k)
-{
-	/* The current node is not computing that task, so we receive the block
-	 * with MPI: the diagonal block k is received from its owner into the
-	 * temporary 11 buffer, and TAG11_SAVE(k) is unlocked by the receive. */
-
-	/* We don't issue a MPI receive request until everyone using the
-	 * temporary buffer is done : 11_(k-1) can be used by 12_(k-1)j and
-	 * 21(k-1)i with i,j >= k */
-	unsigned ndeps = 0;
-	starpu_tag_t tag_array[2*nblocks];
-	/* Dependencies are only collected when a single temporary buffer is
-	 * shared by all steps; otherwise ndeps stays 0 and the receive is posted
-	 * right away by receive_when_deps_are_done(). */
-#ifdef SINGLE_TMP11
-	if (k > 0)
-	{
-		unsigned i;
-		for (i = (k-1)+1; i < nblocks; i++)
-		{
-			if (rank == get_block_rank(i, k-1))
-				tag_array[ndeps++] = TAG21(k-1, i);
-		}
-	}
-
-	if (k > 0)
-	{
-		unsigned j;
-		for (j = (k-1)+1; j < nblocks; j++)
-		{
-			if (rank == get_block_rank(k-1, j))
-				tag_array[ndeps++] = TAG12(k-1, j);
-		}
-	}
-#endif
-
-	int source = get_block_rank(k, k);
-#ifdef SINGLE_TMP11
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_11_block_handle)();
-#else
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_11_block_handle)(k);
-#endif
-	int mpi_tag = MPI_TAG11(k);
-	starpu_tag_t partial_tag = TAG11_SAVE_PARTIAL(k);
-	starpu_tag_t unlocked_tag = TAG11_SAVE(k);
-
-//	fprintf(stderr, "NODE %d - 11 (%d) - recv when done ndeps %d - tag array %lx\n", rank, k, ndeps, tag_array[0]);
-	receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag);
-}
-
-/* Clear rank_mask (world_size entries), then set the entry of every node
- * owning a block (i, k) or (k, j) with i, j > k: block 11_k is used to compute
- * those blocks. */
-static void find_nodes_using_11(unsigned k, int *rank_mask)
-{
-	unsigned idx;
-
-	memset(rank_mask, 0, world_size*sizeof(int));
-
-	/* owners of the blocks (i, k), i > k */
-	for (idx = k+1; idx < nblocks; idx++)
-	{
-		int owner = get_block_rank(idx, k);
-		rank_mask[owner] = 1;
-	}
-
-	/* owners of the blocks (k, j), j > k */
-	for (idx = k+1; idx < nblocks; idx++)
-	{
-		int owner = get_block_rank(k, idx);
-		rank_mask[owner] = 1;
-	}
-}
-
-/* Completion callback of task 11(k): send block (k, k) to every other node
- * flagged by find_nodes_using_11(), then free the callback argument. */
-static void callback_task_11_real(void *_arg)
-{
-	struct callback_arg *cb = _arg;
-	const unsigned step = cb->k;
-	int needs_block[world_size];
-
-	/* Find all the nodes potentially requiring this block; the local node
-	 * already has it */
-	find_nodes_using_11(step, needs_block);
-	needs_block[rank] = 0;
-
-	/* TAG11_SAVE(step) is the tag send_data_to_mask() gets unlocked */
-	send_data_to_mask(STARPU_PLU(get_block_handle)(step, step), needs_block,
-			MPI_TAG11(step), TAG11_SAVE(step));
-
-	free(cb);
-}
-
-/* Build and submit the task tagged TAG11(k), which runs codelet cl11 on the
- * local diagonal block (k, k); callback_task_11_real() is its callback. */
-static void create_task_11_real(unsigned k)
-{
-	struct starpu_task *task = create_task(TAG11(k));
-	struct callback_arg *cb_arg;
-	starpu_tag_t dep;
-	int ret;
-
-	task->cl = &STARPU_PLU(cl11);
-	task->cl_arg = create_debug_info(k, k, k);
-
-	/* which sub-data is manipulated ? */
-	task->handles[0] = STARPU_PLU(get_block_handle)(k, k);
-
-	/* only k is read by the callback */
-	cb_arg = malloc(sizeof(struct callback_arg));
-	cb_arg->k = k;
-	task->callback_arg = cb_arg;
-	task->callback_func = callback_task_11_real;
-
-	/* this is an important task */
-	if (!no_prio)
-		task->priority = STARPU_MAX_PRIO;
-
-	/* enforce dependencies: the previous update of this block, or the
-	 * initial tag for the very first step */
-	dep = (k > 0) ? TAG22(k-1, k, k) : STARPU_TAG_INIT;
-	starpu_tag_declare_deps(TAG11(k), 1, dep);
-
-	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-/* Step k, diagonal block: submit the real task when block (k, k) is local,
- * otherwise post a receive if this node is flagged by find_nodes_using_11(). */
-static void create_task_11(unsigned k)
-{
-	if (get_block_rank(k, k) != rank)
-	{
-		/* We don't handle the task, but perhaps we have to generate MPI transfers. */
-		int needed_by[world_size];
-		find_nodes_using_11(k, needed_by);
-
-		if (!needed_by[rank])
-		{
-#ifdef VERBOSE_INIT
-			fprintf(stderr, "Node %d needs not 11(%u)\n", rank, k);
-#endif
-			return;
-		}
-
-#ifdef VERBOSE_INIT
-		fprintf(stderr, "create RECV task 11(%u) on node %d\n", k, rank);
-#endif
-		create_task_11_recv(k);
-		return;
-	}
-
-#ifdef VERBOSE_INIT
-	fprintf(stderr, "CREATE real task 11(%u) (TAG11_SAVE(%u) = %llux) on node %d\n", k, k, (unsigned long long) TAG11_SAVE(k), rank);
-#endif
-	create_task_11_real(k);
-}
-
-
-
-/*
- *	Task 12 (Update lower left (TRSM))
- */
-
-static void create_task_12_recv(unsigned k, unsigned j)
-{
-	/* The current node is not computing that task, so we receive the block
-	 * with MPI: block (k, j) is received from its owner into the temporary
-	 * 12 buffer, and TAG12_SAVE(k, j) is unlocked by the receive. */
-
-	/* We don't issue a MPI receive request until everyone using the
-	 * temporary buffer is done : 12_(k-1)j can be used by 22_(k-1)ij with
-	 * i >= k */
-	unsigned ndeps = 0;
-	starpu_tag_t tag_array[nblocks];
-
-	unsigned start;
-	unsigned bound;
-	/* With SINGLE_TMP1221 the dependencies are the 22 tasks of step k-1,
-	 * otherwise those of step k-2; 'bound' guards the first step(s), for
-	 * which there is nothing to wait for. */
-#ifdef SINGLE_TMP1221
-	bound = 0;
-	start = (k-1)+1;
-#else
-	bound = 1;
-	start = (k-2)+1;
-#endif
-
-	if (k > bound)
-	{
-		unsigned i;
-		for (i = start; i < nblocks; i++)
-		{
-			if (rank == get_block_rank(i, j))
-#ifdef SINGLE_TMP1221
-				tag_array[ndeps++] = TAG22(k-1, i, j);
-#else
-				tag_array[ndeps++] = TAG22(k-2, i, j);
-#endif
-		}
-	}
-
-	int source = get_block_rank(k, j);
-#ifdef SINGLE_TMP1221
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_12_block_handle)(j);
-#else
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_12_block_handle)(j,k);
-#endif
-	int mpi_tag = MPI_TAG12(k, j);
-	starpu_tag_t partial_tag = TAG12_SAVE_PARTIAL(k, j);
-	starpu_tag_t unlocked_tag = TAG12_SAVE(k, j);
-
-	receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag);
-}
-
-/* Clear rank_mask (world_size entries), then set the entry of every node
- * owning a block (i, j) with i > k: block 12_kj is used to compute 22_kij. */
-static void find_nodes_using_12(unsigned k, unsigned j, int *rank_mask)
-{
-	unsigned row = k+1;
-
-	memset(rank_mask, 0, world_size*sizeof(int));
-
-	while (row < nblocks)
-	{
-		int owner = get_block_rank(row, j);
-		rank_mask[owner] = 1;
-		row++;
-	}
-}
-
-/* Completion callback of task 12(k, j): send block (k, j) to every other node
- * flagged by find_nodes_using_12(), then free the callback argument. */
-static void callback_task_12_real(void *_arg)
-{
-	struct callback_arg *cb = _arg;
-	const unsigned step = cb->k;
-	const unsigned col = cb->j;
-	int needs_block[world_size];
-
-	/* Find all the nodes potentially requiring this block; the local node
-	 * already has it */
-	find_nodes_using_12(step, col, needs_block);
-	needs_block[rank] = 0;
-
-	/* TAG12_SAVE(step, col) is the tag send_data_to_mask() gets unlocked */
-	send_data_to_mask(STARPU_PLU(get_block_handle)(step, col), needs_block,
-			MPI_TAG12(step, col), TAG12_SAVE(step, col));
-
-	free(cb);
-}
-/* Build and submit the task tagged TAG12(k, j), which updates the local block
- * (k, j) (asserted to be owned by this node) using the diagonal block of step
- * k; callback_task_12_real() is its callback. */
-static void create_task_12_real(unsigned k, unsigned j)
-{
-	struct starpu_task *task = create_task(TAG12(k, j));
-
-#ifdef STARPU_DEVEL
-#warning temporary fix :/
-#endif
-//	task->cl = &STARPU_PLU(cl12);
-	task->cl = &STARPU_PLU(cl21);	/* the 12 task currently runs codelet cl21 (see the warning above) */
-
-	task->cl_arg = create_debug_info(j, j, k);
-
-	unsigned diag_block_is_local = (get_block_rank(k, k) == rank);
-
-	starpu_tag_t tag_11_dep;
-
-	/* which sub-data is manipulated ? The diagonal block is either the
-	 * local one (ready once TAG11(k) is done) or the temporary buffer it is
-	 * received into (ready once TAG11_SAVE(k) is unlocked). */
-	starpu_data_handle_t diag_block;
-	if (diag_block_is_local)
-	{
-		diag_block = STARPU_PLU(get_block_handle)(k, k);
-		tag_11_dep = TAG11(k);
-	}
-	else
-	{
-#ifdef SINGLE_TMP11
-		diag_block = STARPU_PLU(get_tmp_11_block_handle)();
-#else
-		diag_block = STARPU_PLU(get_tmp_11_block_handle)(k);
-#endif
-		tag_11_dep = TAG11_SAVE(k);
-	}
-
-	task->handles[0] = diag_block;
-	task->handles[1] = STARPU_PLU(get_block_handle)(k, j);
-
-	STARPU_ASSERT(get_block_rank(k, j) == rank);
-
-	STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR);
-	STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR);
-
-	struct callback_arg *arg = malloc(sizeof(struct callback_arg));
-		arg->j = j;
-		arg->k = k;
-
-	task->callback_func = callback_task_12_real;
-	task->callback_arg = arg;
-
-	if (!no_prio && (j == k+1))	/* only the block next to the diagonal gets the maximum priority */
-	{
-		task->priority = STARPU_MAX_PRIO;
-	}
-
-	/* enforce dependencies ... the diagonal block, plus the step k-1
-	 * update of this block when there is one */
-	if (k > 0)
-	{
-		starpu_tag_declare_deps(TAG12(k, j), 2, tag_11_dep, TAG22(k-1, k, j));
-	}
-	else
-	{
-		starpu_tag_declare_deps(TAG12(k, j), 1, tag_11_dep);
-	}
-
-	int ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-/* Step k, block (k, j): submit the real task when the block is local,
- * otherwise post a receive if this node is flagged by find_nodes_using_12(). */
-static void create_task_12(unsigned k, unsigned j)
-{
-	if (get_block_rank(k, j) == rank)
-	{
-#ifdef VERBOSE_INIT
-		fprintf(stderr, "CREATE real task 12(k = %u, j = %u) on node %d\n", k, j, rank);
-#endif
-		create_task_12_real(k, j);
-	}
-	else
-	{
-		/* We don't handle the task, but perhaps we have to generate MPI transfers. */
-		int rank_mask[world_size];
-		find_nodes_using_12(k, j, rank_mask);
-
-		if (rank_mask[rank])
-		{
-#ifdef VERBOSE_INIT
-			fprintf(stderr, "create RECV task 12(k = %u, j = %u) on node %d\n", k, j, rank);
-#endif
-			create_task_12_recv(k, j);
-		}
-		else
-		{
-#ifdef VERBOSE_INIT
-			/* the second index printed is j, as in the messages above */
-			fprintf(stderr, "Node %d needs not 12(k=%u, j=%u)\n", rank, k, j);
-#endif
-		}
-	}
-}
-
-/*
- *	Task 21 (Update upper right (TRSM))
- */
-
-/* Post the reception of block (i, k) from its owner into the temporary 21
- * buffer, TAG21_SAVE(k, i) being unlocked by the receive. */
-static void create_task_21_recv(unsigned k, unsigned i)
-{
-	/* The current node is not computing that task, so we receive the block
-	 * with MPI */
-
-	/* We don't issue a MPI receive request until everyone using the
-	 * temporary buffer is done : 21_(k-1)i can be used by 22_(k-1)ij with
-	 * j >= k */
-	unsigned ndeps = 0;
-	starpu_tag_t tag_array[nblocks];
-
-	unsigned bound;
-	unsigned start;
-
-	/* With SINGLE_TMP1221 the dependencies are the 22 tasks of step k-1,
-	 * otherwise those of step k-2; 'bound' guards the first step(s), for
-	 * which there is nothing to wait for. */
-#ifdef SINGLE_TMP1221
-	bound = 0;
-	start = (k-1)+1;
-#else
-	bound = 1;
-	start = (k-2)+1;
-#endif
-	if (k > bound)
-	{
-		unsigned j;
-		for (j = start; j < nblocks; j++)
-		{
-			if (rank == get_block_rank(i, j))
-#ifdef SINGLE_TMP1221
-				tag_array[ndeps++] = TAG22(k-1, i, j);
-#else
-				tag_array[ndeps++] = TAG22(k-2, i, j);
-#endif
-		/* The closing brace of the loop must stay outside the
-		 * conditional section: it used to sit in the #else branch, which
-		 * left the braces unbalanced when SINGLE_TMP1221 is defined. */
-		}
-	}
-
-	int source = get_block_rank(i, k);
-#ifdef SINGLE_TMP1221
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_21_block_handle)(i);
-#else
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_21_block_handle)(i, k);
-#endif
-	int mpi_tag = MPI_TAG21(k, i);
-	starpu_tag_t partial_tag = TAG21_SAVE_PARTIAL(k, i);
-	starpu_tag_t unlocked_tag = TAG21_SAVE(k, i);
-
-//	fprintf(stderr, "NODE %d - 21 (%d, %d) - recv when done ndeps %d - tag array %lx\n", rank, k, i, ndeps, tag_array[0]);
-	receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag);
-}
-
-/* Clear rank_mask (world_size entries), then set the entry of every node
- * owning a block (i, j) with j > k: block 21_ki is used to compute 22_kij. */
-static void find_nodes_using_21(unsigned k, unsigned i, int *rank_mask)
-{
-	unsigned col = k+1;
-
-	memset(rank_mask, 0, world_size*sizeof(int));
-
-	while (col < nblocks)
-	{
-		int owner = get_block_rank(i, col);
-		rank_mask[owner] = 1;
-		col++;
-	}
-}
-
-/* Completion callback of task 21(k, i): send block (i, k) to every other node
- * flagged by find_nodes_using_21(), then free the callback argument. */
-static void callback_task_21_real(void *_arg)
-{
-	struct callback_arg *cb = _arg;
-	const unsigned step = cb->k;
-	const unsigned row = cb->i;
-	int needs_block[world_size];
-
-	/* Find all the nodes potentially requiring this block; the local node
-	 * already has it */
-	find_nodes_using_21(step, row, needs_block);
-	needs_block[rank] = 0;
-
-	/* TAG21_SAVE(step, row) is the tag send_data_to_mask() gets unlocked */
-	send_data_to_mask(STARPU_PLU(get_block_handle)(row, step), needs_block,
-			MPI_TAG21(step, row), TAG21_SAVE(step, row));
-
-	free(cb);
-}
-/* Build and submit the task tagged TAG21(k, i), which updates block (i, k)
- * using the diagonal block of step k; callback_task_21_real() is its
- * callback. */
-static void create_task_21_real(unsigned k, unsigned i)
-{
-	struct starpu_task *task = create_task(TAG21(k, i));
-
-#ifdef STARPU_DEVEL
-#warning temporary fix
-#endif
-//	task->cl = &STARPU_PLU(cl21);
-	task->cl = &STARPU_PLU(cl12);	/* the 21 task currently runs codelet cl12 (see the warning above) */
-
-	task->cl_arg = create_debug_info(i, i, k);
-
-	unsigned diag_block_is_local = (get_block_rank(k, k) == rank);
-
-	starpu_tag_t tag_11_dep;
-
-	/* which sub-data is manipulated ? The diagonal block is either the
-	 * local one (ready once TAG11(k) is done) or the temporary buffer it is
-	 * received into (ready once TAG11_SAVE(k) is unlocked). */
-	starpu_data_handle_t diag_block;
-	if (diag_block_is_local)
-	{
-		diag_block = STARPU_PLU(get_block_handle)(k, k);
-		tag_11_dep = TAG11(k);
-	}
-	else
-	{
-#ifdef SINGLE_TMP11
-		diag_block = STARPU_PLU(get_tmp_11_block_handle)();
-#else
-		diag_block = STARPU_PLU(get_tmp_11_block_handle)(k);
-#endif
-		tag_11_dep = TAG11_SAVE(k);
-	}
-
-	task->handles[0] = diag_block;
-	task->handles[1] = STARPU_PLU(get_block_handle)(i, k);
-
-	STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR);
-	STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR);
-
-	struct callback_arg *arg = malloc(sizeof(struct callback_arg));
-		arg->i = i;
-		arg->k = k;
-
-	task->callback_func = callback_task_21_real;
-	task->callback_arg = arg;
-
-	if (!no_prio && (i == k+1))	/* only the block next to the diagonal gets the maximum priority */
-	{
-		task->priority = STARPU_MAX_PRIO;
-	}
-
-	/* enforce dependencies ... the diagonal block, plus the step k-1
-	 * update of this block when there is one */
-	if (k > 0)
-	{
-		starpu_tag_declare_deps(TAG21(k, i), 2, tag_11_dep, TAG22(k-1, i, k));
-	}
-	else
-	{
-		starpu_tag_declare_deps(TAG21(k, i), 1, tag_11_dep);
-	}
-
-	int ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-/* Step k, block (i, k): submit the real task when the block is local,
- * otherwise post a receive if this node is flagged by find_nodes_using_21(). */
-static void create_task_21(unsigned k, unsigned i)
-{
-	if (get_block_rank(i, k) != rank)
-	{
-		/* We don't handle the task, but perhaps we have to generate MPI transfers. */
-		int needed_by[world_size];
-		find_nodes_using_21(k, i, needed_by);
-
-		if (!needed_by[rank])
-		{
-#ifdef VERBOSE_INIT
-			fprintf(stderr, "Node %d needs not 21(k=%u, i=%u)\n", rank, k,i);
-#endif
-			return;
-		}
-
-#ifdef VERBOSE_INIT
-		fprintf(stderr, "create RECV task 21(k = %u, i = %u) on node %d\n", k, i, rank);
-#endif
-		create_task_21_recv(k, i);
-		return;
-	}
-
-#ifdef VERBOSE_INIT
-	fprintf(stderr, "CREATE real task 21(k = %u, i = %u) on node %d\n", k, i, rank);
-#endif
-	create_task_21_real(k, i);
-}
-
-/*
- *	Task 22 (GEMM)
- */
-
-/*
- * Build and submit the task tagged TAG22(k, i, j), which runs codelet cl22.
- *
- * Handles: [0] block (k, j), [1] block (i, k), [2] block (i, j). For [0] and
- * [1] the registered block handle is used when get_block_rank() reports this
- * node's rank; otherwise the tmp_12 / tmp_21 handle is used and the task
- * depends on the matching *_SAVE tag instead of the TAG12 / TAG21 tag.
- * For k > 0 the task additionally depends on TAG22(k-1, i, j).
- */
-static void create_task_22_real(unsigned k, unsigned i, unsigned j)
-{
-//	printf("task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG22(k,i,j));
-
-	struct starpu_task *task = create_task(TAG22(k, i, j));
-
-	task->cl = &STARPU_PLU(cl22);
-
-	task->cl_arg = create_debug_info(i, j, k);
-
-	/* which sub-data is manipulated ? */
-
-	/* produced by TAG21_SAVE(k, i) */
-	unsigned block21_is_local = (get_block_rank(i, k) == rank);
-	starpu_tag_t tag_21_dep;
-
-	starpu_data_handle_t block21;
-	if (block21_is_local)
-	{
-		block21 = STARPU_PLU(get_block_handle)(i, k);
-		tag_21_dep = TAG21(k, i);
-	}
-	else
-	{
-		/* The getter's arity depends on SINGLE_TMP1221 (see its prototypes). */
-#ifdef SINGLE_TMP1221
-		block21 = STARPU_PLU(get_tmp_21_block_handle)(i);
-#else
-		block21 = STARPU_PLU(get_tmp_21_block_handle)(i, k);
-#endif
-		tag_21_dep = TAG21_SAVE(k, i);
-	}
-
-	/* produced by TAG12_SAVE(k, j) */
-	unsigned block12_is_local = (get_block_rank(k, j) == rank);
-	starpu_tag_t tag_12_dep;
-
-	starpu_data_handle_t block12;
-	if (block12_is_local)
-	{
-	//	block12 = STARPU_PLU(get_block_handle)(j, k);
-		block12 = STARPU_PLU(get_block_handle)(k, j);
-		tag_12_dep = TAG12(k, j);
-	}
-	else
-	{
-#ifdef SINGLE_TMP1221
-		block12 = STARPU_PLU(get_tmp_12_block_handle)(j);
-#else
-		block12 = STARPU_PLU(get_tmp_12_block_handle)(j, k);
-#endif
-		tag_12_dep = TAG12_SAVE(k, j);
-	}
-
-
-
-	/*
-	 * NOTE(review): handles 0 and 1 are swapped with respect to the
-	 * commented-out assignments below; the reason for this "temporary fix"
-	 * is not visible here -- confirm against the cl22 kernel.
-	 */
-#ifdef STARPU_DEVEL
-#warning temporary fix :/
-#endif
-	//task->handles[0] = block21;
-	task->handles[0] = block12;
-
-	//task->handles[1] = block12;
-	task->handles[1] = block21;
-
-	/* produced by TAG22(k-1, i, j) */
-	task->handles[2] = STARPU_PLU(get_block_handle)(i, j);
-
-	STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR);
-	STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR);
-	STARPU_ASSERT(task->handles[2] != STARPU_POISON_PTR);
-
-	/* Unless priorities are disabled, the task on block (k+1, k+1) gets the highest priority. */
-	if (!no_prio && (i == k + 1) && (j == k +1) )
-	{
-		task->priority = STARPU_MAX_PRIO;
-	}
-
-	/* enforce dependencies ... */
-	if (k > 0)
-	{
-		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), tag_12_dep, tag_21_dep);
-	}
-	else
-	{
-		/* First step: there is no previous update of block (i, j) to wait for. */
-		starpu_tag_declare_deps(TAG22(k, i, j), 2, tag_12_dep, tag_21_dep);
-	}
-
-	int ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-/*
- * Create task 22 of step k for block (i, j), but only on the node for which
- * get_block_rank(i, j) equals the local rank; other nodes do nothing.
- */
-static void create_task_22(unsigned k, unsigned i, unsigned j)
-{
-	if (get_block_rank(i, j) != rank)
-	{
-		return;
-	}
-
-	create_task_22_real(k, i, j);
-}
-
-/*
- * Block until `tag` has completed, then acquire `handle` in read mode.
- *
- * The handle is neither released nor unregistered here; it stays acquired
- * when the function returns.
- */
-static void wait_tag_and_fetch_handle(starpu_tag_t tag, starpu_data_handle_t handle)
-{
-	STARPU_ASSERT(handle != STARPU_POISON_PTR);
-
-	starpu_tag_wait(tag);
-
-	/*
-	 * starpu_data_acquire() reports failure through its return value;
-	 * check it the same way starpu_task_submit() is checked in this file
-	 * instead of silently continuing without the data.
-	 */
-	int ret = starpu_data_acquire(handle, STARPU_R);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
-}
-
-/*
- * For every block whose get_block_rank() equals this node's rank, wait for the
- * corresponding *_SAVE tag (TAG11_SAVE, TAG21_SAVE, TAG12_SAVE) and acquire the
- * block's handle through wait_tag_and_fetch_handle().
- *
- * Blocks are visited step by step: the diagonal block (k, k), then the blocks
- * (i, k) with i > k, then the blocks (k, j) with j > k.
- */
-static void wait_termination(void)
-{
-	unsigned k, i, j;
-	for (k = 0; k < nblocks; k++)
-	{
-		/* Wait task 11k if needed */
-		if (get_block_rank(k, k) == rank)
-		{
-			starpu_data_handle_t diag_block = STARPU_PLU(get_block_handle)(k, k);
-			wait_tag_and_fetch_handle(TAG11_SAVE(k), diag_block);
-		}
-
-
-		for (i = k + 1; i < nblocks; i++)
-		{
-			/* Wait task 21ki if needed */
-			if (get_block_rank(i, k) == rank)
-			{
-				starpu_data_handle_t block21 = STARPU_PLU(get_block_handle)(i, k);
-				//starpu_data_handle_t block21 = STARPU_PLU(get_block_handle)(k, i);
-				//fprintf(stderr, "BLOCK21 i %d k %d -> handle %p\n", i, k, block21);
-				wait_tag_and_fetch_handle(TAG21_SAVE(k, i), block21);
-			}
-		}
-
-		for (j = k + 1; j < nblocks; j++)
-		{
-			/* Wait task 12kj if needed */
-			if (get_block_rank(k, j) == rank)
-			{
-				//starpu_data_handle_t block12 = STARPU_PLU(get_block_handle)(j, k);
-				starpu_data_handle_t block12 = STARPU_PLU(get_block_handle)(k, j);
-				//fprintf(stderr, "BLOCK12 j %d k %d -> handle %p\n", j, k, block12);
-				wait_tag_and_fetch_handle(TAG12_SAVE(k, j), block12);
-			}
-		}
-	}
-}
-
-/*
- *	code to bootstrap the factorization
- */
-
-/*
- * Entry point of the factorization: stores the problem parameters in the
- * file-scope variables, creates the tasks of every step k, synchronizes all
- * nodes with a barrier, then notifies STARPU_TAG_INIT and waits for the local
- * blocks to be complete.
- *
- * Returns end - start as measured by starpu_timing_now() around the
- * notification and wait_termination(); task creation and the barrier are not
- * part of the measured interval. The unit is presumably microseconds (the
- * commented-out trace divides by 1000 to print ms) -- TODO confirm.
- */
-double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
-{
-	double start;
-	double end;
-
-	nblocks = _nblocks;
-	rank = _rank;
-	world_size = _world_size;
-
-	/* create all the DAG nodes */
-	unsigned i,j,k;
-
-	for (k = 0; k < nblocks; k++)
-	{
-		starpu_iteration_push(k);
-
-		create_task_11(k);
-
-		/* Tasks 12 and 21 of step k, for every block index beyond the diagonal. */
-		for (i = k+1; i<nblocks; i++)
-		{
-			create_task_12(k, i);
-			create_task_21(k, i);
-		}
-
-		/* Tasks 22 of step k, for every block (i, j) with i, j > k. */
-		for (i = k+1; i<nblocks; i++)
-		{
-			for (j = k+1; j<nblocks; j++)
-			{
-				create_task_22(k, i, j);
-			}
-		}
-		starpu_iteration_pop();
-	}
-
-	int barrier_ret = starpu_mpi_barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	/* schedule the codelet */
-	start = starpu_timing_now();
-
-	starpu_tag_notify_from_apps(STARPU_TAG_INIT);
-
-	wait_termination();
-
-	end = starpu_timing_now();
-
-	double timing = end - start;
-
-//	fprintf(stderr, "RANK %d -> took %f ms\n", rank, timing/1000);
-
-	return timing;
-}

+ 0 - 69
nmad/examples/mpi_lu/pxlu.h

@@ -1,69 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2012, 2014, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __PXLU_H__
-#define __PXLU_H__
-
-/*
- * Shared declarations of the MPI LU example. STARPU_PLU() and TYPE are not
- * defined here; they are expected to be defined before this header is
- * included.
- */
-
-#include <starpu.h>
-#include <common/blas.h>
-#include <starpu_mpi.h>
-#ifdef STARPU_USE_CUDA
-#include <cublas.h>
-#endif
-
-/*
- * 2 * n1 * n2 * n3, computed in 64-bit arithmetic. Each argument is
- * parenthesized before being cast so that an expression argument
- * (e.g. a ? b : c) is widened as a whole.
- */
-#define BLAS3_FLOP(n1,n2,n3)    \
-        (2*((uint64_t)(n1))*((uint64_t)(n2))*((uint64_t)(n3)))
-
-/* Optional build switches selecting the arity of the get_tmp_* getters below. */
-//#define SINGLE_TMP11	1
-//#define SINGLE_TMP1221	1
-
-/* Block indices and step number attached to a task for debugging traces. */
-struct debug_info
-{
-	unsigned i;
-	unsigned j;
-	unsigned k;
-};
-
-/* Runs the factorization and returns the elapsed time measured by the implementation. */
-double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size);
-
-TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks);
-void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved);
-
-unsigned STARPU_PLU(display_flag)(void);
-
-void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank);
-void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank);
-
-/* Accessors for block (i, j): its StarPU handle and its data pointer. */
-starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j);
-TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j);
-
-/* Handles of the temporary blocks used when a needed block is not local. */
-#ifdef SINGLE_TMP11
-starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(void);
-#else
-starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(unsigned k);
-#endif
-#ifdef SINGLE_TMP1221
-starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j);
-starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i);
-#else
-starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k);
-starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k);
-#endif
-
-void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize);
-
-/* Rank of the node associated with block (i, j). */
-int get_block_rank(unsigned i, unsigned j);
-
-#endif // __PXLU_H__

+ 0 - 184
nmad/examples/mpi_lu/pxlu_implicit.c

@@ -1,184 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2011, 2013-2015, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2012, 2013, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "pxlu.h"
-#include "pxlu_kernels.h"
-#include <sys/time.h>
-
-//#define VERBOSE_INIT	1
-
-//#define DEBUG	1
-
-/* When non-zero, the task-creation functions below always pass STARPU_MIN_PRIO. */
-static unsigned no_prio = 0;
-
-/* Problem parameters; plu_main() overwrites them before creating any task. */
-static unsigned nblocks = 0;
-static int rank = -1;
-static int world_size = -1;
-
-/* NOTE(review): not referenced in the visible part of this file -- confirm it is still needed. */
-struct callback_arg
-{
-	unsigned i, j, k;
-};
-
-/*
- *	Task 11 (diagonal factorization)
- */
-
-/*
- * Insert task 11 of step k: codelet cl11 applied in place (STARPU_RW) to the
- * diagonal block (k, k). The three STARPU_VALUE arguments all carry k
- * (presumably filling the i, j, k fields of the debug information -- confirm).
- *
- * The return value of starpu_mpi_task_insert() is checked so that a failed
- * insertion is reported instead of being silently ignored.
- */
-static void create_task_11(unsigned k)
-{
-	int ret = starpu_mpi_task_insert(MPI_COMM_WORLD,
-			       &STARPU_PLU(cl11),
-			       STARPU_VALUE, &k, sizeof(k),
-			       STARPU_VALUE, &k, sizeof(k),
-			       STARPU_VALUE, &k, sizeof(k),
-			       STARPU_RW, STARPU_PLU(get_block_handle)(k, k),
-			       STARPU_PRIORITY, !no_prio ?
-			       STARPU_MAX_PRIO : STARPU_MIN_PRIO,
-			       0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
-}
-
-/*
- *	Task 12 (Update lower left (TRSM))
- */
-
-/*
- * Insert task 12 of step k for block (k, j): reads the diagonal block (k, k)
- * and updates block (k, j) in place. The task gets the highest priority only
- * for j == k+1 and when priorities are enabled.
- *
- * NOTE(review): the task is submitted with cl21 rather than cl12 (the
- * "temporary fix" below); the reason is not visible here -- confirm.
- *
- * The return value of starpu_mpi_task_insert() is checked so that a failed
- * insertion is reported instead of being silently ignored.
- */
-static void create_task_12(unsigned k, unsigned j)
-{
-#ifdef STARPU_DEVEL
-#warning temporary fix
-#endif
-	int ret = starpu_mpi_task_insert(MPI_COMM_WORLD,
-			       //&STARPU_PLU(cl12),
-			       &STARPU_PLU(cl21),
-			       STARPU_VALUE, &j, sizeof(j),
-			       STARPU_VALUE, &j, sizeof(j),
-			       STARPU_VALUE, &k, sizeof(k),
-			       STARPU_R, STARPU_PLU(get_block_handle)(k, k),
-			       STARPU_RW, STARPU_PLU(get_block_handle)(k, j),
-			       STARPU_PRIORITY, !no_prio && (j == k+1) ?
-			       STARPU_MAX_PRIO : STARPU_MIN_PRIO,
-			       0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
-}
-
-/*
- *	Task 21 (Update upper right (TRSM))
- */
-
-/*
- * Insert task 21 of step k for block (i, k): reads the diagonal block (k, k)
- * and updates block (i, k) in place. The task gets the highest priority only
- * for i == k+1 and when priorities are enabled.
- *
- * NOTE(review): the task is submitted with cl12 rather than cl21 (the
- * "temporary fix" below); the reason is not visible here -- confirm.
- *
- * The return value of starpu_mpi_task_insert() is checked so that a failed
- * insertion is reported instead of being silently ignored.
- */
-static void create_task_21(unsigned k, unsigned i)
-{
-#ifdef STARPU_DEVEL
-#warning temporary fix
-#endif
-	int ret = starpu_mpi_task_insert(MPI_COMM_WORLD,
-			       //&STARPU_PLU(cl21),
-			       &STARPU_PLU(cl12),
-			       STARPU_VALUE, &i, sizeof(i),
-			       STARPU_VALUE, &i, sizeof(i),
-			       STARPU_VALUE, &k, sizeof(k),
-			       STARPU_R, STARPU_PLU(get_block_handle)(k, k),
-			       STARPU_RW, STARPU_PLU(get_block_handle)(i, k),
-			       STARPU_PRIORITY, !no_prio && (i == k+1) ?
-			       STARPU_MAX_PRIO : STARPU_MIN_PRIO,
-			       0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
-}
-
-/*
- *	Task 22 (GEMM)
- */
-
-/*
- * Insert task 22 of step k for block (i, j): reads blocks (k, j) and (i, k)
- * and updates block (i, j) in place with codelet cl22. The task gets the
- * highest priority only for i == j == k+1 and when priorities are enabled.
- *
- * The return value of starpu_mpi_task_insert() is checked so that a failed
- * insertion is reported instead of being silently ignored.
- */
-static void create_task_22(unsigned k, unsigned i, unsigned j)
-{
-	int ret = starpu_mpi_task_insert(MPI_COMM_WORLD,
-			       &STARPU_PLU(cl22),
-			       STARPU_VALUE, &i, sizeof(i),
-			       STARPU_VALUE, &j, sizeof(j),
-			       STARPU_VALUE, &k, sizeof(k),
-			       STARPU_R, STARPU_PLU(get_block_handle)(k, j),
-			       STARPU_R, STARPU_PLU(get_block_handle)(i, k),
-			       STARPU_RW, STARPU_PLU(get_block_handle)(i, j),
-			       STARPU_PRIORITY, !no_prio && (i == k + 1) && (j == k +1) ?
-			       STARPU_MAX_PRIO : STARPU_MIN_PRIO,
-			       0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
-}
-
-/*
- *	code to bootstrap the factorization 
- */
-
-/*
- * Entry point of the factorization (task-insert variant): stores the problem
- * parameters in the file-scope variables, inserts the tasks of every step k,
- * then waits for all tasks and synchronizes the nodes.
- *
- * Returns end - start as measured by starpu_timing_now(); unlike a
- * measurement taken after submission, the interval here starts right after
- * the first barrier and therefore includes task insertion. The unit is
- * presumably microseconds (the commented-out trace divides by 1000 to print
- * ms) -- TODO confirm.
- */
-double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
-{
-	double start;
-	double end;
-
-	nblocks = _nblocks;
-	rank = _rank;
-	world_size = _world_size;
-
-	/* create all the DAG nodes */
-	unsigned i,j,k;
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-
-	start = starpu_timing_now();
-
-	for (k = 0; k < nblocks; k++)
-	{
-		starpu_iteration_push(k);
-
-		create_task_11(k);
-
-		for (i = k+1; i<nblocks; i++)
-		{
-			create_task_12(k, i);
-			create_task_21(k, i);
-		}
-
-		/* No task inserted after this point reads block (k, k): flush it from the MPI cache. */
-		starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(k,k));
-		if (get_block_rank(k, k) == _rank)
-			starpu_data_wont_use(STARPU_PLU(get_block_handle)(k,k));
-
-		for (i = k+1; i<nblocks; i++)
-		{
-			for (j = k+1; j<nblocks; j++)
-			{
-				create_task_22(k, i, j);
-			}
-		}
-
-		/* Same treatment for the blocks (k, i) and (i, k) once the tasks 22 of this step are inserted. */
-		for (i = k+1; i<nblocks; i++)
-		{
-			starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(k,i));
-			if (get_block_rank(k, i) == _rank)
-				starpu_data_wont_use(STARPU_PLU(get_block_handle)(k,i));
-			starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(i,k));
-			if (get_block_rank(i, k) == _rank)
-				starpu_data_wont_use(STARPU_PLU(get_block_handle)(i,k));
-		}
-		starpu_iteration_pop();
-	}
-
-	starpu_task_wait_for_all();
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-
-	end = starpu_timing_now();
-
-	double timing = end - start;
-	
-//	fprintf(stderr, "RANK %d -> took %f ms\n", rank, timing/1000);
-	
-	return timing;
-}

+ 0 - 454
nmad/examples/mpi_lu/pxlu_kernels.c

@@ -1,454 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2012  Université de Bordeaux
- * Copyright (C) 2010, 2012, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "pxlu.h"
-#include "pxlu_kernels.h"
-#include <math.h>
-
-///#define VERBOSE_KERNELS	1
-
-/*
- * U22
- */
-
-/*
- * Kernel shared by the CPU (s == 0) and CUDA (s == 1) variants of codelet
- * cl22: a GEMM call with alpha = -1 and beta = 1 on the matrices behind
- * descr[0] (right), descr[1] (left) and descr[2] (center), i.e. presumably
- * center <- center - right * left. Any other value of s aborts.
- *
- * _args is only read when VERBOSE_KERNELS is defined, as a struct debug_info.
- */
-static inline void STARPU_PLU(common_u22)(void *descr[],
-				int s, STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	TYPE *right 	= (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
-	TYPE *left 	= (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
-	TYPE *center 	= (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-	unsigned dx = STARPU_MATRIX_GET_NX(descr[2]);
-	unsigned dy = STARPU_MATRIX_GET_NY(descr[2]);
-	unsigned dz = STARPU_MATRIX_GET_NY(descr[0]);
-
-	/*
-	 * NOTE(review): ld12 is the leading dimension of descr[0] (right) and
-	 * ld21 that of descr[1] (left), yet the GEMM calls below pass right
-	 * with ld21 and left with ld12. This is harmless only if both blocks
-	 * share the same leading dimension -- confirm.
-	 */
-	unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]);
-	unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]);
-
-#ifdef VERBOSE_KERNELS
-	struct debug_info *info = _args;
-
-	int rank;
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	fprintf(stderr, "KERNEL 22 %d - k = %u i = %u j = %u\n", rank, info->k, info->i, info->j);
-#endif
-
-#ifdef STARPU_USE_CUDA
-	cublasStatus status;
-	cudaError_t cures;
-#endif
-
-	switch (s)
-	{
-		case 0:
-			/* NOTE(review): the CPU call passes (dy, dx, dz) whereas the CUDA call passes (dx, dy, dz); equivalent only for square blocks -- confirm. */
-			CPU_GEMM("N", "N", dy, dx, dz,
-				(TYPE)-1.0, right, ld21, left, ld12,
-				(TYPE)1.0, center, ld22);
-			break;
-
-#ifdef STARPU_USE_CUDA
-		case 1:
-			CUBLAS_GEMM('n', 'n', dx, dy, dz,
-				(TYPE)-1.0, right, ld21, left, ld12,
-				(TYPE)1.0f, center, ld22);
-
-			status = cublasGetError();
-			if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS))
-				STARPU_CUBLAS_REPORT_ERROR(status);
-
-			/* Wait for the local stream before returning and report a CUDA error, if any. */
-			if (STARPU_UNLIKELY((cures = cudaStreamSynchronize(starpu_cuda_get_local_stream())) != cudaSuccess))
-				STARPU_CUDA_REPORT_ERROR(cures);
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-#ifdef VERBOSE_KERNELS
-	fprintf(stderr, "KERNEL 22 %d - k = %u i = %u j = %u done\n", rank, info->k, info->i, info->j);
-#endif
-}
-
-/* CPU entry point of cl22: runs the shared kernel with s = 0. */
-static void STARPU_PLU(cpu_u22)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u22)(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-/* CUDA entry point of cl22: runs the shared kernel with s = 1. */
-static void STARPU_PLU(cublas_u22)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u22)(descr, 1, _args);
-}
-#endif// STARPU_USE_CUDA
-
-/* History-based performance model; the symbol name depends on STARPU_ATLAS / STARPU_GOTO. */
-static struct starpu_perfmodel STARPU_PLU(model_22) =
-{
-	.type = STARPU_HISTORY_BASED,
-#ifdef STARPU_ATLAS
-	.symbol = STARPU_PLU_STR(lu_model_22_atlas)
-#elif defined(STARPU_GOTO)
-	.symbol = STARPU_PLU_STR(lu_model_22_goto)
-#else
-	.symbol = STARPU_PLU_STR(lu_model_22)
-#endif
-};
-
-/* Codelet 22: three buffers, two read-only inputs followed by one read-write block. */
-struct starpu_codelet STARPU_PLU(cl22) =
-{
-	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_funcs = {STARPU_PLU(cpu_u22)},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {STARPU_PLU(cublas_u22)},
-#endif
-	.nbuffers = 3,
-	.modes = {STARPU_R, STARPU_R, STARPU_RW},
-	.model = &STARPU_PLU(model_22)
-};
-
-
-/*
- * U12
- */
-
-/*
- * Kernel shared by the CPU (s == 0) and CUDA (s == 1) variants of codelet
- * cl12: a TRSM call ("L", "L", "N", "N", alpha = 1) using the matrix behind
- * descr[0] (sub11) to update the matrix behind descr[1] (sub12) in place.
- * Any other value of s aborts.
- *
- * _args is only read when VERBOSE_KERNELS is defined, as a struct debug_info.
- * The verbose traces label this kernel "21", following the "other tweak"
- * mentioned in the #warning below.
- */
-static inline void STARPU_PLU(common_u12)(void *descr[],
-				int s, STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	TYPE *sub11;
-	TYPE *sub12;
-
-	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
-	sub12 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-	unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]);
-
-	unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]);
-	unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]);
-
-#ifdef VERBOSE_KERNELS
-	struct debug_info *info = _args;
-
-	int rank;
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-#warning fixed debugging according to other tweak
-	//fprintf(stderr, "KERNEL 12 %d - k = %u i %u\n", rank, info->k, info->i);
-	fprintf(stderr, "KERNEL 21 %d - k = %u i %u\n", rank, info->k, info->j);
-
-	//fprintf(stderr, "INPUT 12 U11\n");
-	fprintf(stderr, "INPUT 21 U11\n");
-	STARPU_PLU(display_data_content)(sub11, nx12);
-	//fprintf(stderr, "INPUT 12 U12\n");
-	fprintf(stderr, "INPUT 21 U21\n");
-	STARPU_PLU(display_data_content)(sub12, nx12);
-#endif
-
-#ifdef STARPU_USE_CUDA
-	cublasStatus status;
-	cudaError_t cures;
-#endif
-
-	/* solve L11 U12 = A12 (find U12) */
-	switch (s)
-	{
-		case 0:
-			/* NOTE(review): the CPU call passes (nx12, ny12) whereas the CUDA call passes (ny12, nx12); equivalent only for square blocks -- confirm. */
-			CPU_TRSM("L", "L", "N", "N", nx12, ny12,
-					(TYPE)1.0, sub11, ld11, sub12, ld12);
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-			CUBLAS_TRSM('L', 'L', 'N', 'N', ny12, nx12,
-					(TYPE)1.0, sub11, ld11, sub12, ld12);
-
-			status = cublasGetError();
-			if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS))
-				STARPU_CUBLAS_REPORT_ERROR(status);
-
-			/* Wait for the local stream before returning and report a CUDA error, if any. */
-			if (STARPU_UNLIKELY((cures = cudaStreamSynchronize(starpu_cuda_get_local_stream())) != cudaSuccess))
-				STARPU_CUDA_REPORT_ERROR(cures);
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-
-#ifdef VERBOSE_KERNELS
-	//fprintf(stderr, "OUTPUT 12 U12\n");
-	fprintf(stderr, "OUTPUT 21 U21\n");
-	STARPU_PLU(display_data_content)(sub12, nx12);
-#endif
-}
-
-/* CPU entry point of cl12: runs the shared kernel with s = 0. */
-static void STARPU_PLU(cpu_u12)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u12)(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-/* CUDA entry point of cl12: runs the shared kernel with s = 1. */
-static void STARPU_PLU(cublas_u12)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u12)(descr, 1, _args);
-}
-#endif // STARPU_USE_CUDA
-
-/* History-based performance model; the symbol name depends on STARPU_ATLAS / STARPU_GOTO. */
-static struct starpu_perfmodel STARPU_PLU(model_12) =
-{
-	.type = STARPU_HISTORY_BASED,
-#ifdef STARPU_ATLAS
-	.symbol = STARPU_PLU_STR(lu_model_12_atlas)
-#elif defined(STARPU_GOTO)
-	.symbol = STARPU_PLU_STR(lu_model_12_goto)
-#else
-	.symbol = STARPU_PLU_STR(lu_model_12)
-#endif
-};
-
-/* Codelet 12: two buffers, a read-only input followed by the read-write block. */
-struct starpu_codelet STARPU_PLU(cl12) =
-{
-	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_funcs = {STARPU_PLU(cpu_u12)},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {STARPU_PLU(cublas_u12)},
-#endif
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_RW},
-	.model = &STARPU_PLU(model_12)
-};
-
-
-/*
- * U21
- */
-
-/*
- * Kernel shared by the CPU (s == 0) and CUDA (s == 1) variants of codelet
- * cl21: a TRSM call ("R", "U", "N", "U", alpha = 1) using the matrix behind
- * descr[0] (sub11) to update the matrix behind descr[1] (sub21) in place.
- * Any other value of s aborts.
- *
- * _args is only read when VERBOSE_KERNELS is defined, as a struct debug_info.
- * The verbose traces label this kernel "12", following the "other tweak"
- * mentioned in the #warning below.
- */
-static inline void STARPU_PLU(common_u21)(void *descr[],
-				int s, STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	TYPE *sub11;
-	TYPE *sub21;
-
-	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
-	sub21 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-	unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]);
-
-	unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]);
-	unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]);
-
-#ifdef VERBOSE_KERNELS
-	struct debug_info *info = _args;
-
-	int rank;
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-#warning fixed debugging according to other tweak
-	//fprintf(stderr, "KERNEL 21 %d (k = %u, i = %u)\n", rank, info->k, info->i);
-	fprintf(stderr, "KERNEL 12 %d (k = %u, j = %u)\n", rank, info->k, info->j);
-
-	//fprintf(stderr, "INPUT 21 U11\n");
-	fprintf(stderr, "INPUT 12 U11\n");
-	STARPU_PLU(display_data_content)(sub11, nx21);
-	//fprintf(stderr, "INPUT 21 U21\n");
-	fprintf(stderr, "INPUT 12 U12\n");
-	STARPU_PLU(display_data_content)(sub21, nx21);
-#endif
-
-#ifdef STARPU_USE_CUDA
-	cublasStatus status;
-	cudaError_t cures;
-#endif
-
-
-	switch (s)
-	{
-		case 0:
-			CPU_TRSM("R", "U", "N", "U", nx21, ny21,
-					(TYPE)1.0, sub11, ld11, sub21, ld21);
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-			CUBLAS_TRSM('R', 'U', 'N', 'U', ny21, nx21,
-					(TYPE)1.0, sub11, ld11, sub21, ld21);
-
-			status = cublasGetError();
-			if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS))
-				STARPU_CUBLAS_REPORT_ERROR(status);
-
-			/*
-			 * Report a failed synchronization instead of ignoring
-			 * it, as the u12 and u22 kernels do.
-			 */
-			if (STARPU_UNLIKELY((cures = cudaStreamSynchronize(starpu_cuda_get_local_stream())) != cudaSuccess))
-				STARPU_CUDA_REPORT_ERROR(cures);
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-
-#ifdef VERBOSE_KERNELS
-	//fprintf(stderr, "OUTPUT 21 U11\n");
-	fprintf(stderr, "OUTPUT 12 U11\n");
-	STARPU_PLU(display_data_content)(sub11, nx21);
-	//fprintf(stderr, "OUTPUT 21 U21\n");
-	fprintf(stderr, "OUTPUT 12 U12\n");
-	STARPU_PLU(display_data_content)(sub21, nx21);
-#endif
-}
-
-/* CPU entry point of cl21: runs the shared kernel with s = 0. */
-static void STARPU_PLU(cpu_u21)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u21)(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-/* CUDA entry point of cl21: runs the shared kernel with s = 1. */
-static void STARPU_PLU(cublas_u21)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u21)(descr, 1, _args);
-}
-#endif
-
-/* History-based performance model; the symbol name depends on STARPU_ATLAS / STARPU_GOTO. */
-static struct starpu_perfmodel STARPU_PLU(model_21) =
-{
-	.type = STARPU_HISTORY_BASED,
-#ifdef STARPU_ATLAS
-	.symbol = STARPU_PLU_STR(lu_model_21_atlas)
-#elif defined(STARPU_GOTO)
-	.symbol = STARPU_PLU_STR(lu_model_21_goto)
-#else
-	.symbol = STARPU_PLU_STR(lu_model_21)
-#endif
-};
-
-/* Codelet 21: two buffers, a read-only input followed by the read-write block. */
-struct starpu_codelet STARPU_PLU(cl21) =
-{
-	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_funcs = {STARPU_PLU(cpu_u21)},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {STARPU_PLU(cublas_u21)},
-#endif
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_RW},
-	.model = &STARPU_PLU(model_21)
-};
-
-
-/*
- *	U11
- */
-
-/*
- * Kernel shared by the CPU (s == 0) and CUDA (s == 1) variants of codelet
- * cl11: in-place factorization of the matrix behind descr[0]. For each
- * diagonal index z, the pivot sub11[z+z*ld] is asserted to be non-zero, the
- * nx-z-1 elements starting at sub11[z+(z+1)*ld] (stride ld) are scaled by
- * 1/pivot, and a rank-1 update (GER, alpha = -1) is applied to the trailing
- * sub-matrix starting at sub11[(z+1)+(z+1)*ld]. Any other value of s aborts.
- *
- * In the CUDA variant the pivot is copied back to the host at every step;
- * the results of the copy and of the stream synchronizations are checked so
- * that a failed transfer is reported instead of asserting on an
- * uninitialized pivot.
- *
- * _args is only read when VERBOSE_KERNELS is defined, as a struct debug_info.
- */
-static inline void STARPU_PLU(common_u11)(void *descr[],
-				int s, STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	TYPE *sub11;
-
-	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-	unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]);
-	unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]);
-
-	unsigned long z;
-
-#ifdef VERBOSE_KERNELS
-	struct debug_info *info = _args;
-
-	int rank;
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	fprintf(stderr, "KERNEL 11 %d - k = %u\n", rank, info->k);
-#endif
-
-#ifdef STARPU_USE_CUDA
-	cudaError_t cures;
-#endif
-
-	switch (s)
-	{
-		case 0:
-			for (z = 0; z < nx; z++)
-			{
-				TYPE pivot;
-				pivot = sub11[z+z*ld];
-				STARPU_ASSERT(pivot != 0.0);
-
-				CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld);
-
-				CPU_GER(nx - z - 1, nx - z - 1, -1.0,
-						&sub11[(z+1)+z*ld], 1,
-						&sub11[z+(z+1)*ld], ld,
-						&sub11[(z+1) + (z+1)*ld],ld);
-			}
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-			for (z = 0; z < nx; z++)
-			{
-				TYPE pivot;
-
-				/* Fetch the pivot from the device and wait for it before using it on the host. */
-				cures = cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
-				if (STARPU_UNLIKELY(cures != cudaSuccess))
-					STARPU_CUDA_REPORT_ERROR(cures);
-
-				cures = cudaStreamSynchronize(starpu_cuda_get_local_stream());
-				if (STARPU_UNLIKELY(cures != cudaSuccess))
-					STARPU_CUDA_REPORT_ERROR(cures);
-
-				STARPU_ASSERT(pivot != 0.0);
-
-				CUBLAS_SCAL(nx - z - 1, 1.0/pivot, &sub11[z+(z+1)*ld], ld);
-
-				CUBLAS_GER(nx - z - 1, nx - z - 1, -1.0,
-						&sub11[(z+1)+z*ld], 1,
-						&sub11[z+(z+1)*ld], ld,
-						&sub11[(z+1) + (z+1)*ld],ld);
-			}
-
-			cures = cudaStreamSynchronize(starpu_cuda_get_local_stream());
-			if (STARPU_UNLIKELY(cures != cudaSuccess))
-				STARPU_CUDA_REPORT_ERROR(cures);
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-#ifdef VERBOSE_KERNELS
-	fprintf(stderr, "KERNEL 11 %d - k = %u\n", rank, info->k);
-#endif
-}
-
-/* CPU entry point of cl11: runs the shared kernel with s = 0. */
-static void STARPU_PLU(cpu_u11)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u11)(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-/* CUDA entry point of cl11: runs the shared kernel with s = 1. */
-static void STARPU_PLU(cublas_u11)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u11)(descr, 1, _args);
-}
-#endif// STARPU_USE_CUDA
-
-/* History-based performance model; the symbol name depends on STARPU_ATLAS / STARPU_GOTO. */
-static struct starpu_perfmodel STARPU_PLU(model_11) =
-{
-	.type = STARPU_HISTORY_BASED,
-#ifdef STARPU_ATLAS
-	.symbol = STARPU_PLU_STR(lu_model_11_atlas)
-#elif defined(STARPU_GOTO)
-	.symbol = STARPU_PLU_STR(lu_model_11_goto)
-#else
-	.symbol = STARPU_PLU_STR(lu_model_11)
-#endif
-};
-
-/* Codelet 11: a single buffer, accessed read-write. */
-struct starpu_codelet STARPU_PLU(cl11) =
-{
-	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_funcs = {STARPU_PLU(cpu_u11)},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {STARPU_PLU(cublas_u11)},
-#endif
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-	.model = &STARPU_PLU(model_11)
-};

+ 0 - 32
nmad/examples/mpi_lu/pxlu_kernels.h

@@ -1,32 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2012, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2012  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __PXLU_KERNELS_H__
-#define __PXLU_KERNELS_H__
-
-#include <starpu.h>
-
-/* Two-level stringification so that STARPU_PLU(name) is expanded before being turned into a string. */
-#define str(s) #s
-#define xstr(s)        str(s)
-#define STARPU_PLU_STR(name)  xstr(STARPU_PLU(name))
-
-/*
- * Codelets of the LU kernels. They are declared extern here and defined once
- * in the kernels source file: without extern, every file including this
- * header would carry its own tentative definition, which fails to link when
- * common symbols are disabled (-fno-common).
- */
-extern struct starpu_codelet STARPU_PLU(cl11);
-extern struct starpu_codelet STARPU_PLU(cl12);
-extern struct starpu_codelet STARPU_PLU(cl21);
-extern struct starpu_codelet STARPU_PLU(cl22);
-
-#endif // __PXLU_KERNELS_H__

+ 0 - 19
nmad/examples/mpi_lu/slu_kernels.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "xlu_kernels.c"

+ 0 - 108
nmad/examples/native_fortran/nf_basic_ring.f90

@@ -1,108 +0,0 @@
-! StarPU --- Runtime system for heterogeneous multicore architectures.
-!
-! Copyright (C) 2016  Inria
-!
-! StarPU is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at
-! your option) any later version.
-!
-! StarPU is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-!
-! See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-! Pass an integer token around a ring of MPI ranks for nloops rounds: each rank
-! receives the token from rank-1 (modulo the number of ranks), increments it
-! and sends it to rank+1. Rank 0 starts the first round without receiving and
-! the last rank ends the last round without sending.
-program nf_basic_ring
-        use iso_c_binding       ! C interfacing module
-        use fstarpu_mod         ! StarPU interfacing module
-        use fstarpu_mpi_mod     ! StarPU-MPI interfacing module
-        implicit none
-
-        integer(c_int) :: ncpu
-        integer(c_int) :: ret
-        integer(c_int) :: rank,sz
-        integer(c_int),target :: token = 42
-        integer(c_int) :: nloops = 32
-        integer(c_int) :: loop
-        integer(c_int) :: tag
-        integer(c_int) :: world
-        integer(c_int) :: src,dst
-        type(c_ptr) :: token_dh, st
-
-        ! exit status 77 is used for the "cannot run here" cases below
-        ! (-19 is presumably -ENODEV -- confirm)
-        ret = fstarpu_init(C_NULL_PTR)
-        if (ret == -19) then
-                stop 77
-        else if (ret /= 0) then
-                stop 1
-        end if
-
-        ret = fstarpu_mpi_init(1)
-        print *,"fstarpu_mpi_init status:", ret
-        if (ret /= 0) then
-                stop 1
-        end if
-
-        ! stop there if no CPU worker available
-        ncpu = fstarpu_cpu_worker_get_count()
-        if (ncpu == 0) then
-                call fstarpu_shutdown()
-                ret = fstarpu_mpi_shutdown()
-                stop 77
-        end if
-
-        world = fstarpu_mpi_world_comm()
-        rank = fstarpu_mpi_world_rank()
-        sz = fstarpu_mpi_world_size()
-        write(*,*) "rank=", rank,"size=",sz,"world=",world
-        ! a ring needs at least two ranks
-        if (sz < 2) then
-                call fstarpu_shutdown()
-                ret = fstarpu_mpi_shutdown()
-                stop 77
-        end if
-
-        call fstarpu_variable_data_register(token_dh, 0, c_loc(token), c_sizeof(token))
-
-        st = fstarpu_mpi_status_alloc()
-        do loop=1,nloops
-                tag = loop*sz+rank
-                token = 0
-                if (loop == 1.and.rank == 0) then
-                        ! first hop of the ring: nothing to receive yet
-                        write(*,*) "rank=", rank,"token=",token
-                else
-                        src = modulo((rank+sz-1),sz)
-                        write(*,*) "rank=", rank,"recv--> src =", src, "tag =", tag
-                        ret = fstarpu_mpi_recv(token_dh, src, tag, world, st)
-                        if (ret /= 0) then
-                                write(*,*) "fstarpu_mpi_recv failed"
-                                stop 1
-                        end if
-                        write(*,*) "rank=", rank,"recv<--","token=",token
-                        token = token+1
-                end if
-                if (loop == nloops.and.rank == (sz-1)) then
-                        ! last hop of the ring: print the final token instead of sending it
-                        call fstarpu_data_acquire(token_dh, FSTARPU_R)
-                        write(*,*) "finished: rank=", rank,"token=",token
-                        call fstarpu_data_release(token_dh)
-                else
-                        dst = modulo((rank+1),sz)
-                        write(*,*) "rank=", rank,"send--> dst =", dst, "tag =", tag+1
-                        ret = fstarpu_mpi_send(token_dh, dst, tag+1, world)
-                        if (ret /= 0) then
-                                ! report the call that actually failed (was "fstarpu_mpi_recv failed")
-                                write(*,*) "fstarpu_mpi_send failed"
-                                stop 1
-                        end if
-                        write(*,*) "rank=", rank,"send<--"
-                end if
-        end do
-        call fstarpu_mpi_status_free(st)
-        call fstarpu_data_unregister(token_dh)
-        call fstarpu_shutdown()
-
-        ret = fstarpu_mpi_shutdown()
-        print *,"fstarpu_mpi_shutdown status:", ret
-        if (ret /= 0) then
-                stop 1
-        end if
-end program nf_basic_ring
-

+ 0 - 236
nmad/examples/native_fortran/nf_mm.f90

@@ -1,236 +0,0 @@
-! StarPU --- Runtime system for heterogeneous multicore architectures.
-!
-! Copyright (C) 2016  Inria
-!
-! StarPU is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at
-! your option) any later version.
-!
-! StarPU is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-!
-! See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-program nf_mm
-        ! Distributed blocked matrix product example.
-        !
-        ! Rank 0 allocates the N x N matrices A (2 on the diagonal, 0 elsewhere),
-        ! B (B(row,col) = row*N+col) and C (all zeros).  A is registered as NB
-        ! panels of BS rows, B as NB panels of BS columns and C as NB x NB tiles.
-        ! Each tile of C is migrated to rank modulo(b_row+b_col, comm_size), one
-        ! nf_mm_cl task (C = C + A*B) is inserted per tile, the tiles are migrated
-        ! back to rank 0, and rank 0 checks that C equals 2*B.
-        use iso_c_binding       ! C interfacing module
-        use fstarpu_mod         ! StarPU interfacing module
-        use fstarpu_mpi_mod     ! StarPU-MPI interfacing module
-        use nf_mm_cl
-        implicit none
-
-        logical, parameter :: verbose = .false.
-        integer(c_int) :: comm_rank, comm_size, comm_world
-        integer(c_int) :: N = 16, BS = 4, NB
-        real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:)
-        type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:)
-        type(c_ptr) :: cl_mm
-        integer(c_int) :: ncpu
-        integer(c_int) :: ret
-        integer(c_int) :: row, col
-        integer(c_int) :: b_row, b_col
-        integer(c_int) :: mr, tag, rank
-
-        ! NOTE(review): -19 is presumably -ENODEV (no worker available) -- confirm
-        ret = fstarpu_init(C_NULL_PTR)
-        if (ret == -19) then
-                stop 77
-        else if (ret /= 0) then
-                stop 1
-        end if
-
-        ret = fstarpu_mpi_init(1)
-        print *,"fstarpu_mpi_init status:", ret
-        if (ret /= 0) then
-                stop 1
-        end if
-
-        ! stop there if no CPU worker available
-        ncpu = fstarpu_cpu_worker_get_count()
-        if (ncpu == 0) then
-                call fstarpu_shutdown()
-                ! the MPI layer was initialized above: shut it down too, exactly
-                ! as the comm_size < 2 exit path below does
-                ret = fstarpu_mpi_shutdown()
-                stop 77
-        end if
-
-        comm_world = fstarpu_mpi_world_comm()
-        comm_size = fstarpu_mpi_world_size()
-        comm_rank = fstarpu_mpi_world_rank()
-
-        ! the example needs at least two MPI ranks
-        if (comm_size < 2) then
-                call fstarpu_shutdown()
-                ret = fstarpu_mpi_shutdown()
-                stop 77
-        end if
-
-        ! TODO: process app's argc/argv
-        NB = N/BS
-
-        ! allocate and initialize codelet: buffers are A (read), B (read), C (read-write)
-        cl_mm = fstarpu_codelet_allocate()
-        call fstarpu_codelet_set_name(cl_mm, c_char_"nf_mm_cl"//c_null_char)
-        call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_mult))
-        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
-        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
-        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW)
-
-        ! allocate matrices (only rank 0 holds the full matrices)
-        if (comm_rank == 0) then
-                allocate(A(N,N))
-                allocate(B(N,N))
-                allocate(C(N,N))
-        end if
-
-        ! init matrices
-        if (comm_rank == 0) then
-                do col=1,N
-                do row=1,N
-                if (row == col) then
-                        A(row,col) = 2
-                else
-                        A(row,col) = 0
-                end if
-                B(row,col) = row*N+col
-                C(row,col) = 0
-                end do
-                end do
-
-                if (verbose) then
-                        print *,"A"
-                        call mat_disp(A)
-                        print *,"B"
-                        call mat_disp(B)
-                        print *,"C"
-                        call mat_disp(C)
-                end if
-        end if
-
-        ! allocate data handles
-        allocate(dh_A(NB))
-        allocate(dh_B(NB))
-        allocate(dh_C(NB,NB))
-
-        ! register matrices: rank 0 registers its own memory, the other ranks
-        ! register handles with home node -1 and no buffer
-        if (comm_rank == 0) then
-                mr = 0 ! TODO: use STARPU_MAIN_RAM constant
-        else
-                mr = -1
-        end if
-        tag = 0
-
-        ! A: one handle per panel of BS rows; every handle gets its own MPI tag
-        do b_row=1,NB
-                if (comm_rank == 0) then
-                        call fstarpu_matrix_data_register(dh_A(b_row), mr, &
-                                c_loc( A(1+(b_row-1)*BS,1) ), N, BS, N, c_sizeof(A(1,1)))
-                else
-                        call fstarpu_matrix_data_register(dh_A(b_row), mr, &
-                                c_null_ptr, N, BS, N, c_sizeof(A(1,1)))
-                end if
-                call fstarpu_mpi_data_register(dh_A(b_row), tag, 0)
-                tag = tag+1
-        end do
-
-        ! B: one handle per panel of BS columns
-        do b_col=1,NB
-                if (comm_rank == 0) then
-                        call fstarpu_matrix_data_register(dh_B(b_col), mr, &
-                                c_loc( B(1,1+(b_col-1)*BS) ), N, N, BS, c_sizeof(B(1,1)))
-                else
-                        call fstarpu_matrix_data_register(dh_B(b_col), mr, &
-                                c_null_ptr, N, N, BS, c_sizeof(B(1,1)))
-                end if
-                call fstarpu_mpi_data_register(dh_B(b_col), tag, 0)
-                tag = tag+1
-        end do
-
-        ! C: one handle per BS x BS tile
-        do b_col=1,NB
-        do b_row=1,NB
-                if (comm_rank == 0) then
-                        call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, &
-                                c_loc( C(1+(b_row-1)*BS,1+(b_col-1)*BS) ), N, BS, BS, c_sizeof(C(1,1)))
-                else
-                        call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, &
-                                c_null_ptr, N, BS, BS, c_sizeof(C(1,1)))
-                end if
-                call fstarpu_mpi_data_register(dh_C(b_row,b_col), tag, 0)
-                tag = tag+1
-        end do
-        end do
-
-        ! distribute matrix C
-        do b_col=1,NB
-        do b_row=1,NB
-        rank = modulo(b_row+b_col, comm_size)
-        call fstarpu_mpi_data_migrate(comm_world, dh_C(b_row,b_col), rank)
-        end do
-        end do
-
-        ! one task per tile of C
-        do b_col=1,NB
-        do b_row=1,NB
-                ret = fstarpu_mpi_task_insert(comm_world, (/ cl_mm, &
-                        FSTARPU_R,  dh_A(b_row), &
-                        FSTARPU_R,  dh_B(b_col), &
-                        FSTARPU_RW, dh_C(b_row,b_col), &
-                        C_NULL_PTR /))
-        end do
-        end do
-
-        call fstarpu_task_wait_for_all()
-
-        ! undistribute matrix C
-        do b_col=1,NB
-        do b_row=1,NB
-        call fstarpu_mpi_data_migrate(comm_world, dh_C(b_row,b_col), 0)
-        end do
-        end do
-
-        ! unregister matrices
-        do b_row=1,NB
-                call fstarpu_data_unregister(dh_A(b_row))
-        end do
-
-        do b_col=1,NB
-                call fstarpu_data_unregister(dh_B(b_col))
-        end do
-
-        do b_col=1,NB
-        do b_row=1,NB
-                call fstarpu_data_unregister(dh_C(b_row,b_col))
-        end do
-        end do
-
-        ! check result: A is twice the identity, so C must be 2*B
-        if (comm_rank == 0) then
-                if (verbose) then
-                        print *,"final C"
-                        call mat_disp(C)
-                end if
-
-                do col=1,N
-                do row=1,N
-                if (abs(C(row,col) - 2*(row*N+col)) > 1.0) then
-                        print *, "check failed"
-                        stop 1
-                end if
-                end do
-                end do
-        end if
-
-        ! free handles
-        deallocate(dh_A)
-        deallocate(dh_B)
-        deallocate(dh_C)
-
-        ! free matrices
-        if (comm_rank == 0) then
-                deallocate(A)
-                deallocate(B)
-                deallocate(C)
-        end if
-        call fstarpu_codelet_free(cl_mm)
-        call fstarpu_shutdown()
-
-        ret = fstarpu_mpi_shutdown()
-        print *,"fstarpu_mpi_shutdown status:", ret
-        if (ret /= 0) then
-                stop 1
-        end if
-end program nf_mm

+ 0 - 90
nmad/examples/native_fortran/nf_mm_cl.f90

@@ -1,90 +0,0 @@
-! StarPU --- Runtime system for heterogeneous multicore architectures.
-!
-! Copyright (C) 2016  Inria
-!
-! StarPU is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at
-! your option) any later version.
-!
-! StarPU is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-!
-! See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-module nf_mm_cl
-contains
-! Print matrix m on standard output, one "| ... |" line per row.
-! Lives in this module so that both the main program and the codelet
-! routines can call it when debugging.
-subroutine mat_disp (m)
-        use iso_c_binding       ! C interfacing module
-        implicit none
-        real(kind=c_double) :: m(:,:)
-        integer :: r, c
-
-        do r = lbound(m,1), ubound(m,1)
-                write(*, fmt="(A2) ", advance="no") "| "
-                do c = lbound(m,2), ubound(m,2)
-                        write(*, fmt="(F6.1,A1) ", advance="no") m(r,c), " "
-                end do
-                write(*,*) "|"
-        end do
-        write(*,*)
-
-end subroutine
-
-! CPU implementation of the nf_mm codelet: C = C + A * B, where A, B and C
-! are the matrices behind buffers 0, 1 and 2.  Stops the program with
-! status 1 when the matrix dimensions do not agree.
-recursive subroutine cl_cpu_mult (buffers, cl_args) bind(C)
-        use iso_c_binding       ! C interfacing module
-        use fstarpu_mod         ! StarPU interfacing module
-        implicit none
-
-        type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused
-        real(kind=c_double), pointer :: A(:,:), B(:,:), C(:,:)
-        integer :: ld_A, nx_A, ny_A
-        integer :: ld_B, nx_B, ny_B
-        integer :: ld_C, nx_C, ny_C
-        integer :: row, col, inner
-
-        ! geometry of buffer 0 (A)
-        ld_A = fstarpu_matrix_get_ld(buffers, 0)
-        nx_A = fstarpu_matrix_get_nx(buffers, 0)
-        ny_A = fstarpu_matrix_get_ny(buffers, 0)
-
-        ! geometry of buffer 1 (B)
-        ld_B = fstarpu_matrix_get_ld(buffers, 1)
-        nx_B = fstarpu_matrix_get_nx(buffers, 1)
-        ny_B = fstarpu_matrix_get_ny(buffers, 1)
-
-        ! geometry of buffer 2 (C)
-        ld_C = fstarpu_matrix_get_ld(buffers, 2)
-        nx_C = fstarpu_matrix_get_nx(buffers, 2)
-        ny_C = fstarpu_matrix_get_ny(buffers, 2)
-
-        ! dimension checks, reported in this order
-        if (ny_C /= ny_B) then
-                write(*,*) "C -- B column mismatch"
-                stop 1
-        end if
-        if (nx_C /= nx_A) then
-                write(*,*) "C -- A row mismatch"
-                stop 1
-        end if
-        if (ny_A /= nx_B) then
-                write(*,*) "A -- B col/row mismatch"
-                stop 1
-        end if
-
-        ! map the raw buffers onto Fortran arrays; the first extent is the
-        ! leading dimension, so (row, col) indexing follows the storage
-        call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), A, shape=[ld_A,ny_A])
-        call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), B, shape=[ld_B,ny_B])
-        call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), C, shape=[ld_C,ny_C])
-
-        do col = 1, ny_C
-                do row = 1, nx_C
-                        do inner = 1, nx_B
-                                C(row,col) = C(row,col) + A(row,inner) * B(inner,col)
-                        end do
-                end do
-        end do
-
-end subroutine cl_cpu_mult
-end module nf_mm_cl

+ 0 - 106
nmad/examples/perf.sh

@@ -1,106 +0,0 @@
-#!/bin/bash
-
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-# 
-# Copyright (C) 2010  Université de Bordeaux
-# Copyright (C) 2010  CNRS
-# 
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-# 
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# 
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-# 4G x np = 4 * (k*1K) ^ 2
-# A G * np = 4 * k^2 * 1M
-# A * 250 * np = k^2
-# A = 6
-# k = sqrt(1500*np)
-# np = 1 => k = 32
-# np = 2 => k = 48
-# np = 3 => k = 64 
-# np = 4 => k = 64
-
-# Problem size
-NBLOCKS=16
-BLOCKSIZE=1024
-SIZE=$(($NBLOCKS*$BLOCKSIZE))
-
-echo "JOB ID ${PBS_JOBID}"
-
-nnodes=$(cat machinefile.${PBS_JOBID}|wc -l)
-echo "got $nnodes mpi nodes"
-
-# Calibrate
-ncalibrate=0
-for i in `seq 1 $ncalibrate`
-do
-echo "STARPU_CALIBRATE $i/$ncalibrate"
-STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $nnodes ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 -size $((32*$BLOCKSIZE)) -numa
-done
-
-# Run one plu_example_float experiment and log it.
-#   $1: number of CUDA devices per process   $2: number of MPI processes
-#   $3, $4: values passed to -p and -q       $5: number of blocks
-# A banner, then stdout followed by stderr of the run, are shown on the
-# terminal and appended to log.all; "log" is used as a scratch file.
-func()
-{
-	local ngpus=$1
-	local np=$2
-	local p=$3
-	local q=$4
-	local nblocks=$5
-
-	{
-		echo "*******************************************"
-		echo "*************** NGPUS $ngpus - np $np - nblocks $nblocks **************"
-		echo "*******************************************"
-	} > log
-	cat log
-	cat log >> log.all
-
-	STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
-		mpirun -machinefile machinefile.${PBS_JOBID} -np $np \
-		./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $((nblocks * BLOCKSIZE)) -numa \
-		> log.out 2> log.err
-
-	cat log.out log.err > log
-	cat log
-	cat log >> log.all
-}
-
-rm -f log.all
-
-#how many time do we repeat each experiment ?
-nloops=3
-
-per_node_max_memory=7000
-
-for np in 1 2 4
-do
-	for nblocks in 16 32 48 64 80
-	do
-		for ngpus_per_node in 1 2 3 4
-		do
-			for loop in `seq 1 $nloops`
-			do
-				# Compute p and q from np
-				case $np in
-				  1) p=1; q=1;;
-				  2) p=2; q=1;;
-				  4) p=2; q=2;;
-				  *) echo -n "does not support $np nodes yet";;
-				esac
-
-				# Does the problem fit into memory ?
-				matrix_size=$(($nblocks * $BLOCKSIZE))
-				per_node_memory=$(($((4*$matrix_size*$matrix_size/(1024*1024))) / $np))
-
-				echo "NP $np P $p Q $q SIZE $per_node_memory NBLOCKS $nblocks"
-
-				if test $per_node_memory -ge $per_node_max_memory; then
-						echo "Problem is too large !"
-				else
-					func $ngpus_per_node $np $p $q $nblocks
-					echo "go !"
-				fi
-			done
-		done
-	done
-done

+ 0 - 287
nmad/examples/stencil/stencil5.c

@@ -1,287 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2013, 2015-2017              Université Bordeaux
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-
-/* Print to ofile unless the STARPU_SSILENT environment variable is set. */
-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-/* Same as FPRINTF, with the message prefixed by the caller's rank in
- * MPI_COMM_WORLD and the calling function name; ofile is flushed afterwards.
- * There is deliberately no ';' after while(0): the caller supplies it, so the
- * macro expands to exactly one statement and is safe in an unbraced if/else. */
-#define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \
-    						int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank);       \
-                                                fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
-                                                fflush(ofile); }} while(0)
-
-/* CPU kernel of the 5-point stencil: the value behind buffer 0 is replaced by
- * the mean of itself and of the four values behind buffers 1 to 4. */
-void stencil5_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	float *centre = (float *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	const float *x_minus = (float *)STARPU_VARIABLE_GET_PTR(descr[1]);
-	const float *x_plus = (float *)STARPU_VARIABLE_GET_PTR(descr[2]);
-	const float *y_minus = (float *)STARPU_VARIABLE_GET_PTR(descr[3]);
-	const float *y_plus = (float *)STARPU_VARIABLE_GET_PTR(descr[4]);
-
-	*centre = (*centre + *x_minus + *x_plus + *y_minus + *y_plus) / 5;
-}
-
-/* Dumb performance model for simgrid: every task, whatever its
- * implementation, is given the same constant cost. */
-static double stencil5_cost_function(struct starpu_task *task, unsigned nimpl)
-{
-	const double fixed_cost = 0.000001;
-
-	(void) nimpl;
-	(void) task;
-	return fixed_cost;
-}
-
-/* Performance model attached to the stencil codelet; the cost comes from
- * stencil5_cost_function. */
-static struct starpu_perfmodel stencil5_model =
-{
-	.symbol = "stencil5",
-	.type = STARPU_COMMON,
-	.cost_function = stencil5_cost_function,
-};
-
-/* Stencil codelet: CPU only, five buffers. */
-struct starpu_codelet stencil5_cl =
-{
-	.model = &stencil5_model,
-	.cpu_funcs = {stencil5_cpu},
-	.nbuffers = 5,
-	.modes =
-	{
-		STARPU_RW,	/* updated point */
-		STARPU_R,	/* its four neighbours */
-		STARPU_R,
-		STARPU_R,
-		STARPU_R
-	},
-};
-
-/* Grid size (X x Y) and default iteration count; STARPU_QUICK_CHECK takes
- * precedence over STARPU_LONG_CHECK when both are defined. */
-#if defined(STARPU_QUICK_CHECK)
-#  define NITER_DEF	10
-#  define X         	2
-#  define Y         	2
-#elif defined(STARPU_LONG_CHECK)
-#  define NITER_DEF	100
-#  define X         	20
-#  define Y         	20
-#else
-#  define NITER_DEF	10
-#  define X         	5
-#  define Y         	5
-#endif
-
-/* Set to 1 by the -display option */
-int display = 0;
-/* Iterations per computation phase; overridden by the -iter option */
-int niter = NITER_DEF;
-
-/* Returns the MPI node number that owns the data at index (x, y) */
-int my_distrib(int x, int y, int nb_nodes)
-{
-	/* Block distrib */
-	const double side = sqrt(nb_nodes);
-	const int block = (int)(x / side + (y / side) * side);
-
-	return block % nb_nodes;
-}
-
-/* Shifted distribution, for migration example: the owner given by
- * my_distrib, moved to the next rank (wrapping around). */
-int my_distrib2(int x, int y, int nb_nodes)
-{
-	const int shifted = my_distrib(x, y, nb_nodes) + 1;
-
-	return shifted % nb_nodes;
-}
-
-/* Parse the command line:
- *   -iter N    sets the global niter to N (base 10)
- *   -display   sets the global display to 1
- * A trailing "-iter" without a value is ignored instead of reading
- * argv[argc], which is a null pointer. */
-static void parse_args(int argc, char **argv)
-{
-	int i;
-	for (i = 1; i < argc; i++)
-	{
-		if (strcmp(argv[i], "-iter") == 0 && i + 1 < argc)
-		{
-			niter = (int) strtol(argv[++i], NULL, 10);
-		}
-		/* not an else-if: as before, the token following -iter is also
-		 * checked against -display */
-		if (strcmp(argv[i], "-display") == 0)
-		{
-			display = 1;
-		}
-	}
-}
-
-/* 5-point stencil over an X x Y grid of floats, one StarPU variable per grid
- * point.  Runs niter iterations with the my_distrib ownership, migrates the
- * data to the my_distrib2 ownership, runs niter more iterations, then brings
- * the data back to its initial owner and unregisters it.
- * Returns 77 when no CPU worker is available, 0 otherwise. */
-int main(int argc, char **argv)
-{
-	int my_rank, size, x, y, loop;
-	float mean=0;
-	float matrix[X][Y];
-	starpu_data_handle_t data_handles[X][Y];
-
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
-
-	if (starpu_cpu_worker_get_count() == 0)
-	{
-		FPRINTF(stderr, "We need at least 1 CPU worker.\n");
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return 77;
-	}
-
-	parse_args(argc, argv);
-
-	/* Initial data values: random floats, with their mean kept for display */
-	starpu_srand48((long int)time(NULL));
-	for(x = 0; x < X; x++)
-	{
-		for (y = 0; y < Y; y++)
-		{
-			matrix[x][y] = (float)starpu_drand48();
-			mean += matrix[x][y];
-		}
-	}
-	mean /= (X*Y);
-
-	if (display)
-	{
-		FPRINTF_MPI(stdout, "mean=%2.2f\n", mean);
-		for(x = 0; x < X; x++)
-		{
-			fprintf(stdout, "[%d] ", my_rank);
-			for (y = 0; y < Y; y++)
-			{
-				fprintf(stdout, "%2.2f ", matrix[x][y]);
-			}
-			fprintf(stdout, "\n");
-		}
-	}
-
-	/* Initial distribution: register the points this rank owns or that
-	 * neighbour a point it owns; leave the other handles NULL */
-	for(x = 0; x < X; x++)
-	{
-		for (y = 0; y < Y; y++)
-		{
-			int mpi_rank = my_distrib(x, y, size);
-			if (mpi_rank == my_rank)
-			{
-				//FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
-				starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float));
-			}
-			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
-				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
-			{
-				/* I don't own that index, but will need it for my computations */
-				//FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y);
-				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float));
-			}
-			else
-			{
-				/* I know it's useless to allocate anything for this */
-				data_handles[x][y] = NULL;
-			}
-			if (data_handles[x][y])
-			{
-				/* the MPI tag of a point is (y*X)+x, its owner is mpi_rank */
-				starpu_data_set_coordinates(data_handles[x][y], 2, x, y);
-				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
-			}
-		}
-	}
-
-	/* First computation with initial distribution (interior points only) */
-	for(loop=0 ; loop<niter; loop++)
-	{
-		starpu_iteration_push(loop);
-
-		for (x = 1; x < X-1; x++)
-		{
-			for (y = 1; y < Y-1; y++)
-			{
-				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
-						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
-						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
-						       0);
-			}
-		}
-		starpu_iteration_pop();
-	}
-	FPRINTF(stderr, "Waiting ...\n");
-	starpu_task_wait_for_all();
-
-	/* Now migrate data to a new distribution */
-
-	/* First register newly needed data */
-	for(x = 0; x < X; x++)
-	{
-		for (y = 0; y < Y; y++)
-		{
-			int mpi_rank = my_distrib2(x, y, size);
-			if (!data_handles[x][y] && (mpi_rank == my_rank
-				 || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size)
-				 || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size)))
-			{
-				/* Register newly-needed data */
-				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float));
-				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
-			}
-			if (data_handles[x][y] && mpi_rank != starpu_mpi_data_get_rank(data_handles[x][y]))
-				/* Migrate the data */
-				starpu_mpi_data_migrate(MPI_COMM_WORLD, data_handles[x][y], mpi_rank);
-		}
-	}
-
-	/* Second computation with new distribution; iteration numbers continue
-	 * after those of the first phase */
-	for(loop=0 ; loop<niter; loop++)
-	{
-		starpu_iteration_push(niter + loop);
-
-		for (x = 1; x < X-1; x++)
-		{
-			for (y = 1; y < Y-1; y++)
-			{
-				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
-						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
-						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
-						       0);
-			}
-		}
-		starpu_iteration_pop();
-	}
-	FPRINTF(stderr, "Waiting ...\n");
-	starpu_task_wait_for_all();
-
-	/* Unregister data */
-	for(x = 0; x < X; x++)
-	{
-		for (y = 0; y < Y; y++)
-		{
-			if (data_handles[x][y])
-			{
-				int mpi_rank = my_distrib(x, y, size);
-				/* Get back data to original place where the user-provided buffer is. */
-				starpu_mpi_data_migrate(MPI_COMM_WORLD, data_handles[x][y], mpi_rank);
-				/* And unregister it */
-				starpu_data_unregister(data_handles[x][y]);
-			}
-		}
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	/* Final display: the mean printed is the one computed on the initial values */
-	if (display)
-	{
-		FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean);
-		for(x = 0; x < X; x++)
-		{
-			FPRINTF(stdout, "[%d] ", my_rank);
-			for (y = 0; y < Y; y++)
-			{
-				FPRINTF(stdout, "%2.2f ", matrix[x][y]);
-			}
-			FPRINTF(stdout, "\n");
-		}
-	}
-
-	return 0;
-}

+ 0 - 290
nmad/examples/stencil/stencil5_lb.c

@@ -1,290 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2013, 2015-2017              Université Bordeaux
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <starpu_mpi_lb.h>
-#include <math.h>
-
-/* Print to ofile unless the STARPU_SSILENT environment variable is set. */
-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-/* Same as FPRINTF, with the message prefixed by the caller's rank in
- * MPI_COMM_WORLD and the calling function name; ofile is flushed afterwards.
- * There is deliberately no ';' after while(0): the caller supplies it, so the
- * macro expands to exactly one statement and is safe in an unbraced if/else. */
-#define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \
-    						int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank);       \
-                                                fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
-                                                fflush(ofile); }} while(0)
-
-/* CPU kernel of the 5-point stencil: the value behind buffer 0 is replaced by
- * the mean of itself and of the four values behind buffers 1 to 4. */
-void stencil5_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
-{
-	float *centre = (float *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	const float *x_minus = (float *)STARPU_VARIABLE_GET_PTR(descr[1]);
-	const float *x_plus = (float *)STARPU_VARIABLE_GET_PTR(descr[2]);
-	const float *y_minus = (float *)STARPU_VARIABLE_GET_PTR(descr[3]);
-	const float *y_plus = (float *)STARPU_VARIABLE_GET_PTR(descr[4]);
-
-	*centre = (*centre + *x_minus + *x_plus + *y_minus + *y_plus) / 5;
-}
-
-/* Stencil codelet: CPU only, five buffers. */
-struct starpu_codelet stencil5_cl =
-{
-	.nbuffers = 5,
-	.modes =
-	{
-		STARPU_RW,	/* updated point */
-		STARPU_R,	/* its four neighbours */
-		STARPU_R,
-		STARPU_R,
-		STARPU_R
-	},
-	.cpu_funcs = {stencil5_cpu},
-};
-
-/* Grid size (X x Y) and default iteration count; STARPU_QUICK_CHECK takes
- * precedence over STARPU_LONG_CHECK when both are defined. */
-#if defined(STARPU_QUICK_CHECK)
-#  define NITER_DEF	10
-#  define X         	2
-#  define Y         	2
-#elif defined(STARPU_LONG_CHECK)
-#  define NITER_DEF	100
-#  define X         	20
-#  define Y         	20
-#else
-#  define NITER_DEF	10
-#  define X         	5
-#  define Y         	5
-#endif
-
-/* Set to 1 by the -display option */
-int display = 0;
-/* Number of iterations; overridden by the -iter option */
-int niter = NITER_DEF;
-
-/* Returns the MPI node number that owns the data at index (x, y) */
-int my_distrib(int x, int y, int nb_nodes)
-{
-	/* Block distrib */
-	const double side = sqrt(nb_nodes);
-	const int block = (int)(x / side + (y / side) * side);
-
-	return block % nb_nodes;
-}
-
-/* Parse the command line:
- *   -iter N    sets the global niter to N (base 10)
- *   -display   sets the global display to 1
- * A trailing "-iter" without a value is ignored instead of reading
- * argv[argc], which is a null pointer. */
-static void parse_args(int argc, char **argv)
-{
-	int i;
-	for (i = 1; i < argc; i++)
-	{
-		if (strcmp(argv[i], "-iter") == 0 && i + 1 < argc)
-		{
-			niter = (int) strtol(argv[++i], NULL, 10);
-		}
-		/* not an else-if: as before, the token following -iter is also
-		 * checked against -display */
-		if (strcmp(argv[i], "-display") == 0)
-		{
-			display = 1;
-		}
-	}
-}
-
-/* Load-balancer callback: stores in *neighbor_ids a malloc'ed array holding
- * the ranks of the caller's neighbours on a ring over MPI_COMM_WORLD, and in
- * *nneighbors their number (the next rank only when there are at most two
- * ranks, the next and the previous rank otherwise). */
-void get_neighbors(int **neighbor_ids, int *nneighbors)
-{
-	int rank, size;
-	int *ids;
-
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
-
-	if (size > 2)
-	{
-		*nneighbors = 2;
-		ids = malloc(2*sizeof(int));
-		*neighbor_ids = ids;
-		ids[0] = (rank == size-1) ? 0 : rank+1;
-		ids[1] = (rank == 0) ? size-1 : rank-1;
-		fprintf(stderr, "rank %d has neighbor %d and %d\n", rank, ids[0], ids[1]);
-		return;
-	}
-
-	*nneighbors = 1;
-	ids = malloc(sizeof(int));
-	*neighbor_ids = ids;
-	ids[0] = (rank == size-1) ? 0 : rank+1;
-	fprintf(stderr, "rank %d has neighbor %d\n", rank, ids[0]);
-}
-
-struct data_node
-{
-	starpu_data_handle_t data_handle; /* handle of the grid point, NULL when not registered on this rank */
-	int node; /* rank recorded as owner of the point */
-};
-
-struct data_node data_nodes[X][Y]; /* one entry per point of the X x Y grid */
-
-/* Load-balancer callback: picks the first grid point (x-major order) recorded
- * as owned by the calling rank, returns its handle in a malloc'ed one-element
- * array through *handle_unit, sets *nhandles to 1 and records dst_node as the
- * new owner.  Sets *nhandles to 0 when the rank owns nothing. */
-void get_data_unit_to_migrate(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node)
-{
-	int rank, cell;
-
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	fprintf(stderr, "Looking to move data from %d to %d\n", rank, dst_node);
-
-	for (cell = 0; cell < X*Y; cell++)
-	{
-		struct data_node *candidate = &data_nodes[cell / Y][cell % Y];
-		starpu_data_handle_t *unit;
-
-		if (candidate->node != rank)
-			continue;
-
-		unit = malloc(sizeof(starpu_data_handle_t));
-		*handle_unit = unit;
-		unit[0] = candidate->data_handle;
-		*nhandles = 1;
-		candidate->node = dst_node;
-		return;
-	}
-
-	*nhandles = 0;
-}
-
-/* 5-point stencil over an X x Y grid of floats with the "heat" load balancer
- * enabled: registers one StarPU variable per grid point, runs niter
- * iterations, shuts the load balancer down, then unregisters the data.
- * Returns 77 when run on more than 2 nodes or without any CPU worker,
- * 0 otherwise. */
-int main(int argc, char **argv)
-{
-	int my_rank, size, x, y, loop;
-	float mean=0;
-	float matrix[X][Y];
-	struct starpu_mpi_lb_conf itf;
-
-	/* Callbacks handed to the load balancer */
-	itf.get_neighbors = get_neighbors;
-	itf.get_data_unit_to_migrate = get_data_unit_to_migrate;
-
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
-
-	if (size > 2)
-	{
-		FPRINTF(stderr, "Only works with 2 nodes\n");
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return 77;
-	}
-	if (starpu_cpu_worker_get_count() == 0)
-	{
-		FPRINTF(stderr, "We need at least 1 CPU worker.\n");
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return 77;
-	}
-
-	/* NOTE(review): LB_HEAT_SLEEP_THRESHOLD is presumably read by the "heat" policy -- confirm */
-	setenv("LB_HEAT_SLEEP_THRESHOLD", "5", 1);
-	starpu_mpi_lb_init("heat", &itf);
-
-	parse_args(argc, argv);
-
-	/* Initial data values: random floats, with their mean kept for display */
-	starpu_srand48((long int)time(NULL));
-	for(x = 0; x < X; x++)
-	{
-		for (y = 0; y < Y; y++)
-		{
-			matrix[x][y] = (float)starpu_drand48();
-			mean += matrix[x][y];
-		}
-	}
-	mean /= (X*Y);
-
-	if (display)
-	{
-		FPRINTF_MPI(stdout, "mean=%2.2f\n", mean);
-		for(x = 0; x < X; x++)
-		{
-			fprintf(stdout, "[%d] ", my_rank);
-			for (y = 0; y < Y; y++)
-			{
-				fprintf(stdout, "%2.2f ", matrix[x][y]);
-			}
-			fprintf(stdout, "\n");
-		}
-	}
-
-	/* Initial distribution: register the points this rank owns or that
-	 * neighbour a point it owns; leave the other handles NULL */
-	for(x = 0; x < X; x++)
-	{
-		for (y = 0; y < Y; y++)
-		{
-			data_nodes[x][y].node = my_distrib(x, y, size);
-			if (data_nodes[x][y].node == my_rank)
-			{
-				//FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
-				starpu_variable_data_register(&data_nodes[x][y].data_handle, 0, (uintptr_t)&(matrix[x][y]), sizeof(float));
-			}
-			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
-				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
-			{
-				/* I don't own that index, but will need it for my computations */
-				//FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y);
-				starpu_variable_data_register(&data_nodes[x][y].data_handle, -1, (uintptr_t)NULL, sizeof(float));
-			}
-			else
-			{
-				/* I know it's useless to allocate anything for this */
-				data_nodes[x][y].data_handle = NULL;
-			}
-			if (data_nodes[x][y].data_handle)
-			{
-				/* the MPI tag of a point is (y*X)+x */
-				starpu_data_set_coordinates(data_nodes[x][y].data_handle, 2, x, y);
-				starpu_mpi_data_register(data_nodes[x][y].data_handle, (y*X)+x, data_nodes[x][y].node);
-			}
-		}
-	}
-
-	/* First computation with initial distribution (interior points only);
-	 * each task carries the tag X*x + y */
-	for(loop=0 ; loop<niter; loop++)
-	{
-		starpu_iteration_push(loop);
-
-		for (x = 1; x < X-1; x++)
-		{
-			for (y = 1; y < Y-1; y++)
-			{
-				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_nodes[x][y].data_handle,
-						       STARPU_R, data_nodes[x-1][y].data_handle, STARPU_R, data_nodes[x+1][y].data_handle,
-						       STARPU_R, data_nodes[x][y-1].data_handle, STARPU_R, data_nodes[x][y+1].data_handle,
-						       STARPU_TAG_ONLY, ((starpu_tag_t)X)*x + y,
-						       0);
-			}
-		}
-		starpu_iteration_pop();
-	}
-	FPRINTF(stderr, "Waiting ...\n");
-	starpu_task_wait_for_all();
-
-	// The load balancer needs to be shutdown before unregistering data as it needs access to them
-	starpu_mpi_lb_shutdown();
-
-	/* Unregister data */
-	for(x = 0; x < X; x++)
-	{
-		for (y = 0; y < Y; y++)
-		{
-			if (data_nodes[x][y].data_handle)
-			{
-				starpu_data_unregister(data_nodes[x][y].data_handle);
-			}
-		}
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	/* Final display: the mean printed is the one computed on the initial values */
-	if (display)
-	{
-		FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean);
-		for(x = 0; x < X; x++)
-		{
-			FPRINTF(stdout, "[%d] ", my_rank);
-			for (y = 0; y < Y; y++)
-			{
-				FPRINTF(stdout, "%2.2f ", matrix[x][y]);
-			}
-			FPRINTF(stdout, "\n");
-		}
-	}
-
-	return 0;
-}

+ 0 - 228
nmad/examples/user_datatype/my_interface.c

@@ -1,228 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-
-#include "my_interface.h"
-
-/*
- * CPU implementation of the display codelet: reads the char and int fields
- * of descr[0] through the STARPU_MY_INTERFACE_GET_* accessors and prints
- * them to stderr, prefixed by the label unpacked from _args (if any).
- */
-void starpu_my_interface_display_codelet_cpu(void *descr[], void *_args)
-{
-	char c = STARPU_MY_INTERFACE_GET_CHAR(descr[0]);
-	int d = STARPU_MY_INTERFACE_GET_INT(descr[0]);
-	/* Start from an empty string so msg is always a valid C string */
-	char msg[100] = "";
-
-	if (_args)
-		starpu_codelet_unpack_args(_args, msg);
-
-	/* Passing NULL for %s is undefined behaviour: print an explicit placeholder instead */
-	fprintf(stderr, "[%s] My value = '%c' %d\n", _args ? msg : "(null)", c, d);
-}
-
-/*
- * CPU implementation of the compare codelet: writes 1 through the int
- * pointer unpacked from _args when descr[0] and descr[1] carry the same int
- * and the same char, 0 otherwise.
- */
-void starpu_my_interface_compare_codelet_cpu(void *descr[], void *_args)
-{
-	int *result;
-	int same_int;
-	int same_char;
-
-	starpu_codelet_unpack_args(_args, &result);
-
-	same_int = STARPU_MY_INTERFACE_GET_INT(descr[0]) == STARPU_MY_INTERFACE_GET_INT(descr[1]);
-	same_char = STARPU_MY_INTERFACE_GET_CHAR(descr[0]) == STARPU_MY_INTERFACE_GET_CHAR(descr[1]);
-
-	*result = (same_int && same_char);
-}
-
-/*
- * Build and commit an MPI struct datatype describing struct
- * starpu_my_interface: one MPI_INT at displacement 0 and one MPI_CHAR at the
- * displacement of the c field. The committed type is stored in *mpi_datatype.
- */
-void _starpu_my_interface_datatype_allocate(MPI_Datatype *mpi_datatype)
-{
-	int ret;
-	int blocklengths[2] = {1, 1};
-	MPI_Aint displacements[2];
-	MPI_Datatype types[2] = {MPI_INT, MPI_CHAR};
-	/* Only addresses are taken, so a stack instance replaces the unchecked malloc */
-	struct starpu_my_interface myinterface;
-
-	/* MPI_Get_address supersedes MPI_Address, which was removed in MPI-3.0 */
-	MPI_Get_address(&myinterface, displacements);
-	MPI_Get_address(&myinterface.c, displacements+1);
-	/* Convert the absolute addresses into offsets from the start of the struct */
-	displacements[1] -= displacements[0];
-	displacements[0] = 0;
-
-	ret = MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_create_struct failed");
-
-	ret = MPI_Type_commit(mpi_datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
-}
-
-/* Handle-taking wrapper: the handle is unused, the datatype is built by
- * _starpu_my_interface_datatype_allocate(). */
-void starpu_my_interface_datatype_allocate(starpu_data_handle_t handle, MPI_Datatype *mpi_datatype)
-{
-	_starpu_my_interface_datatype_allocate(mpi_datatype);
-	(void)handle;
-}
-
-/* Release an MPI datatype via MPI_Type_free(); its return code is not checked. */
-void starpu_my_interface_datatype_free(MPI_Datatype *mpi_datatype)
-{
-	(void) MPI_Type_free(mpi_datatype);
-}
-
-/* Return the int field of the handle's interface on STARPU_MAIN_RAM. */
-int starpu_my_interface_get_int(starpu_data_handle_t handle)
-{
-	struct starpu_my_interface *iface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return iface->d;
-}
-
-/* Return the char field of the handle's interface on STARPU_MAIN_RAM. */
-char starpu_my_interface_get_char(starpu_data_handle_t handle)
-{
-	struct starpu_my_interface *iface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return iface->c;
-}
-
-/*
- * register_data_handle method: initialise the per-node interface of every
- * memory node. The home node receives a copy of the registered fields, all
- * other nodes are zeroed.
- */
-static void data_register_data_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
-{
-	const struct starpu_my_interface *registered = data_interface;
-	unsigned n;
-
-	for (n = 0; n < STARPU_MAXNODES; n++)
-	{
-		struct starpu_my_interface *replica = starpu_data_get_interface_on_node(handle, n);
-		int is_home = (n == home_node);
-
-		replica->d = is_home ? registered->d : 0;
-		replica->c = is_home ? registered->c : 0;
-	}
-}
-
-/* allocate_data_on_node method: allocates nothing and reports a size of 0. */
-static starpu_ssize_t data_allocate_data_on_node(void *data_interface, unsigned node)
-{
-	(void)node;
-	(void)data_interface;
-
-	return 0;
-}
-
-/* free_data_on_node method: intentionally empty, both arguments are unused. */
-static void data_free_data_on_node(void *data_interface, unsigned node)
-{
-	(void)node;
-	(void)data_interface;
-}
-
-/* get_size method: size of one int plus one char, whatever the handle. */
-static size_t data_get_size(starpu_data_handle_t handle)
-{
-	const size_t payload = sizeof(int) + sizeof(char);
-
-	(void)handle;
-	return payload;
-}
-
-/* footprint method: CRC32C (seed 0) of the handle's int field. */
-static uint32_t data_footprint(starpu_data_handle_t handle)
-{
-	int value = starpu_my_interface_get_int(handle);
-
-	return starpu_hash_crc32c_be(value, 0);
-}
-
-/* pack_data method: always trips an assertion, since packing is not
- * expected for this interface. All arguments are unused. */
-static int data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
-{
-	(void)count;
-	(void)ptr;
-	(void)node;
-	(void)handle;
-
-	STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the pack_data function should not happen\n");
-	return 0;
-}
-
-/* unpack_data method: always trips an assertion, since unpacking is not
- * expected for this interface. All arguments are unused. */
-static int data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
-{
-	(void)count;
-	(void)ptr;
-	(void)node;
-	(void)handle;
-
-	STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the unpack_data function should not happen\n");
-	return 0;
-}
-
-/* describe method: format the interface as "Data<int>-<char>" into buf
- * (at most size bytes) and return snprintf()'s result. */
-static starpu_ssize_t data_describe(void *data_interface, char *buf, size_t size)
-{
-	const struct starpu_my_interface *iface = data_interface;
-	int written = snprintf(buf, size, "Data%d-%c", iface->d, iface->c);
-
-	return written;
-}
-
-/* handle_to_pointer method: address of the int field of the handle's
- * interface on the given node; asserts the data is allocated there. */
-static void *data_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
-{
-	struct starpu_my_interface *iface;
-
-	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-
-	iface = starpu_data_get_interface_on_node(handle, node);
-	return &iface->d;
-}
-
-/*
- * any_to_any copy method: transfers the int field and then the char field
- * from the source interface to the destination interface with
- * starpu_interface_copy().
- * Returns 0, or -EAGAIN when either starpu_interface_copy() call returns
- * non-zero.
- */
-static int copy_any_to_any(void *src_interface, unsigned src_node,
-			   void *dst_interface, unsigned dst_node,
-			   void *async_data)
-{
-	struct starpu_my_interface *src_data = src_interface;
-	struct starpu_my_interface *dst_data = dst_interface;
-	int ret = 0;
-
-	/* Pass the ADDRESS of each field: casting the field value itself to
-	 * uintptr_t would hand starpu_interface_copy() a bogus pointer. */
-	if (starpu_interface_copy((uintptr_t) &src_data->d, 0, src_node,
-				  (uintptr_t) &dst_data->d, 0, dst_node,
-				  sizeof(src_data->d), async_data))
-		ret = -EAGAIN;
-	if (starpu_interface_copy((uintptr_t) &src_data->c, 0, src_node,
-				  (uintptr_t) &dst_data->c, 0, dst_node,
-				  sizeof(src_data->c),
-				  async_data))
-		ret = -EAGAIN;
-	return ret;
-}
-
-/* Copy methods of the interface: a single generic any_to_any routine. */
-static const struct starpu_data_copy_methods data_copy_methods =
-{
-	.any_to_any = copy_any_to_any,
-};
-
-/*
- * Operation table of the interface. Not const: interfaceid starts as
- * STARPU_UNKNOWN_INTERFACE_ID and is assigned by
- * starpu_my_interface_data_register().
- */
-static struct starpu_data_interface_ops interface_data_ops =
-{
-	/* identification */
-	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
-	.interface_size = sizeof(struct starpu_my_interface),
-	/* registration, allocation and copies */
-	.register_data_handle = data_register_data_handle,
-	.allocate_data_on_node = data_allocate_data_on_node,
-	.free_data_on_node = data_free_data_on_node,
-	.copy_methods = &data_copy_methods,
-	/* introspection */
-	.get_size = data_get_size,
-	.footprint = data_footprint,
-	.handle_to_pointer = data_handle_to_pointer,
-	.describe = data_describe,
-	/* packing */
-	.pack_data = data_pack_data,
-	.unpack_data = data_unpack_data
-};
-
-/*
- * Register xc as a piece of data with home node home_node and store the new
- * handle in *handleptr. An interface id is requested on the first call,
- * while interfaceid is still STARPU_UNKNOWN_INTERFACE_ID.
- */
-void starpu_my_interface_data_register(starpu_data_handle_t *handleptr, unsigned home_node, struct starpu_my_interface *xc)
-{
-	if (interface_data_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID)
-		interface_data_ops.interfaceid = starpu_data_interface_get_next_id();
-
-	starpu_data_register(handleptr, home_node, xc, &interface_data_ops);
-}

+ 0 - 62
nmad/examples/user_datatype/my_interface.h

@@ -1,62 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <mpi.h>
-
-#ifndef __DATA_INTERFACE_H
-#define __DATA_INTERFACE_H
-
-/* Payload of the example data interface: one int followed by one char. */
-struct starpu_my_interface
-{
-	int d;	/* integer value */
-	char c;	/* character value */
-};
-
-/* Registration entry point for a struct starpu_my_interface value. */
-void starpu_my_interface_data_register(starpu_data_handle_t *handle, unsigned home_node, struct starpu_my_interface *xc);
-
-/* Field getters taking a data handle. */
-char starpu_my_interface_get_char(starpu_data_handle_t handle);
-int starpu_my_interface_get_int(starpu_data_handle_t handle);
-
-/* Field accessors: cast a raw interface pointer to struct starpu_my_interface and read one member. */
-#define STARPU_MY_INTERFACE_GET_CHAR(interface)	(((struct starpu_my_interface *)(interface))->c)
-#define STARPU_MY_INTERFACE_GET_INT(interface)	(((struct starpu_my_interface *)(interface))->d)
-
-/* MPI datatype helpers for struct starpu_my_interface. */
-void _starpu_my_interface_datatype_allocate(MPI_Datatype *mpi_datatype);
-void starpu_my_interface_datatype_allocate(starpu_data_handle_t handle, MPI_Datatype *mpi_datatype);
-void starpu_my_interface_datatype_free(MPI_Datatype *mpi_datatype);
-
-/* CPU implementations referenced by the codelets defined below. */
-void starpu_my_interface_display_codelet_cpu(void *descr[], void *_args);
-void starpu_my_interface_compare_codelet_cpu(void *descr[], void *_args);
-
-/* Codelet with one read-only buffer, run on CPU by starpu_my_interface_display_codelet_cpu. */
-static struct starpu_codelet starpu_my_interface_display_codelet =
-{
-	.name = "starpu_my_interface_display_codelet",
-	.nbuffers = 1,
-	.modes = {STARPU_R},
-	.cpu_funcs = {starpu_my_interface_display_codelet_cpu},
-	.cpu_funcs_name = {"starpu_my_interface_display_codelet_cpu"}
-};
-
-/* Codelet with two read-only buffers, run on CPU by starpu_my_interface_compare_codelet_cpu. */
-static struct starpu_codelet starpu_my_interface_compare_codelet =
-{
-	.name = "starpu_my_interface_compare_codelet",
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_R},
-	.cpu_funcs = {starpu_my_interface_compare_codelet_cpu},
-	.cpu_funcs_name = {"starpu_my_interface_compare_codelet_cpu"}
-};
-
-#endif /* __DATA_INTERFACE_H */

+ 0 - 118
nmad/examples/user_datatype/user_datatype.c

@@ -1,118 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "my_interface.h"
-
-/* fprintf() wrapper that prints nothing when the STARPU_SSILENT environment variable is set. */
-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-
-/*
- * Exchange a struct starpu_my_interface between rank 0 and rank 1, first
- * with plain MPI_Send/MPI_Recv and then through StarPU-MPI detached
- * communications, and compare on rank 0 what was sent with what came back.
- *
- * Returns 77 when there are fewer than 2 processes or no CPU worker;
- * otherwise rank 0 returns 0 if the comparison succeeded and 1 if not, and
- * every other rank returns 0.
- */
-int main(int argc, char **argv)
-{
-	int rank, nodes;
-	int ret=0;
-	int compare=0;
-
-	struct starpu_my_interface my1 = {.d = 98 , .c = 'z'};
-	struct starpu_my_interface my0 = {.d = 42 , .c = 'n'};
-
-	starpu_data_handle_t handle0;
-	starpu_data_handle_t handle1;
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
-
-	/* The example needs two processes and a CPU worker: otherwise shut down and return 77 */
-	if (nodes < 2 || (starpu_cpu_worker_get_count() == 0))
-	{
-		if (rank == 0)
-		{
-			if (nodes < 2)
-				fprintf(stderr, "We need at least 2 processes.\n");
-			else
-				fprintf(stderr, "We need at least 1 CPU.\n");
-		}
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return 77;
-	}
-
-	/* Rank 1 starts from different values, so a successful receive is visible in its output */
-	if (rank == 1)
-	{
-		my0.d = 0;
-		my0.c = 'z';
-	}
-	/* handle0 lives in main RAM; handle1 is registered with home node -1 */
-	starpu_my_interface_data_register(&handle0, STARPU_MAIN_RAM, &my0);
-	starpu_my_interface_data_register(&handle1, -1, &my1);
-	starpu_mpi_datatype_register(handle1, starpu_my_interface_datatype_allocate, starpu_my_interface_datatype_free);
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-
-	/* Step 1: plain MPI transfer of my0 from rank 0 to rank 1 (tag 42) using the struct datatype */
-	if (rank == 0)
-	{
-		MPI_Datatype mpi_datatype;
-		_starpu_my_interface_datatype_allocate(&mpi_datatype);
-		MPI_Send(&my0, 1, mpi_datatype, 1, 42, MPI_COMM_WORLD);
-		starpu_my_interface_datatype_free(&mpi_datatype);
-	}
-	else if (rank == 1)
-	{
-		MPI_Datatype mpi_datatype;
-		MPI_Status status;
-		_starpu_my_interface_datatype_allocate(&mpi_datatype);
-		MPI_Recv(&my0, 1, mpi_datatype, 0, 42, MPI_COMM_WORLD, &status);
-		FPRINTF(stderr, "Received value: '%c' %d\n", my0.c, my0.d);
-		starpu_my_interface_datatype_free(&mpi_datatype);
-	}
-
-	/* Step 2: StarPU-MPI round trip. Rank 0 sends handle0 (tag 10) and receives the echo
-	 * into handle1 (tag 20); rank 1 receives into handle0 and sends it back. */
-	if (rank == 0)
-	{
-		/* The compare codelet receives this pointer by value and writes its result through it */
-		int *compare_ptr = &compare;
-
-		starpu_task_insert(&starpu_my_interface_display_codelet, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle0, 0);
-		starpu_mpi_isend_detached(handle0, 1, 10, MPI_COMM_WORLD, NULL, NULL);
-		starpu_mpi_irecv_detached(handle1, 1, 20, MPI_COMM_WORLD, NULL, NULL);
-
-		starpu_task_insert(&starpu_my_interface_display_codelet, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle1, 0);
-		starpu_task_insert(&starpu_my_interface_compare_codelet, STARPU_R, handle0, STARPU_R, handle1, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0);
-	}
-	else if (rank == 1)
-	{
-		starpu_task_insert(&starpu_my_interface_display_codelet, STARPU_VALUE, "node1 initial value", strlen("node1 initial value")+1, STARPU_R, handle0, 0);
-		starpu_mpi_irecv_detached(handle0, 0, 10, MPI_COMM_WORLD, NULL, NULL);
-		starpu_task_insert(&starpu_my_interface_display_codelet, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle0, 0);
-		starpu_mpi_isend_detached(handle0, 0, 20, MPI_COMM_WORLD, NULL, NULL);
-	}
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-	starpu_mpi_wait_for_all(MPI_COMM_WORLD);
-
-	/* NOTE(review): the datatype was registered through handle1 but is unregistered through
-	 * handle0 -- confirm that the registration is shared by both handles. */
-	starpu_mpi_datatype_unregister(handle0);
-	starpu_data_unregister(handle0);
-	starpu_data_unregister(handle1);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	if (rank == 0)
-	{
-		FPRINTF(stderr, "[node 0] %s\n", compare==1?"SUCCESS":"FAILURE");
-	}
-
-	/* Only rank 0 holds the comparison result; a non-zero exit status means it failed */
-	return (rank == 0) ? !compare : 0;
-}

+ 0 - 752
nmad/include/fstarpu_mpi_mod.f90

@@ -1,752 +0,0 @@
-! StarPU --- Runtime system for heterogeneous multicore architectures.
-!
-! Copyright (C) 2016  Inria
-! Copyright (C) 2017  Université de Bordeaux
-!
-! StarPU is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at
-! your option) any later version.
-!
-! StarPU is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-!
-! See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-module fstarpu_mpi_mod
-        use iso_c_binding
-        use fstarpu_mod
-        implicit none
-
-        interface
-                ! == mpi/include/starpu_mpi.h ==
-                ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-                ! Binds to the C symbol fstarpu_mpi_isend; every argument is passed by value.
-                function fstarpu_mpi_isend (dh, mpi_req, dst, mpi_tag, mpi_comm) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, mpi_req
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, mpi_comm
-                        integer(c_int) :: fstarpu_mpi_isend
-                end function fstarpu_mpi_isend
-
-                ! == mpi/include/starpu_mpi.h ==
-                ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm);
-                ! Binds to the C symbol fstarpu_mpi_isend_prio; every argument is passed by value.
-                function fstarpu_mpi_isend_prio (dh, mpi_req, dst, mpi_tag, prio, mpi_comm) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, mpi_req
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, prio, mpi_comm
-                        integer(c_int) :: fstarpu_mpi_isend_prio
-                end function fstarpu_mpi_isend_prio
-
-                ! int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm);
-                ! Binds to the C symbol fstarpu_mpi_irecv; every argument is passed by value.
-                function fstarpu_mpi_irecv (dh, mpi_req, src, mpi_tag, mpi_comm) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, mpi_req
-                        integer(c_int), value, intent(in) :: src, mpi_tag, mpi_comm
-                        integer(c_int) :: fstarpu_mpi_irecv
-                end function fstarpu_mpi_irecv
-
-                ! int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm);
-                ! Binds to the C symbol fstarpu_mpi_send; every argument is passed by value.
-                function fstarpu_mpi_send (dh, dst, mpi_tag, mpi_comm) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, mpi_comm
-                        integer(c_int) :: fstarpu_mpi_send
-                end function fstarpu_mpi_send
-
-                ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm);
-                ! Binds to the C symbol fstarpu_mpi_send_prio; every argument is passed by value.
-                function fstarpu_mpi_send_prio (dh, dst, mpi_tag, prio, mpi_comm) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, prio, mpi_comm
-                        integer(c_int) :: fstarpu_mpi_send_prio
-                end function fstarpu_mpi_send_prio
-
-                ! int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status);
-                ! Binds to the C symbol fstarpu_mpi_recv; mpi_status is passed as an opaque C pointer.
-                function fstarpu_mpi_recv (dh, src, mpi_tag, mpi_comm, mpi_status) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, mpi_status
-                        integer(c_int), value, intent(in) :: src, mpi_tag, mpi_comm
-                        integer(c_int) :: fstarpu_mpi_recv
-                end function fstarpu_mpi_recv
-
-                ! int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-                ! Binds to the C symbol fstarpu_mpi_isend_detached; callback is a C function pointer, arg an opaque C pointer.
-                function fstarpu_mpi_isend_detached (dh, dst, mpi_tag, mpi_comm, callback, arg) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr, c_funptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, arg
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, mpi_comm
-                        type(c_funptr), value, intent(in) :: callback
-                        integer(c_int) :: fstarpu_mpi_isend_detached
-                end function fstarpu_mpi_isend_detached
-
-                ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-                ! Binds to the C symbol fstarpu_mpi_isend_detached_prio; callback is a C function pointer, arg an opaque C pointer.
-                function fstarpu_mpi_isend_detached_prio (dh, dst, mpi_tag, prio, mpi_comm, callback, arg) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr, c_funptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, arg
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, prio, mpi_comm
-                        type(c_funptr), value, intent(in) :: callback
-                        integer(c_int) :: fstarpu_mpi_isend_detached_prio
-                end function fstarpu_mpi_isend_detached_prio
-
-                ! int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-                ! Binds to the C symbol fstarpu_mpi_recv_detached (no "i", unlike the C prototype quoted above).
-                function fstarpu_mpi_recv_detached (dh, src, mpi_tag, mpi_comm, callback, arg) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr, c_funptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, arg
-                        integer(c_int), value, intent(in) :: src, mpi_tag, mpi_comm
-                        type(c_funptr), value, intent(in) :: callback
-                        integer(c_int) :: fstarpu_mpi_recv_detached
-                end function fstarpu_mpi_recv_detached
-
-                ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-                ! Binds to the C symbol fstarpu_mpi_issend; every argument is passed by value.
-                function fstarpu_mpi_issend (dh, mpi_req, dst, mpi_tag, mpi_comm) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, mpi_req
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, mpi_comm
-                        integer(c_int) :: fstarpu_mpi_issend
-                end function fstarpu_mpi_issend
-
-                ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm);
-                ! Binds to the C symbol fstarpu_mpi_issend_prio; every argument is passed by value.
-                function fstarpu_mpi_issend_prio (dh, mpi_req, dst, mpi_tag, prio, mpi_comm) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, mpi_req
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, prio, mpi_comm
-                        integer(c_int) :: fstarpu_mpi_issend_prio
-                end function fstarpu_mpi_issend_prio
-
-                ! int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-                ! Binds to the C symbol fstarpu_mpi_issend_detached; callback is a C function pointer, arg an opaque C pointer.
-                function fstarpu_mpi_issend_detached (dh, dst, mpi_tag, mpi_comm, callback, arg) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr, c_funptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, arg
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, mpi_comm
-                        type(c_funptr), value, intent(in) :: callback
-                        integer(c_int) :: fstarpu_mpi_issend_detached
-                end function fstarpu_mpi_issend_detached
-
-                ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-                ! Binds to the C symbol fstarpu_mpi_issend_detached_prio; callback is a C function pointer, arg an opaque C pointer.
-                function fstarpu_mpi_issend_detached_prio (dh, dst, mpi_tag, prio, mpi_comm, callback, arg) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr, c_funptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, arg
-                        integer(c_int), value, intent(in) :: dst, mpi_tag, prio, mpi_comm
-                        type(c_funptr), value, intent(in) :: callback
-                        integer(c_int) :: fstarpu_mpi_issend_detached_prio
-                end function fstarpu_mpi_issend_detached_prio
-
-                ! int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
-                ! Binds directly to the C function starpu_mpi_wait; both arguments are opaque C pointers.
-                function fstarpu_mpi_wait(req,st) bind(C,name="starpu_mpi_wait")
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: req, st
-                        integer(c_int) :: fstarpu_mpi_wait
-                end function fstarpu_mpi_wait
-
-                ! int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
-                ! Binds directly to the C function starpu_mpi_test; all three arguments are opaque C pointers.
-                function fstarpu_mpi_test(req,flag,st) bind(C,name="starpu_mpi_test")
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: req, flag, st
-                        integer(c_int) :: fstarpu_mpi_test
-                end function fstarpu_mpi_test
-
-                ! int starpu_mpi_barrier(MPI_Comm comm);
-                ! Binds to the C symbol fstarpu_mpi_barrier; the communicator is passed as an integer by value.
-                function fstarpu_mpi_barrier (mpi_comm) bind(C)
-                        use iso_c_binding, only: c_int
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        integer(c_int) :: fstarpu_mpi_barrier
-                end function fstarpu_mpi_barrier
-
-                ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
-                ! Binds to the C symbol fstarpu_mpi_recv_detached_sequential_consistency (no "i", unlike the C prototype quoted above).
-                function fstarpu_mpi_recv_detached_sequential_consistency (dh, src, mpi_tag, mpi_comm, callback, arg, seq_const) &
-                                bind(C)
-                        use iso_c_binding, only: c_int, c_ptr, c_funptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh, arg
-                        integer(c_int), value, intent(in) :: src, mpi_tag, mpi_comm, seq_const
-                        type(c_funptr), value, intent(in) :: callback
-                        integer(c_int) :: fstarpu_mpi_recv_detached_sequential_consistency
-                end function fstarpu_mpi_recv_detached_sequential_consistency
-
-
-                ! int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm);
-                ! -> cf fstarpu_mpi_init
-                ! int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
-                ! -> cf fstarpu_mpi_init
-                ! int starpu_mpi_initialize(void) STARPU_DEPRECATED;
-                ! -> cf fstarpu_mpi_init
-                ! int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED;
-                ! -> cf fstarpu_mpi_init
-
-                ! int starpu_mpi_shutdown(void);
-                ! Binds directly to the C function starpu_mpi_shutdown; takes no argument.
-                function fstarpu_mpi_shutdown () bind(C,name="starpu_mpi_shutdown")
-                        use iso_c_binding, only: c_int
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_shutdown
-                end function fstarpu_mpi_shutdown
-
-                ! struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                ! Binds to the C symbol fstarpu_mpi_task_build and returns a C pointer.
-                ! NOTE(review): arglist is assumed-shape; confirm the C side expects the argument passing this implies.
-                function fstarpu_mpi_task_build(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
-                        ! interface bodies do not inherit the module's implicit none
-                        implicit none
-                        type(c_ptr) :: fstarpu_mpi_task_build
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_task_build
-
-                ! int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                ! Binds to the C symbol fstarpu_mpi_task_post_build.
-                ! NOTE(review): arglist is assumed-shape; confirm the C side expects the argument passing this implies.
-                function fstarpu_mpi_task_post_build(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
-                        ! interface bodies do not inherit the module's implicit none
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_task_post_build
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_task_post_build
-
-                ! int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                ! Binds to the C symbol fstarpu_mpi_task_insert.
-                ! NOTE(review): arglist is assumed-shape; confirm the C side expects the argument passing this implies.
-                function fstarpu_mpi_task_insert(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
-                        ! interface bodies do not inherit the module's implicit none
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_task_insert
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_task_insert
-                ! Alternate Fortran name bound to the same C symbol, fstarpu_mpi_task_insert.
-                function fstarpu_mpi_insert_task(mpi_comm,arglist) bind(C,name="fstarpu_mpi_task_insert")
-                        use iso_c_binding, only: c_ptr,c_int
-                        ! interface bodies do not inherit the module's implicit none
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_insert_task
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_insert_task
-
-                ! void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node);
-                ! Binds to the C symbol fstarpu_mpi_get_data_on_node; every argument is passed by value.
-                subroutine fstarpu_mpi_get_data_on_node(mpi_comm,dh,node) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm, node
-                        type(c_ptr), value, intent(in) :: dh
-                end subroutine fstarpu_mpi_get_data_on_node
-
-                ! void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg);
-                ! Binds to the C symbol fstarpu_mpi_get_data_on_node_detached; callback is a C function pointer, arg an opaque C pointer.
-                subroutine fstarpu_mpi_get_data_on_node_detached(mpi_comm,dh,node,callback,arg) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr, c_funptr
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm, node
-                        type(c_ptr), value, intent(in) :: dh, arg
-                        type(c_funptr), value, intent(in) :: callback
-                end subroutine fstarpu_mpi_get_data_on_node_detached
-
-                ! void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle);
-                ! Binds to the C symbol fstarpu_mpi_redux_data; both arguments are passed by value.
-                subroutine fstarpu_mpi_redux_data(mpi_comm,dh) bind(C)
-                        use iso_c_binding, only: c_int, c_ptr
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: mpi_comm
-                end subroutine fstarpu_mpi_redux_data
-
-                ! void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio);
-                subroutine fstarpu_mpi_redux_data_prio(mpi_comm,dh, prio) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: prio
-                end subroutine fstarpu_mpi_redux_data_prio
-
-                ! int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
-                function fstarpu_mpi_scatter_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_scatter_detached
-                        type(c_ptr), intent(in) :: dhs(*)
-                        integer(c_int), value, intent(in) :: cnt
-                        integer(c_int), value, intent(in) :: root
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_funptr), value, intent(in) :: scallback
-                        type(c_ptr), value, intent(in) :: sarg
-                        type(c_funptr), value, intent(in) :: rcallback
-                        type(c_ptr), value, intent(in) :: rarg
-                end function fstarpu_mpi_scatter_detached
-
-                ! int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
-                function fstarpu_mpi_gather_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_gather_detached
-                        type(c_ptr), intent(in) :: dhs(*)
-                        integer(c_int), value, intent(in) :: cnt
-                        integer(c_int), value, intent(in) :: root
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_funptr), value, intent(in) :: scallback
-                        type(c_ptr), value, intent(in) :: sarg
-                        type(c_funptr), value, intent(in) :: rcallback
-                        type(c_ptr), value, intent(in) :: rarg
-                end function fstarpu_mpi_gather_detached
-
-
-                ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-                function fstarpu_mpi_isend_detached_unlock_tag (dh, dst, mpi_tag, mpi_comm, starpu_tag) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), value, intent(in) :: starpu_tag
-                end function fstarpu_mpi_isend_detached_unlock_tag
-
-                ! int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
-                function fstarpu_mpi_isend_detached_unlock_tag_prio (dh, dst, mpi_tag, prio, mpi_comm, starpu_tag) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag_prio
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
-                        integer(c_int), value, intent(in) :: prio
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), value, intent(in) :: starpu_tag
-                end function fstarpu_mpi_isend_detached_unlock_tag_prio
-
-                ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-                function fstarpu_mpi_recv_detached_unlock_tag (dh, src, mpi_tag, mpi_comm, starpu_tag) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_recv_detached_unlock_tag
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: src
-                        integer(c_int), value, intent(in) :: mpi_tag
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), value, intent(in) :: starpu_tag
-                end function fstarpu_mpi_recv_detached_unlock_tag
-
-                ! int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-                function fstarpu_mpi_isend_array_detached_unlock_tag (array_size, dhs, dsts, mpi_tags, mpi_comms, starpu_tag) &
-                                bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag
-                        integer(c_int), value, intent(in) :: array_size
-                        type(c_ptr), intent(in) :: dhs(*)
-                        integer(c_int), intent(in) :: dsts(*)
-                        integer(c_int), intent(in) :: mpi_tags(*)
-                        integer(c_int), intent(in) :: mpi_comms(*)
-                        type(c_ptr), value, intent(in) :: starpu_tag
-                end function fstarpu_mpi_isend_array_detached_unlock_tag
-
-                ! int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
-                function fstarpu_mpi_isend_array_detached_unlock_tag_prio (array_size, dhs, dsts, mpi_tags, prio, mpi_comms, &
-                                starpu_tag) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag_prio
-                        integer(c_int), value, intent(in) :: array_size
-                        type(c_ptr), intent(in) :: dhs(*)
-                        integer(c_int), intent(in) :: dsts(*)
-                        integer(c_int), intent(in) :: mpi_tags(*)
-                        integer(c_int), intent(in) :: prio(*)
-                        integer(c_int), intent(in) :: mpi_comms(*)
-                        type(c_ptr), value, intent(in) :: starpu_tag
-                end function fstarpu_mpi_isend_array_detached_unlock_tag_prio
-
-                ! int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-                function fstarpu_mpi_recv_array_detached_unlock_tag (array_size, dhs, srcs, mpi_tags, mpi_comms, starpu_tag) &
-                                bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_recv_array_detached_unlock_tag
-                        integer(c_int), value, intent(in) :: array_size
-                        type(c_ptr), intent(in) :: dhs(*)
-                        integer(c_int), intent(in) :: srcs(*)
-                        integer(c_int), intent(in) :: mpi_tags(*)
-                        integer(c_int), intent(in) :: mpi_comms(*)
-                        type(c_ptr), value, intent(in) :: starpu_tag
-                end function fstarpu_mpi_recv_array_detached_unlock_tag
-
-                ! void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts);
-                subroutine fstarpu_mpi_comm_amounts_retrieve (comm_amounts) bind(C,name="starpu_mpi_comm_amounts_retrieve")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_size_t), intent(in) :: comm_amounts(*)
-                end subroutine fstarpu_mpi_comm_amounts_retrieve
-
-
-                ! void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle);
-                subroutine fstarpu_mpi_cache_flush(mpi_comm,dh) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), value, intent(in) :: dh
-                end subroutine fstarpu_mpi_cache_flush
-
-                ! void starpu_mpi_cache_flush_all_data(MPI_Comm comm);
-                subroutine fstarpu_mpi_cache_flush_all_data(mpi_comm) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm
-                end subroutine fstarpu_mpi_cache_flush_all_data
-
-                ! int starpu_mpi_comm_size(MPI_Comm comm, int *size);
-                function fstarpu_mpi_comm_size(mpi_comm,sz) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        integer(c_int), intent(out) :: sz
-                        integer(c_int) :: fstarpu_mpi_comm_size
-                end function fstarpu_mpi_comm_size
-
-                ! int starpu_mpi_comm_rank(MPI_Comm comm, int *rank);
-                function fstarpu_mpi_comm_rank(mpi_comm,rank) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        integer(c_int), intent(out) :: rank
-                        integer(c_int) :: fstarpu_mpi_comm_rank
-                end function fstarpu_mpi_comm_rank
-
-
-                ! int starpu_mpi_world_rank(void);
-                function fstarpu_mpi_world_rank() bind(C,name="starpu_mpi_world_rank")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_world_rank
-                end function fstarpu_mpi_world_rank
-
-                ! int starpu_mpi_world_size(void);
-                function fstarpu_mpi_world_size() bind(C,name="starpu_mpi_world_size")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_world_size
-                end function fstarpu_mpi_world_size
-
-                ! int fstarpu_mpi_world_comm(void);
-                function fstarpu_mpi_world_comm() bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_world_comm
-                end function fstarpu_mpi_world_comm
-
-                ! int starpu_mpi_get_communication_tag(void);
-                function fstarpu_mpi_get_communication_tag() bind(C,name="starpu_mpi_get_communication_tag")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_get_communication_tag
-                end function fstarpu_mpi_get_communication_tag
-
-                ! void starpu_mpi_set_communication_tag(int tag);
-                subroutine fstarpu_mpi_set_communication_tag(tag) bind(C,name="starpu_mpi_set_communication_tag")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int), value, intent(in) :: tag
-                end subroutine fstarpu_mpi_set_communication_tag
-
-                ! void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm);
-                subroutine fstarpu_mpi_data_register_comm(dh,tag,rank,mpi_comm) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: tag
-                        integer(c_int), value, intent(in) :: rank
-                        integer(c_int), value, intent(in) :: mpi_comm
-                end subroutine fstarpu_mpi_data_register_comm
-
-                ! #define starpu_mpi_data_register(data_handle, tag, rank) starpu_mpi_data_register_comm(data_handle, tag, rank, MPI_COMM_WORLD)
-                subroutine fstarpu_mpi_data_register(dh,tag,rank) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: tag
-                        integer(c_int), value, intent(in) :: rank
-                end subroutine fstarpu_mpi_data_register
-
-                ! void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm);
-                subroutine fstarpu_mpi_data_set_rank_comm(dh,rank,mpi_comm) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: rank
-                        integer(c_int), value, intent(in) :: mpi_comm
-                end subroutine fstarpu_mpi_data_set_rank_comm
-
-                ! #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD)
-                subroutine fstarpu_mpi_data_set_rank(dh,rank) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: rank
-                end subroutine fstarpu_mpi_data_set_rank
-
-                ! void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag);
-                subroutine fstarpu_mpi_data_set_tag(dh,tag) bind(C,name="starpu_mpi_data_set_tag")
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: tag
-                end subroutine fstarpu_mpi_data_set_tag
-
-                ! int starpu_mpi_data_get_rank(starpu_data_handle_t handle);
-                function fstarpu_mpi_data_get_rank(dh) bind(C,name="starpu_mpi_data_get_rank")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_data_get_rank
-                        type(c_ptr), value, intent(in) :: dh
-                end function fstarpu_mpi_data_get_rank
-
-                ! int starpu_mpi_data_get_tag(starpu_data_handle_t handle);
-                function fstarpu_mpi_data_get_tag(dh) bind(C,name="starpu_mpi_data_get_tag")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_data_get_tag
-                        type(c_ptr), value, intent(in) :: dh
-                end function fstarpu_mpi_data_get_tag
-
-                ! void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int rank);
-                subroutine fstarpu_mpi_data_migrate(mpi_comm,dh,rank) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), value, intent(in) :: dh
-                        integer(c_int), value, intent(in) :: rank
-                end subroutine fstarpu_mpi_data_migrate
-
-                ! #define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1
-                ! #define STARPU_MPI_NODE_SELECTION_MOST_R_DATA    0
-
-                ! int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func);
-                function fstarpu_mpi_node_selection_register_policy(policy_func) &
-                                bind(C,name="starpu_mpi_node_selection_register_policy")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_node_selection_register_policy
-                        type(c_funptr), value, intent(in) :: policy_func
-                end function fstarpu_mpi_node_selection_register_policy
-
-                ! int starpu_mpi_node_selection_unregister_policy(int policy);
-                function fstarpu_mpi_node_selection_unregister_policy(policy) &
-                                bind(C,name="starpu_mpi_node_selection_unregister_policy")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_node_selection_unregister_policy
-                        type(c_ptr), value, intent(in) :: policy
-                end function fstarpu_mpi_node_selection_unregister_policy
-
-                ! int starpu_mpi_node_selection_get_current_policy();
-                function fstarpu_mpi_data_selection_get_current_policy() &
-                                bind(C,name="starpu_mpi_data_selection_get_current_policy")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_data_selection_get_current_policy
-                end function fstarpu_mpi_data_selection_get_current_policy
-
-                ! int starpu_mpi_node_selection_set_current_policy(int policy);
-                function fstarpu_mpi_data_selection_set_current_policy(policy) &
-                                bind(C,name="starpu_mpi_data_selection_set_current_policy")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_data_selection_set_current_policy
-                        type(c_ptr), value, intent(in) :: policy
-                end function fstarpu_mpi_data_selection_set_current_policy
-
-                ! int starpu_mpi_cache_is_enabled();
-                function fstarpu_mpi_cache_is_enabled() bind(C,name="starpu_mpi_cache_is_enabled")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_cache_is_enabled
-                end function fstarpu_mpi_cache_is_enabled
-
-                ! int starpu_mpi_cache_set(int enabled);
-                function fstarpu_mpi_cache_set(enabled) bind(C,name="starpu_mpi_cache_set")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_cache_set
-                        integer(c_int), value, intent(in) :: enabled
-                end function fstarpu_mpi_cache_set
-
-                ! int starpu_mpi_wait_for_all(MPI_Comm comm);
-                function fstarpu_mpi_wait_for_all (mpi_comm) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_wait_for_all
-                        integer(c_int), value, intent(in) :: mpi_comm
-                end function fstarpu_mpi_wait_for_all
-
-                ! int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func);
-                function fstarpu_mpi_datatype_register(dh, alloc_func, free_func) bind(C,name="starpu_mpi_datatype_register")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_datatype_register
-                        type(c_ptr), value, intent(in) :: dh
-                        type(c_funptr), value, intent(in) :: alloc_func
-                        type(c_funptr), value, intent(in) :: free_func
-                end function fstarpu_mpi_datatype_register
-
-                ! int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
-                function fstarpu_mpi_datatype_unregister(dh) bind(C,name="starpu_mpi_datatype_unregister")
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_datatype_unregister
-                        type(c_ptr), value, intent(in) :: dh
-                end function fstarpu_mpi_datatype_unregister
-
-
-                function fstarpu_mpi_req_alloc() bind(C)
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr) :: fstarpu_mpi_req_alloc
-                end function fstarpu_mpi_req_alloc
-
-                subroutine fstarpu_mpi_req_free(req) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr),value,intent(in) :: req
-                end subroutine fstarpu_mpi_req_free
-
-                function fstarpu_mpi_status_alloc() bind(C)
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr) :: fstarpu_mpi_status_alloc
-                end function fstarpu_mpi_status_alloc
-
-                subroutine fstarpu_mpi_status_free(st) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        type(c_ptr),value,intent(in) :: st
-                end subroutine fstarpu_mpi_status_free
-
-
-
-        end interface
-
-        contains
-                function fstarpu_mpi_init (initialize_mpi,mpi_comm) bind(C)
-                        use iso_c_binding
-                        implicit none
-                        integer(c_int) :: fstarpu_mpi_init
-                        integer(c_int), intent(in) :: initialize_mpi
-                        integer(c_int), optional, intent(in) :: mpi_comm
-                        type(c_ptr) :: argcv
-                        integer(c_int) :: fargc,i,farg_len
-                        character(len=1) :: farg_1
-                        character(len=:), allocatable :: farg
-                        integer(c_int) :: mpi_comm_present, mpi_comm_or_0
-                        integer(c_int) :: ret
-
-                        interface
-                                function fstarpu_mpi_argcv_alloc(argc, initialize_mpi, comm_present, comm) bind(C)
-                                        use iso_c_binding
-                                        implicit none
-                                        type(c_ptr) :: fstarpu_mpi_argcv_alloc
-                                        integer(c_int),value,intent(in) :: argc
-                                        integer(c_int),value,intent(in) :: initialize_mpi
-                                        integer(c_int),value,intent(in) :: comm_present
-                                        integer(c_int),value,intent(in) :: comm
-                                end function fstarpu_mpi_argcv_alloc
-
-                                subroutine fstarpu_mpi_argcv_set_arg(argcv, i, l, s) bind(C)
-                                        use iso_c_binding
-                                        implicit none
-                                        type(c_ptr),value,intent(in) :: argcv
-                                        integer(c_int),value,intent(in) :: i
-                                        integer(c_int),value,intent(in) :: l
-                                        character(c_char),intent(in) :: s
-                                end subroutine fstarpu_mpi_argcv_set_arg
-
-                                subroutine fstarpu_mpi_argcv_free(argcv) bind(C)
-                                        use iso_c_binding
-                                        implicit none
-                                        type(c_ptr),value,intent(in) :: argcv
-                                end subroutine fstarpu_mpi_argcv_free
-
-                                function fstarpu_mpi_init_c(argcv) bind(C)
-                                        use iso_c_binding
-                                        implicit none
-                                        integer(c_int) :: fstarpu_mpi_init_c
-                                        type(c_ptr),value,intent(in) :: argcv
-                                end function fstarpu_mpi_init_c
-                        end interface
-
-                        fargc = command_argument_count()
-                        write(*,*) "fargc",fargc
-                        if (present(mpi_comm)) then
-                                mpi_comm_present = 1
-                                mpi_comm_or_0 = mpi_comm
-                        else
-                                mpi_comm_present = 0
-                                mpi_comm_or_0 = 0
-                        end if
-                        write(*,*) "initialize_mpi",initialize_mpi
-                        write(*,*) "mpi_comm_present",mpi_comm_present
-                        argcv = fstarpu_mpi_argcv_alloc(fargc, initialize_mpi, mpi_comm_present, mpi_comm_or_0)
-                        do i=0,fargc-1
-                                call get_command_argument(i, farg_1, farg_len)
-                                allocate (character(len=farg_len) :: farg)
-                                call get_command_argument(i, farg)
-                                call fstarpu_mpi_argcv_set_arg(argcv, i, farg_len, farg)
-                                deallocate (farg)
-                        end do
-                        ret = fstarpu_mpi_init_c(argcv)
-                        call fstarpu_mpi_argcv_free(argcv)
-                        fstarpu_mpi_init = ret
-                end function fstarpu_mpi_init
-
-end module fstarpu_mpi_mod

+ 0 - 145
nmad/include/starpu_mpi.h

@@ -1,145 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2012, 2014-2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2016  Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_H__
-#define __STARPU_MPI_H__
-
-#include <starpu.h>
-
-#if defined(STARPU_USE_MPI)
-
-#include <mpi.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-typedef void *starpu_mpi_req;
-
-int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm);
-int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm);
-int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status);
-int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm);
-int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
-int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
-int starpu_mpi_barrier(MPI_Comm comm);
-
-int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
-
-int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm);
-int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
-int starpu_mpi_initialize(void) STARPU_DEPRECATED;
-int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED;
-int starpu_mpi_shutdown(void);
-
-struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-/* The function starpu_mpi_insert_task has the same semantics as starpu_mpi_task_insert; it is kept to avoid breaking old code. */
-int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-
-void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node);
-void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg);
-void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle);
-void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle);
-void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio);
-
-int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
-int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
-
-int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
-int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-
-int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
-int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-
-void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts);
-
-void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle);
-void starpu_mpi_cache_flush_all_data(MPI_Comm comm);
-
-int starpu_mpi_cached_receive(starpu_data_handle_t data_handle);
-int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest);
-
-int starpu_mpi_comm_size(MPI_Comm comm, int *size);
-int starpu_mpi_comm_rank(MPI_Comm comm, int *rank);
-int starpu_mpi_world_rank(void);
-int starpu_mpi_world_size(void);
-
-int starpu_mpi_get_communication_tag(void);
-void starpu_mpi_set_communication_tag(int tag);
-
-void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm);
-#define starpu_mpi_data_register(data_handle, tag, rank) starpu_mpi_data_register_comm(data_handle, tag, rank, MPI_COMM_WORLD)
-
-#define STARPU_MPI_PER_NODE -2
-
-void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm);
-#define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD)
-void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag);
-#define starpu_data_set_rank starpu_mpi_data_set_rank
-#define starpu_data_set_tag starpu_mpi_data_set_tag
-
-int starpu_mpi_data_get_rank(starpu_data_handle_t handle);
-int starpu_mpi_data_get_tag(starpu_data_handle_t handle);
-#define starpu_data_get_rank starpu_mpi_data_get_rank
-#define starpu_data_get_tag starpu_mpi_data_get_tag
-
-void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int new_rank);
-
-#define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1
-#define STARPU_MPI_NODE_SELECTION_MOST_R_DATA    0
-
-typedef int (*starpu_mpi_select_node_policy_func_t)(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data);
-int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func);
-int starpu_mpi_node_selection_unregister_policy(int policy);
-
-int starpu_mpi_node_selection_get_current_policy();
-int starpu_mpi_node_selection_set_current_policy(int policy);
-
-int starpu_mpi_cache_is_enabled();
-int starpu_mpi_cache_set(int enabled);
-
-int starpu_mpi_wait_for_all(MPI_Comm comm);
-
-typedef void (*starpu_mpi_datatype_allocate_func_t)(starpu_data_handle_t, MPI_Datatype *);
-typedef void (*starpu_mpi_datatype_free_func_t)(MPI_Datatype *);
-int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func);
-int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
-
-int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *));
-int starpu_mpi_pre_submit_hook_unregister();
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // STARPU_USE_MPI
-#endif // __STARPU_MPI_H__

+ 0 - 44
nmad/include/starpu_mpi_lb.h

@@ -1,44 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_LOAD_BALANCER_H__
-#define __STARPU_MPI_LOAD_BALANCER_H__
-
-#include <starpu.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-struct starpu_mpi_lb_conf
-{
-	void (*get_neighbors)(int **neighbor_ids, int *nneighbors);
-	void (*get_data_unit_to_migrate)(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node);
-};
-
-/* Inits the load balancer's environment with the load policy provided by the
- * user
- */
-void starpu_mpi_lb_init(const char *lb_policy_name, struct starpu_mpi_lb_conf *);
-void starpu_mpi_lb_shutdown();
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __STARPU_MPI_LOAD_BALANCER_H__

+ 0 - 29
nmad/libstarpumpi.pc.in

@@ -1,29 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2011, 2016  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012  CNRS
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: starpumpi
-Description: offers MPI support for heterogeneous multicore architecture
-Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ -DSTARPU_USE_DEPRECATED_API
-Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@
-Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@
-Requires: libstarpu
-Requires.private:

+ 0 - 105
nmad/src/Makefile.am

@@ -1,105 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2012  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-CC=$(MPICC)
-CCLD=$(MPICC)
-
-BUILT_SOURCES =
-
-CLEANFILES = *.gcno *.gcda *.linkinfo
-
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(GLOBAL_AM_CFLAGS) $(NMAD_CFLAGS)
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS) $(NMAD_LIBS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src
-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS) $(NMAD_LDFLAGS)
-
-ldflags =
-
-if STARPU_HAVE_WINDOWS
-
-LC_MESSAGES=C
-export LC_MESSAGES
-
-ldflags += -Xlinker --output-def -Xlinker .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def
-
-if STARPU_HAVE_MS_LIB
-.libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib: libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la dolib
-	./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def @STARPU_EFFECTIVE_VERSION@ $(libstarpumpi_so_version) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib
-all-local: .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib
-endif STARPU_HAVE_MS_LIB
-
-install-exec-hook:
-	$(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def $(DESTDIR)$(libdir)
-if STARPU_HAVE_MS_LIB
-	$(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib $(DESTDIR)$(libdir)
-	$(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.exp $(DESTDIR)$(libdir)
-endif STARPU_HAVE_MS_LIB
-
-endif STARPU_HAVE_WINDOWS
-
-lib_LTLIBRARIES = libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-
-libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la
-libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined					\
-  -version-info $(LIBSTARPUMPI_INTERFACE_CURRENT):$(LIBSTARPUMPI_INTERFACE_REVISION):$(LIBSTARPUMPI_INTERFACE_AGE) \
-  $(MPICC_LDFLAGS) $(FXT_LDFLAGS)
-noinst_HEADERS =					\
-	starpu_mpi_private.h				\
-	starpu_mpi_fxt.h				\
-	starpu_mpi_stats.h				\
-	starpu_mpi_datatype.h				\
-	starpu_mpi_cache.h				\
-	starpu_mpi_select_node.h			\
-	starpu_mpi_cache_stats.h			\
-	starpu_mpi_task_insert.h			\
-	starpu_mpi_init.h				\
-	mpi/starpu_mpi_early_data.h			\
-	mpi/starpu_mpi_early_request.h			\
-	mpi/starpu_mpi_sync_data.h			\
-	mpi/starpu_mpi_comm.h				\
-	mpi/starpu_mpi_tag.h				\
-	load_balancer/policy/data_movements_interface.h	\
-	load_balancer/policy/load_data_interface.h	\
-	load_balancer/policy/load_balancer_policy.h
-
-libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
-	starpu_mpi.c					\
-	starpu_mpi_helper.c				\
-	starpu_mpi_datatype.c				\
-	starpu_mpi_task_insert.c			\
-	starpu_mpi_collective.c				\
-	starpu_mpi_stats.c				\
-	starpu_mpi_private.c				\
-	starpu_mpi_cache.c				\
-	starpu_mpi_select_node.c			\
-	starpu_mpi_cache_stats.c			\
-	starpu_mpi_fortran.c				\
-	starpu_mpi_task_insert_fortran.c		\
-	starpu_mpi_init.c				\
-	nmad/starpu_mpi_nmad.c				\
-	mpi/starpu_mpi_mpi.c				\
-	mpi/starpu_mpi_early_data.c			\
-	mpi/starpu_mpi_early_request.c			\
-	mpi/starpu_mpi_sync_data.c			\
-	mpi/starpu_mpi_comm.c				\
-	mpi/starpu_mpi_tag.c				\
-	load_balancer/policy/data_movements_interface.c	\
-	load_balancer/policy/load_data_interface.c	\
-	load_balancer/policy/load_heat_propagation.c	\
-	load_balancer/load_balancer.c
-
-showcheck:
-	-cat /dev/null

+ 0 - 161
nmad/src/load_balancer/load_balancer.c

@@ -1,161 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <starpu.h>
-#include <starpu_mpi.h>
-#include <starpu_scheduler.h>
-#include <common/utils.h>
-#include <common/config.h>
-
-#include <starpu_mpi_lb.h>
-#include "policy/load_balancer_policy.h"
-
-#if defined(STARPU_USE_MPI_MPI)
-
-static struct load_balancer_policy *defined_policy = NULL;
-typedef void (*_post_exec_hook_func_t)(struct starpu_task *task, unsigned sched_ctx_id);
-static _post_exec_hook_func_t saved_post_exec_hook[STARPU_NMAX_SCHED_CTXS];
-
-static void post_exec_hook_wrapper(struct starpu_task *task, unsigned sched_ctx_id)
-{
-	//fprintf(stderr,"I am called ! \n");
-	if (defined_policy && defined_policy->finished_task_entry_point)
-		defined_policy->finished_task_entry_point();
-	if (saved_post_exec_hook[sched_ctx_id])
-		saved_post_exec_hook[sched_ctx_id](task, sched_ctx_id);
-}
-
-static struct load_balancer_policy *predefined_policies[] =
-{
-	&load_heat_propagation_policy,
-	NULL
-};
-
-void starpu_mpi_lb_init(const char *lb_policy_name, struct starpu_mpi_lb_conf *itf)
-{
-	int ret;
-
-	const char *policy_name = starpu_getenv("STARPU_MPI_LB");
-	if (!policy_name)
-		policy_name = lb_policy_name;
-
-	if (!policy_name || (strcmp(policy_name, "help") == 0))
-	{
-		_STARPU_MSG("Warning : load balancing is disabled for this run.\n");
-		_STARPU_MSG("Use the STARPU_MPI_LB = <name> environment variable to use a load balancer.\n");
-		_STARPU_MSG("Available load balancers :\n");
-		struct load_balancer_policy **policy;
-		for(policy=predefined_policies ; *policy!=NULL ; policy++)
-		{
-			struct load_balancer_policy *p = *policy;
-			fprintf(stderr," - %s\n", p->policy_name);
-		}
-		return;
-	}
-
-	if (policy_name)
-	{
-		struct load_balancer_policy **policy;
-		for(policy=predefined_policies ; *policy!=NULL ; policy++)
-		{
-			struct load_balancer_policy *p = *policy;
-			if (p->policy_name)
-			{
-				if (strcmp(policy_name, p->policy_name) == 0)
-				{
-					/* we found a policy with the requested name */
-					defined_policy = p;
-					break;
-				}
-			}
-		}
-	}
-
-	if (!defined_policy)
-	{
-		_STARPU_MSG("Error : no load balancer with the name %s. Load balancing will be disabled for this run.\n", policy_name);
-		return;
-	}
-
-	ret = defined_policy->init(itf);
-	if (ret != 0)
-	{
-		_STARPU_MSG("Error (%d) in %s->init: invalid starpu_mpi_lb_conf. Load balancing will be disabled for this run.\n", ret, defined_policy->policy_name);
-		return;
-	}
-
-	/* starpu_register_hook(submitted_task, defined_policy->submitted_task_entry_point); */
-	if (defined_policy->submitted_task_entry_point)
-		starpu_mpi_pre_submit_hook_register(defined_policy->submitted_task_entry_point);
-
-	/* starpu_register_hook(finished_task, defined_policy->finished_task_entry_point); */
-	if (defined_policy->finished_task_entry_point)
-	{
-		int i;
-		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-		{
-			struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i);
-			if (sched_policy)
-			{
-				_STARPU_DEBUG("Setting post_exec_hook for scheduling context %d %s (%d)\n", i, sched_policy->policy_name, STARPU_NMAX_SCHED_CTXS);
-				saved_post_exec_hook[i] = sched_policy->post_exec_hook;
-				sched_policy->post_exec_hook = post_exec_hook_wrapper;
-			}
-			else
-				saved_post_exec_hook[i] = NULL;
-		}
-	}
-
-	return;
-}
-
-void starpu_mpi_lb_shutdown()
-{
-	if (!defined_policy)
-		return;
-
-	int ret = defined_policy->deinit();
-	if (ret != 0)
-	{
-		_STARPU_MSG("Error (%d) in %s->deinit\n", ret, defined_policy->policy_name);
-		return;
-	}
-
-	/* starpu_unregister_hook(submitted_task, defined_policy->submitted_task_entry_point); */
-	if (defined_policy->submitted_task_entry_point)
-		starpu_mpi_pre_submit_hook_unregister();
-
-	/* starpu_unregister_hook(finished_task, defined_policy->finished_task_entry_point); */
-	if (defined_policy->finished_task_entry_point)
-	{
-		int i;
-		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-		{
-			if (saved_post_exec_hook[i])
-			{
-				struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i);
-				sched_policy->post_exec_hook = saved_post_exec_hook[i];
-				saved_post_exec_hook[i] = NULL;
-			}
-		}
-	}
-	defined_policy = NULL;
-}
-
-#endif /* STARPU_USE_MPI_MPI */

+ 0 - 286
nmad/src/load_balancer/policy/data_movements_interface.c

@@ -1,286 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <starpu_mpi_private.h>
-#include <common/config.h>
-
-#include "data_movements_interface.h"
-
-#if defined(STARPU_USE_MPI_MPI)
-
-int **data_movements_get_ref_tags_table(starpu_data_handle_t handle)
-{
-	struct data_movements_interface *dm_interface =
-		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	if (dm_interface->tags)
-		return &dm_interface->tags;
-	else
-		return NULL;
-}
-
-int **data_movements_get_ref_ranks_table(starpu_data_handle_t handle)
-{
-	struct data_movements_interface *dm_interface =
-		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	if (dm_interface->ranks)
-		return &dm_interface->ranks;
-	else
-		return NULL;
-}
-
-int *data_movements_get_tags_table(starpu_data_handle_t handle)
-{
-	struct data_movements_interface *dm_interface =
-		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return dm_interface->tags;
-}
-
-int *data_movements_get_ranks_table(starpu_data_handle_t handle)
-{
-	struct data_movements_interface *dm_interface =
-		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return dm_interface->ranks;
-}
-
-int data_movements_get_size_tables(starpu_data_handle_t handle)
-{
-	struct data_movements_interface *dm_interface =
-		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return dm_interface->size;
-}
-
-int data_movements_reallocate_tables(starpu_data_handle_t handle, int size)
-{
-	struct data_movements_interface *dm_interface =
-		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	if (dm_interface->size)
-	{
-		STARPU_ASSERT(dm_interface->tags);
-		free(dm_interface->tags);
-		dm_interface->tags = NULL;
-
-		STARPU_ASSERT(dm_interface->ranks);
-		free(dm_interface->ranks);
-		dm_interface->ranks = NULL;
-	}
-	else
-	{
-		STARPU_ASSERT(!dm_interface->tags);
-		STARPU_ASSERT(!dm_interface->ranks);
-	}
-
-	dm_interface->size = size;
-
-	if (dm_interface->size)
-	{
-		_STARPU_MPI_MALLOC(dm_interface->tags, size*sizeof(int));
-		_STARPU_MPI_MALLOC(dm_interface->ranks, size*sizeof(int));
-	}
-
-	return 0 ;
-}
-
-static void data_movements_register_data_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
-{
-	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
-
-	unsigned node;
-	for (node = 0; node < STARPU_MAXNODES; node++)
-	{
-		struct data_movements_interface *local_interface = (struct data_movements_interface *)
-			starpu_data_get_interface_on_node(handle, node);
-
-		local_interface->size = dm_interface->size;
-		if (node == home_node)
-		{
-			local_interface->tags = dm_interface->tags;
-			local_interface->ranks = dm_interface->ranks;
-		}
-		else
-		{
-			local_interface->tags = NULL;
-			local_interface->ranks = NULL;
-		}
-	}
-}
-
-static starpu_ssize_t data_movements_allocate_data_on_node(void *data_interface, unsigned node)
-{
-	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
-
-	int *addr_tags = NULL;
-	int *addr_ranks = NULL;
-	starpu_ssize_t requested_memory = dm_interface->size * sizeof(int);
-
-	addr_tags = (int*) starpu_malloc_on_node(node, requested_memory);
-	if (!addr_tags)
-		goto fail_tags;
-	addr_ranks = (int*) starpu_malloc_on_node(node, requested_memory);
-	if (!addr_ranks)
-		goto fail_ranks;
-
-	/* update the data properly in consequence */
-	dm_interface->tags = addr_tags;
-	dm_interface->ranks = addr_ranks;
-
-	return 2*requested_memory;
-
-fail_ranks:
-	starpu_free_on_node(node, (uintptr_t) addr_tags, requested_memory);
-fail_tags:
-	return -ENOMEM;
-}
-
-static void data_movements_free_data_on_node(void *data_interface, unsigned node)
-{
-	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
-	starpu_ssize_t requested_memory = dm_interface->size * sizeof(int);
-
-	starpu_free_on_node(node, (uintptr_t) dm_interface->tags, requested_memory);
-	starpu_free_on_node(node, (uintptr_t) dm_interface->ranks, requested_memory);
-}
-
-static size_t data_movements_get_size(starpu_data_handle_t handle)
-{
-	size_t size;
-	struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	size = (dm_interface->size * 2 * sizeof(int)) + sizeof(int);
-	return size;
-}
-
-static uint32_t data_movements_footprint(starpu_data_handle_t handle)
-{
-	return starpu_hash_crc32c_be(data_movements_get_size(handle), 0);
-}
-
-static int data_movements_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
-{
-	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-
-	struct data_movements_interface *dm_interface = (struct data_movements_interface *)
-		starpu_data_get_interface_on_node(handle, node);
-
-	*count = data_movements_get_size(handle);
-	if (ptr != NULL)
-	{
-		char *data;
-		starpu_malloc_flags((void**) &data, *count, 0);
-		assert(data);
-		*ptr = data;
-		memcpy(data, &dm_interface->size, sizeof(int));
-		if (dm_interface->size)
-		{
-			memcpy(data+sizeof(int), dm_interface->tags, (dm_interface->size*sizeof(int)));
-			memcpy(data+sizeof(int)+(dm_interface->size*sizeof(int)), dm_interface->ranks, dm_interface->size*sizeof(int));
-		}
-	}
-
-	return 0;
-}
-
-static int data_movements_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
-{
-	char *data = ptr;
-	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-
-	struct data_movements_interface *dm_interface = (struct data_movements_interface *)
-		starpu_data_get_interface_on_node(handle, node);
-
-	int size = 0;
-	memcpy(&size, data, sizeof(int));
-	STARPU_ASSERT(count == (2 * size * sizeof(int)) + sizeof(int));
-
-	data_movements_reallocate_tables(handle, size);
-
-	if (dm_interface->size)
-	{
-		memcpy(dm_interface->tags, data+sizeof(int), dm_interface->size*sizeof(int));
-		memcpy(dm_interface->ranks, data+sizeof(int)+(dm_interface->size*sizeof(int)), dm_interface->size*sizeof(int));
-	}
-
-    return 0;
-}
-
-static int copy_any_to_any(void *src_interface, unsigned src_node,
-			   void *dst_interface, unsigned dst_node,
-			   void *async_data)
-{
-	struct data_movements_interface *src_data_movements = src_interface;
-	struct data_movements_interface *dst_data_movements = dst_interface;
-	int ret = 0;
-
-	if (starpu_interface_copy((uintptr_t) src_data_movements->tags, 0, src_node,
-				    (uintptr_t) dst_data_movements->tags, 0, dst_node,
-				     src_data_movements->size*sizeof(int),
-				     async_data))
-		ret = -EAGAIN;
-	if (starpu_interface_copy((uintptr_t) src_data_movements->ranks, 0, src_node,
-				    (uintptr_t) dst_data_movements->ranks, 0, dst_node,
-				     src_data_movements->size*sizeof(int),
-				     async_data))
-		ret = -EAGAIN;
-	return ret;
-}
-
-static const struct starpu_data_copy_methods data_movements_copy_methods =
-{
-	.any_to_any = copy_any_to_any
-};
-
-static struct starpu_data_interface_ops interface_data_movements_ops =
-{
-	.register_data_handle = data_movements_register_data_handle,
-	.allocate_data_on_node = data_movements_allocate_data_on_node,
-	.free_data_on_node = data_movements_free_data_on_node,
-	.copy_methods = &data_movements_copy_methods,
-	.get_size = data_movements_get_size,
-	.footprint = data_movements_footprint,
-	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
-	.interface_size = sizeof(struct data_movements_interface),
-	.handle_to_pointer = NULL,
-	.pack_data = data_movements_pack_data,
-	.unpack_data = data_movements_unpack_data,
-	.describe = NULL
-};
-
-void data_movements_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int *tags, int *ranks, int size)
-{
-	struct data_movements_interface data_movements =
-	{
-		.tags = tags,
-		.ranks = ranks,
-		.size = size
-	};
-
-	if (interface_data_movements_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID)
-	{
-		interface_data_movements_ops.interfaceid = starpu_data_interface_get_next_id();
-	}
-
-	starpu_data_register(handleptr, home_node, &data_movements, &interface_data_movements_ops);
-}
-
-#endif

+ 0 - 48
nmad/src/load_balancer/policy/data_movements_interface.h

@@ -1,48 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-
-#ifndef __DATA_MOVEMENTS_INTERFACE_H
-#define __DATA_MOVEMENTS_INTERFACE_H
-
-/* interface for data_movements */
-struct data_movements_interface
-{
-	/* Data tags table */
-	int *tags;
-	/* Ranks table (where to move the corresponding data) */
-	int *ranks;
-	/* Size of the tables */
-	int size;
-};
-
-void data_movements_data_register(starpu_data_handle_t *handle, unsigned home_node, int *ranks, int *tags, int size);
-
-int **data_movements_get_ref_tags_table(starpu_data_handle_t handle);
-int **data_movements_get_ref_ranks_table(starpu_data_handle_t handle);
-int data_movements_reallocate_tables(starpu_data_handle_t handle, int size);
-
-int *data_movements_get_tags_table(starpu_data_handle_t handle);
-int *data_movements_get_ranks_table(starpu_data_handle_t handle);
-int data_movements_get_size_tables(starpu_data_handle_t handle);
-
-#define DATA_MOVEMENTS_GET_SIZE_TABLES(interface)	(((struct data_movements_interface *)(interface))->size)
-#define DATA_MOVEMENTS_GET_TAGS_TABLE(interface)	(((struct data_movements_interface *)(interface))->tags)
-#define DATA_MOVEMENTS_GET_RANKS_TABLE(interface)	(((struct data_movements_interface *)(interface))->ranks)
-
-#endif /* __DATA_MOVEMENTS_INTERFACE_H */

+ 0 - 53
nmad/src/load_balancer/policy/load_balancer_policy.h

@@ -1,53 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __LOAD_BALANCER_POLICY_H__
-#define __LOAD_BALANCER_POLICY_H__
-
-#include <starpu_mpi_lb.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/* A load balancer consists in a collection of operations on a data
- * representing the load of the application (in terms of computation, memory,
- * whatever). StarPU allows several entry points for the user. The load
- * balancer allows the user to give its load balancing methods to be used on
- * these entry points of the runtime system. */
-struct load_balancer_policy
-{
-	int (*init)(struct starpu_mpi_lb_conf *);
-	int (*deinit)();
-	void (*submitted_task_entry_point)();
-	void (*finished_task_entry_point)();
-
-	/* Name of the load balancing policy. The selection of the load balancer is
-	 * performed through the use of the STARPU_MPI_LB=name environment
-	 * variable.
-	 */
-	const char *policy_name;
-};
-
-extern struct load_balancer_policy load_heat_propagation_policy;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __LOAD_BALANCER_POLICY_H__

+ 0 - 274
nmad/src/load_balancer/policy/load_data_interface.c

@@ -1,274 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <common/config.h>
-
-#include "load_data_interface.h"
-
-#if defined(STARPU_USE_MPI_MPI)
-
-int load_data_get_sleep_threshold(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return ld_interface->sleep_task_threshold;
-}
-
-int load_data_get_wakeup_threshold(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return ld_interface->wakeup_task_threshold;
-}
-
-int load_data_get_current_phase(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return ld_interface->phase;
-}
-
-int load_data_get_nsubmitted_tasks(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return ld_interface->nsubmitted_tasks;
-}
-
-int load_data_get_nfinished_tasks(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return ld_interface->nfinished_tasks;
-}
-
-int load_data_inc_nsubmitted_tasks(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	(ld_interface->nsubmitted_tasks)++;
-
-	return 0;
-}
-
-int load_data_inc_nfinished_tasks(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	(ld_interface->nfinished_tasks)++;
-
-	return 0;
-}
-
-int load_data_next_phase(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	ld_interface->phase++;
-
-	return 0;
-}
-
-int load_data_update_elapsed_time(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	ld_interface->elapsed_time = starpu_timing_now() - ld_interface->start;
-
-	return 0;
-}
-
-double load_data_get_elapsed_time(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return ld_interface->elapsed_time;
-}
-
-int load_data_update_wakeup_cond(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	int previous_threshold = ld_interface->wakeup_task_threshold;
-	ld_interface->wakeup_task_threshold += (ld_interface->nsubmitted_tasks - previous_threshold) * ld_interface->wakeup_ratio;
-
-	return 0;
-}
-
-int load_data_wakeup_cond(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-
-	return (ld_interface->wakeup_task_threshold > 0) && (ld_interface->nfinished_tasks == ld_interface->wakeup_task_threshold);
-}
-
-static void load_data_register_data_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
-{
-	(void) home_node;
-	struct load_data_interface *ld_interface = (struct load_data_interface *) data_interface;
-
-	unsigned node;
-	for (node = 0; node < STARPU_MAXNODES; node++)
-	{
-		struct load_data_interface *local_interface = (struct load_data_interface *)
-			starpu_data_get_interface_on_node(handle, node);
-
-		local_interface->start = ld_interface->start;
-		local_interface->elapsed_time = ld_interface->elapsed_time;
-		local_interface->phase = ld_interface->phase;
-		local_interface->nsubmitted_tasks = ld_interface->nsubmitted_tasks;
-		local_interface->nfinished_tasks = ld_interface->nsubmitted_tasks;
-		local_interface->wakeup_task_threshold = ld_interface->wakeup_task_threshold;
-		local_interface->wakeup_ratio = ld_interface->wakeup_ratio;
-		local_interface->sleep_task_threshold = ld_interface->sleep_task_threshold;
-	}
-}
-
-static starpu_ssize_t load_data_allocate_data_on_node(void *data_interface, unsigned node)
-{
-	(void) data_interface;
-	(void) node;
-
-	return 0;
-}
-
-static void load_data_free_data_on_node(void *data_interface, unsigned node)
-{
-	(void) data_interface;
-	(void) node;
-}
-
-static size_t load_data_get_size(starpu_data_handle_t handle)
-{
-	(void) handle;
-	return sizeof(struct load_data_interface);
-}
-
-static uint32_t load_data_footprint(starpu_data_handle_t handle)
-{
-	struct load_data_interface *ld_interface =
-		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-	return starpu_hash_crc32c_be(ld_interface->start,
-				     starpu_hash_crc32c_be(ld_interface->elapsed_time,
-							   starpu_hash_crc32c_be(ld_interface->nsubmitted_tasks,
-										 starpu_hash_crc32c_be(ld_interface->sleep_task_threshold, ld_interface->wakeup_task_threshold))));
-}
-
-static int load_data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
-{
-	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-
-	struct load_data_interface *ld_interface = (struct load_data_interface *)
-		starpu_data_get_interface_on_node(handle, node);
-
-	*count = load_data_get_size(handle);
-	if (ptr != NULL)
-	{
-		char *data;
-		starpu_malloc_flags((void**) &data, *count, 0);
-		*ptr = data;
-		memcpy(data, ld_interface, *count);
-	}
-
-	return 0;
-}
-
-static int load_data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
-{
-	char *data = ptr;
-	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-
-	struct load_data_interface *ld_interface = (struct load_data_interface *)
-		starpu_data_get_interface_on_node(handle, node);
-
-	STARPU_ASSERT(count == sizeof(struct load_data_interface));
-	memcpy(ld_interface, data, count);
-
-	return 0;
-}
-
-static int copy_any_to_any(void *src_interface, unsigned src_node,
-			   void *dst_interface, unsigned dst_node,
-			   void *async_data)
-{
-	(void) src_interface;
-	(void) dst_interface;
-	(void) src_node;
-	(void) dst_node;
-	(void) async_data;
-
-	return 0;
-}
-
-static const struct starpu_data_copy_methods load_data_copy_methods =
-{
-	.any_to_any = copy_any_to_any
-};
-
-static struct starpu_data_interface_ops interface_load_data_ops =
-{
-	.register_data_handle = load_data_register_data_handle,
-	.allocate_data_on_node = load_data_allocate_data_on_node,
-	.free_data_on_node = load_data_free_data_on_node,
-	.copy_methods = &load_data_copy_methods,
-	.get_size = load_data_get_size,
-	.footprint = load_data_footprint,
-	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
-	.interface_size = sizeof(struct load_data_interface),
-	.handle_to_pointer = NULL,
-	.pack_data = load_data_pack_data,
-	.unpack_data = load_data_unpack_data,
-	.describe = NULL
-};
-
-void load_data_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int sleep_task_threshold, double wakeup_ratio)
-{
-	struct load_data_interface load_data =
-	{
-		.start = starpu_timing_now(),
-		.elapsed_time = 0,
-		.phase = 0,
-		.nsubmitted_tasks = 0,
-		.nfinished_tasks = 0,
-		.sleep_task_threshold = sleep_task_threshold,
-		.wakeup_task_threshold = 0,
-		.wakeup_ratio = wakeup_ratio
-	};
-
-	if (interface_load_data_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID)
-	{
-		interface_load_data_ops.interfaceid = starpu_data_interface_get_next_id();
-	}
-
-	starpu_data_register(handleptr, home_node, &load_data, &interface_load_data_ops);
-}
-
-#endif

+ 0 - 70
nmad/src/load_balancer/policy/load_data_interface.h

@@ -1,70 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-
-#ifndef __LOAD_DATA_INTERFACE_H
-#define __LOAD_DATA_INTERFACE_H
-
-/* interface for load_data */
-struct load_data_interface
-{
-	/* Starting time of the execution */
-	double start;
-	/* Elapsed time until the start time and the time when event "launch a load
-	 * balancing phase" is triggered */
-	double elapsed_time;
-	/* Current submission phase, i.e how many balanced steps have already
-	 * happened so far. */
-	int phase;
-	/* Number of currently submitted tasks */
-	int nsubmitted_tasks;
-	/* Number of currently finished tasks */
-	int nfinished_tasks;
-	/* Task threshold to sleep the submission thread */
-	int sleep_task_threshold;
-	/* Task threshold to wake-up the submission thread */
-	int wakeup_task_threshold;
-	/* Ratio of submitted tasks to wait for completion before waking up the
-	 * submission thread */
-	double wakeup_ratio;
-};
-
-void load_data_data_register(starpu_data_handle_t *handle, unsigned home_node, int sleep_task_threshold, double wakeup_ratio);
-
-int load_data_get_sleep_threshold(starpu_data_handle_t handle);
-int load_data_get_wakeup_threshold(starpu_data_handle_t handle);
-int load_data_get_current_phase(starpu_data_handle_t handle);
-int load_data_get_nsubmitted_tasks(starpu_data_handle_t handle);
-int load_data_get_nfinished_tasks(starpu_data_handle_t handle);
-
-int load_data_inc_nsubmitted_tasks(starpu_data_handle_t handle);
-int load_data_inc_nfinished_tasks(starpu_data_handle_t handle);
-
-int load_data_next_phase(starpu_data_handle_t handle);
-
-int load_data_update_elapsed_time(starpu_data_handle_t handle);
-double load_data_get_elapsed_time(starpu_data_handle_t handle);
-
-int load_data_update_wakeup_cond(starpu_data_handle_t handle);
-int load_data_wakeup_cond(starpu_data_handle_t handle);
-
-#define LOAD_DATA_GET_NSUBMITTED_TASKS(interface)	(((struct load_data_interface *)(interface))->nsubmitted_tasks)
-#define LOAD_DATA_GET_SLEEP_THRESHOLD(interface)	(((struct load_data_interface *)(interface))->sleep_task_threshold)
-#define LOAD_DATA_GET_WAKEUP_THRESHOLD(interface)	(((struct load_data_interface *)(interface))->wakeup_task_threshold)
-
-#endif /* __LOAD_DATA_INTERFACE_H */

+ 0 - 643
nmad/src/load_balancer/policy/load_heat_propagation.c

@@ -1,643 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <mpi/starpu_mpi_tag.h>
-#include <common/uthash.h>
-#include <common/utils.h>
-#include <math.h>
-#include <starpu_mpi_private.h>
-#include "load_balancer_policy.h"
-#include "data_movements_interface.h"
-#include "load_data_interface.h"
-#include <common/config.h>
-
-#if defined(STARPU_USE_MPI_MPI)
-
-static int TAG_LOAD(int n)
-{
-	return (n+1) << 24;
-}
-
-static int TAG_MOV(int n)
-{
-	return (n+1) << 20;
-}
-
-/* Hash table of local pieces of data that has been moved out of the local MPI
- * node by the load balancer. All of these pieces of data must be migrated back
- * to the local node at the end of the execution. */
-struct moved_data_entry
-{
-	UT_hash_handle hh;
-	starpu_data_handle_t handle;
-};
-
-static struct moved_data_entry *mdh = NULL;
-
-static starpu_pthread_mutex_t load_data_mutex;
-static starpu_pthread_cond_t load_data_cond;
-
-/* MPI infos */
-static int my_rank;
-static int world_size;
-
-/* Number of neighbours of the local MPI node and their IDs. These are given by
- * the get_neighbors() method, and thus can be easily changed. */
-static int *neighbor_ids = NULL;
-static int nneighbors = 0;
-
-/* Local load data */
-static starpu_data_handle_t *load_data_handle = NULL;
-static starpu_data_handle_t *load_data_handle_cpy = NULL;
-/* Load data of neighbours */
-static starpu_data_handle_t *neighbor_load_data_handles = NULL;
-
-/* Table which contains a data_movements_handle for each MPI node of
- * MPI_COMM_WORLD. Since all the MPI nodes must be advised of any data
- * movement, this table will be used to perform communications of data
- * movements handles following an all-to-all model. */
-static starpu_data_handle_t *data_movements_handles = NULL;
-
-/* Load balancer interface which contains the application-specific methods for
- * the load balancer to use. */
-static struct starpu_mpi_lb_conf *user_itf = NULL;
-
-static double time_threshold = 20000;
-
-/******************************************************************************
- *                              Balancing                                     *
- *****************************************************************************/
-
-
-/* Decides which data has to move where, and fills the
- * data_movements_handles[my_rank] data handle from that.
- * In data :
- *  - local load_data_handle
- *  - nneighbors
- *  - neighbor_ids[nneighbors]
- *  - neighbor_load_data_handles[nneighbors]
- * Out data :
- *  - data_movements_handles[my_rank]
- */
-
-static void balance(starpu_data_handle_t load_data_cpy)
-{
-	int less_loaded = -1;
-	int n;
-	double ref_elapsed_time;
-	double my_elapsed_time = load_data_get_elapsed_time(load_data_cpy);
-
-	/* Search for the less loaded neighbor */
-	ref_elapsed_time = my_elapsed_time;
-	for (n = 0; n < nneighbors; n++)
-	{
-		double elapsed_time = load_data_get_elapsed_time(neighbor_load_data_handles[n]);
-		if (ref_elapsed_time > elapsed_time)
-		{
-			//fprintf(stderr,"Node%d: ref local time %lf vs neighbour%d time %lf\n", my_rank, ref_elapsed_time, neighbor_ids[n], elapsed_time);
-			less_loaded = neighbor_ids[n];
-			ref_elapsed_time = elapsed_time;
-		}
-	}
-
-	/* We found it */
-	if (less_loaded >= 0)
-	{
-		_STARPU_DEBUG("Less loaded found on node %d : %d\n", my_rank, less_loaded);
-		double diff_time = my_elapsed_time - ref_elapsed_time;
-		/* If the difference is higher than a time threshold, we move
-		 * one data to the less loaded neighbour. */
-		/* TODO: How to decide the time threshold ? */
-		if ((time_threshold > 0) && (diff_time >= time_threshold))
-		{
-			starpu_data_handle_t *handles = NULL;
-			int nhandles = 0;
-			user_itf->get_data_unit_to_migrate(&handles, &nhandles, less_loaded);
-
-			data_movements_reallocate_tables(data_movements_handles[my_rank], nhandles);
-
-			if (nhandles)
-			{
-				int *tags = data_movements_get_tags_table(data_movements_handles[my_rank]);
-				int *ranks = data_movements_get_ranks_table(data_movements_handles[my_rank]);
-
-				for (n = 0; n < nhandles; n++)
-				{
-					tags[n] = starpu_mpi_data_get_tag(handles[n]);
-					ranks[n] = less_loaded;
-				}
-
-				free(handles);
-			}
-		}
-		else
-			data_movements_reallocate_tables(data_movements_handles[my_rank], 0);
-	}
-	else
-		data_movements_reallocate_tables(data_movements_handles[my_rank], 0);
-}
-
-static void exchange_load_data_infos(starpu_data_handle_t load_data_cpy)
-{
-	int i;
-
-	/* Allocate all requests and status for point-to-point communications */
-	starpu_mpi_req load_send_req[nneighbors];
-	starpu_mpi_req load_recv_req[nneighbors];
-
-	MPI_Status load_send_status[nneighbors];
-	MPI_Status load_recv_status[nneighbors];
-
-	int flag;
-
-	/* Send the local load data to neighbour nodes, and receive the remote load
-	 * data from neighbour nodes */
-	for (i = 0; i < nneighbors; i++)
-	{
-		//_STARPU_DEBUG("[node %d] sending and receiving with %i-th neighbor %i\n", my_rank, i, neighbor_ids[i]);
-		starpu_mpi_isend(load_data_cpy, &load_send_req[i], neighbor_ids[i], TAG_LOAD(my_rank), MPI_COMM_WORLD);
-		starpu_mpi_irecv(neighbor_load_data_handles[i], &load_recv_req[i], neighbor_ids[i], TAG_LOAD(neighbor_ids[i]), MPI_COMM_WORLD);
-	}
-
-	/* Wait for completion of all send requests */
-	for (i = 0; i < nneighbors; i++)
-	{
-		flag = 0;
-		while (!flag)
-			starpu_mpi_test(&load_send_req[i], &flag, &load_send_status[i]);
-	}
-
-	/* Wait for completion of all receive requests */
-	for (i = 0; i < nneighbors; i++)
-	{
-		flag = 0;
-		while (!flag)
-			starpu_mpi_test(&load_recv_req[i], &flag, &load_recv_status[i]);
-	}
-}
-
-static void exchange_data_movements_infos()
-{
-	int i;
-
-	/* Allocate all requests and status for point-to-point communications */
-	starpu_mpi_req data_movements_send_req[world_size];
-	starpu_mpi_req data_movements_recv_req[world_size];
-
-	MPI_Status data_movements_send_status[world_size];
-	MPI_Status data_movements_recv_status[world_size];
-
-	int flag;
-
-	/* Send the new ranks of local data to all other nodes, and receive the new
-	 * ranks of all remote data from all other nodes */
-	for (i = 0; i < world_size; i++)
-	{
-		if (i != my_rank)
-		{
-			//_STARPU_DEBUG("[node %d] Send and receive data movement with %d\n", my_rank, i);
-			starpu_mpi_isend(data_movements_handles[my_rank], &data_movements_send_req[i], i, TAG_MOV(my_rank), MPI_COMM_WORLD);
-			starpu_mpi_irecv(data_movements_handles[i], &data_movements_recv_req[i], i, TAG_MOV(i), MPI_COMM_WORLD);
-		}
-	}
-
-	/* Wait for completion of all send requests */
-	for (i = 0; i < world_size; i++)
-	{
-		if (i != my_rank)
-		{
-			//fprintf(stderr,"Wait for sending data movement of %d to %d\n", my_rank, i);
-			flag = 0;
-			while (!flag)
-				starpu_mpi_test(&data_movements_send_req[i], &flag, &data_movements_send_status[i]);
-		}
-	}
-
-	/* Wait for completion of all receive requests */
-	for (i = 0; i < world_size; i++)
-	{
-		if (i != my_rank)
-		{
-			//fprintf(stderr,"Wait for recieving data movement from %d on %d\n", i, my_rank);
-			flag = 0;
-			while (!flag)
-				starpu_mpi_test(&data_movements_recv_req[i], &flag, &data_movements_recv_status[i]);
-		}
-	}
-}
-
-static void update_data_ranks()
-{
-	int i,j;
-
-	/* Update the new ranks for all concerned data */
-	for (i = 0; i < world_size; i++)
-	{
-		int ndata_to_update = data_movements_get_size_tables(data_movements_handles[i]);
-		if (ndata_to_update)
-		{
-			//fprintf(stderr,"Update %d data from table %d on node %d\n", ndata_to_update, i, my_rank);
-
-			for (j = 0; j < ndata_to_update; j++)
-			{
-				starpu_data_handle_t handle = _starpu_mpi_tag_get_data_handle_from_tag((data_movements_get_tags_table(data_movements_handles[i]))[j]);
-				STARPU_ASSERT(handle);
-				int dst_rank = (data_movements_get_ranks_table(data_movements_handles[i]))[j];
-
-				/* Save the fact that the data has been moved out of this node */
-				if (i == my_rank)
-				{
-					struct moved_data_entry *md;
-					_STARPU_MPI_MALLOC(md, sizeof(struct moved_data_entry));
-					md->handle = handle;
-					HASH_ADD_PTR(mdh, handle, md);
-				}
-				else if (dst_rank == my_rank)
-				{
-					/* The data has been moved out, and now is moved back, so
-					 * update the state of the moved_data hash table to reflect
-					 * this change */
-					struct moved_data_entry *md = NULL;
-					HASH_FIND_PTR(mdh, &handle, md);
-					if (md)
-					{
-						HASH_DEL(mdh, md);
-						free(md);
-					}
-				}
-
-				//if (i == my_rank)
-				//{
-				//    if (dst_rank != my_rank)
-				//        fprintf(stderr,"Move data %p (tag %d) from node %d to node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], my_rank, dst_rank);
-				//    else
-				//        fprintf(stderr,"Bring back data %p (tag %d) from node %d on node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], starpu_mpi_data_get_rank(handle), my_rank);
-				//}
-
-				_STARPU_DEBUG("Call of starpu_mpi_get_data_on_node(%d,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank);
-
-				/* Migrate the data handle */
-				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL);
-
-				_STARPU_DEBUG("New rank (%d) of data %d upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank);
-				starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD);
-			}
-		}
-	}
-}
-
-static void clean_balance()
-{
-	int i;
-	starpu_mpi_cache_flush(MPI_COMM_WORLD, *load_data_handle_cpy);
-	for (i = 0; i < nneighbors; i++)
-		starpu_mpi_cache_flush(MPI_COMM_WORLD, neighbor_load_data_handles[i]);
-	for (i = 0; i < world_size; i++)
-		starpu_mpi_cache_flush(MPI_COMM_WORLD, data_movements_handles[i]);
-}
-
-/* Core function of the load balancer. Computes from the load_data_cpy handle a
- * load balancing of the work to come (if needed), perform the necessary data
- * communications and negociate with the other nodes the rebalancing. */
-static void heat_balance(starpu_data_handle_t load_data_cpy)
-{
-	/* Exchange load data handles with neighboring nodes */
-	exchange_load_data_infos(load_data_cpy);
-
-	/* Determine if this node should sent data to other nodes :
-	 * which ones, how much data */
-	balance(load_data_cpy);
-
-	/* Exchange data movements with neighboring nodes */
-	exchange_data_movements_infos();
-
-	/* Perform data movements */
-	update_data_ranks();
-
-	/* Clean the data handles to properly launch the next balance phase */
-	clean_balance();
-}
-
-/******************************************************************************
- *                      Heat Load Balancer Entry Points                       *
- *****************************************************************************/
-
-static void submitted_task_heat(struct starpu_task *task)
-{
-	load_data_inc_nsubmitted_tasks(*load_data_handle);
-	//if (load_data_get_nsubmitted_tasks(*load_data_handle) > task->tag_id)
-	//{
-	//    fprintf(stderr,"Error : nsubmitted_tasks (%d) > tag_id (%lld) ! \n", load_data_get_nsubmitted_tasks(*load_data_handle), (long long int)task->tag_id);
-	//    STARPU_ASSERT(0);
-	//}
-
-	int phase = load_data_get_current_phase(*load_data_handle);
-	/* Numbering of tasks in StarPU-MPI should be given by the application with
-	 * the STARPU_TAG_ONLY insert task option for now. */
-	/* TODO: Properly implement a solution for numbering tasks in StarPU-MPI */
-	if ((task->tag_id / load_data_get_sleep_threshold(*load_data_handle)) > phase)
-	{
-		STARPU_PTHREAD_MUTEX_LOCK(&load_data_mutex);
-		load_data_update_wakeup_cond(*load_data_handle);
-		//fprintf(stderr,"Node %d sleep on tag %lld\n", my_rank, (long long int)task->tag_id);
-		//if (load_data_get_nsubmitted_tasks(*load_data_handle) < load_data_get_wakeup_threshold(*load_data_handle))
-		//{
-		//    fprintf(stderr,"Error : nsubmitted_tasks (%d) lower than wakeup_threshold (%d) !\n", load_data_get_nsubmitted_tasks(*load_data_handle), load_data_get_wakeup_threshold(*load_data_handle));
-		//    STARPU_ASSERT(0);
-		//}
-
-		if (load_data_get_wakeup_threshold(*load_data_handle) > load_data_get_nfinished_tasks(*load_data_handle))
-			STARPU_PTHREAD_COND_WAIT(&load_data_cond, &load_data_mutex);
-
-		load_data_next_phase(*load_data_handle);
-
-		/* Register a copy of the load data at this moment, to allow to compute
-		 * the heat balance while not locking the load data during the whole
-		 * balance step, which could cause all the workers to wait on the lock
-		 * to update the data. */
-		struct starpu_data_interface_ops *itf_load_data = starpu_data_get_interface_ops(*load_data_handle);
-		void* itf_src = starpu_data_get_interface_on_node(*load_data_handle, STARPU_MAIN_RAM);
-		void* itf_dst = starpu_data_get_interface_on_node(*load_data_handle_cpy, STARPU_MAIN_RAM);
-		memcpy(itf_dst, itf_src, itf_load_data->interface_size);
-
-		_STARPU_DEBUG("[node %d] Balance phase %d\n", my_rank, load_data_get_current_phase(*load_data_handle));
-		STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex);
-
-		heat_balance(*load_data_handle_cpy);
-	}
-}
-
-static void finished_task_heat()
-{
-	//fprintf(stderr,"Try to decrement nsubmitted_tasks...");
-	STARPU_PTHREAD_MUTEX_LOCK(&load_data_mutex);
-
-	load_data_inc_nfinished_tasks(*load_data_handle);
-	//fprintf(stderr,"Decrement nsubmitted_tasks, now %d\n", load_data_get_nsubmitted_tasks(*load_data_handle));
-	if (load_data_wakeup_cond(*load_data_handle))
-	{
-		//fprintf(stderr,"Wakeup ! nfinished_tasks = %d, wakeup_threshold = %d\n", load_data_get_nfinished_tasks(*load_data_handle), load_data_get_wakeup_threshold(*load_data_handle));
-		load_data_update_elapsed_time(*load_data_handle);
-		STARPU_PTHREAD_COND_SIGNAL(&load_data_cond);
-		STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex);
-	}
-	else
-		STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex);
-}
-
-/******************************************************************************
- *                  Initialization / Deinitialization                         *
- *****************************************************************************/
-
-static int init_heat(struct starpu_mpi_lb_conf *itf)
-{
-	int i;
-	int sleep_task_threshold;
-	double wakeup_ratio;
-
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
-
-	/* Immediately return if the starpu_mpi_lb_conf is invalid. */
-	if (!(itf && itf->get_neighbors && itf->get_data_unit_to_migrate))
-	{
-		_STARPU_MSG("Error: struct starpu_mpi_lb_conf %p invalid\n", itf);
-		return 1;
-	}
-
-	_STARPU_MPI_MALLOC(user_itf, sizeof(struct starpu_mpi_lb_conf));
-	memcpy(user_itf, itf, sizeof(struct starpu_mpi_lb_conf));
-
-	/* Get the neighbors of the local MPI node */
-	user_itf->get_neighbors(&neighbor_ids, &nneighbors);
-	if (nneighbors == 0)
-	{
-		_STARPU_MSG("Error: Function get_neighbors returning 0 neighbor\n");
-		free(user_itf);
-		user_itf = NULL;
-		return 2;
-	}
-
-	/* The sleep threshold is deducted from the numbering of tasks by the
-	 * application. For example, with this threshold, the submission thread
-	 * will stop when a task for which the numbering is 2000 or above will be
-	 * submitted to StarPU-MPI. However, much less tasks can be really
-	 * submitted to the local MPI node: the sleeping of the submission threads
-	 * checks the numbering of the tasks, not how many tasks have been
-	 * submitted to the local MPI node, which are two different things. */
-	char *sleep_env = starpu_getenv("LB_HEAT_SLEEP_THRESHOLD");
-	if (sleep_env)
-		sleep_task_threshold = atoi(sleep_env);
-	else
-		sleep_task_threshold = 2000;
-
-	char *wakeup_env = starpu_getenv("LB_HEAT_WAKEUP_RATIO");
-	if (wakeup_env)
-		wakeup_ratio = atof(wakeup_env);
-	else
-		wakeup_ratio = 0.5;
-
-	char *time_env = starpu_getenv("LB_HEAT_TIME_THRESHOLD");
-	if (time_env)
-		time_threshold = atoi(time_env);
-	else
-		time_threshold = 2000;
-
-	STARPU_PTHREAD_MUTEX_INIT(&load_data_mutex, NULL);
-	STARPU_PTHREAD_COND_INIT(&load_data_cond, NULL);
-
-	/* Allocate, initialize and register all the data handles that will be
-	 * needed for the load balancer, to not reallocate them at each balance
-	 * step. */
-
-	/* Local load data */
-	_STARPU_MPI_CALLOC(load_data_handle, 1, sizeof(starpu_data_handle_t));
-	load_data_data_register(load_data_handle, STARPU_MAIN_RAM, sleep_task_threshold, wakeup_ratio);
-
-	/* Copy of the local load data to enable parallel update of the load data
-	 * with communications to neighbor nodes */
-	_STARPU_MPI_CALLOC(load_data_handle_cpy, 1, sizeof(starpu_data_handle_t));
-	void *local_interface = starpu_data_get_interface_on_node(*load_data_handle, STARPU_MAIN_RAM);
-	struct starpu_data_interface_ops *itf_load_data = starpu_data_get_interface_ops(*load_data_handle);
-	starpu_data_register(load_data_handle_cpy, STARPU_MAIN_RAM, local_interface, itf_load_data);
-	starpu_mpi_data_register(*load_data_handle_cpy, TAG_LOAD(my_rank), my_rank);
-
-	/* Remote load data */
-	_STARPU_MPI_CALLOC(neighbor_load_data_handles, nneighbors, sizeof(starpu_data_handle_t));
-	for (i = 0; i < nneighbors; i++)
-	{
-		load_data_data_register(&neighbor_load_data_handles[i], STARPU_MAIN_RAM, sleep_task_threshold, wakeup_ratio);
-		starpu_mpi_data_register(neighbor_load_data_handles[i], TAG_LOAD(neighbor_ids[i]), neighbor_ids[i]);
-	}
-
-	/* Data movements handles */
-	_STARPU_MPI_MALLOC(data_movements_handles, world_size*sizeof(starpu_data_handle_t));
-	for (i = 0; i < world_size; i++)
-	{
-		data_movements_data_register(&data_movements_handles[i], STARPU_MAIN_RAM, NULL, NULL, 0);
-		starpu_mpi_data_register(data_movements_handles[i], TAG_MOV(i), i);
-	}
-
-	/* Hash table of moved data that will be brought back on the node at
-	 * termination time */
-	mdh = NULL;
-
-	return 0;
-}
-
-/* Move back all the data that has been migrated out of this node at
- * denitialization time of the load balancer, to ensure the consistency with
- * the ranks of data originally registered by the application. */
-static void move_back_data()
-{
-	int i,j;
-
-	/* Update the new ranks for all concerned data */
-	for (i = 0; i < world_size; i++)
-	{
-		/* In this case, each data_movements_handles contains the handles to move back on the specific node */
-		int ndata_to_update = data_movements_get_size_tables(data_movements_handles[i]);
-		if (ndata_to_update)
-		{
-			_STARPU_DEBUG("Move back %d data from table %d on node %d\n", ndata_to_update, i, my_rank);
-
-			for (j = 0; j < ndata_to_update; j++)
-			{
-				starpu_data_handle_t handle = _starpu_mpi_tag_get_data_handle_from_tag((data_movements_get_tags_table(data_movements_handles[i]))[j]);
-				STARPU_ASSERT(handle);
-
-				int dst_rank = (data_movements_get_ranks_table(data_movements_handles[i]))[j];
-				STARPU_ASSERT(i == dst_rank);
-
-				if (i == my_rank)
-				{
-					/* The data is moved back, so update the state of the
-					 * moved_data hash table to reflect this change */
-					struct moved_data_entry *md = NULL;
-					HASH_FIND_PTR(mdh, &handle, md);
-					if (md)
-					{
-						HASH_DEL(mdh, md);
-						free(md);
-					}
-				}
-
-				//fprintf(stderr,"Call of starpu_mpi_get_data_on_node(%d,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank);
-
-				/* Migrate the data handle */
-				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL);
-
-				//fprintf(stderr,"New rank (%d) of data %d upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank);
-				starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD);
-			}
-		}
-	}
-}
-
-static int deinit_heat()
-{
-	int i;
-
-	if ((!user_itf) || (nneighbors == 0))
-		return 1;
-
-	_STARPU_DEBUG("Shutting down heat lb policy\n");
-
-	unsigned int ndata_to_move_back = HASH_COUNT(mdh);
-
-	if (ndata_to_move_back)
-	{
-		_STARPU_DEBUG("Move back %u data on node %d ..\n", ndata_to_move_back, my_rank);
-		data_movements_reallocate_tables(data_movements_handles[my_rank], ndata_to_move_back);
-
-		int *tags = data_movements_get_tags_table(data_movements_handles[my_rank]);
-		int *ranks = data_movements_get_ranks_table(data_movements_handles[my_rank]);
-
-		int n = 0;
-		struct moved_data_entry *md, *tmp;
-		HASH_ITER(hh, mdh, md, tmp)
-		{
-			tags[n] = starpu_mpi_data_get_tag(md->handle);
-			ranks[n] = my_rank;
-			n++;
-		}
-	}
-	else
-		data_movements_reallocate_tables(data_movements_handles[my_rank], 0);
-
-	exchange_data_movements_infos();
-	move_back_data();
-
-	/* This assert ensures that all nodes have properly gotten back all the
-	 * data that has been moven out of the node. */
-	STARPU_ASSERT(HASH_COUNT(mdh) == 0);
-	free(mdh);
-	mdh = NULL;
-
-	starpu_data_unregister(*load_data_handle);
-	free(load_data_handle);
-	load_data_handle = NULL;
-
-	starpu_mpi_cache_flush(MPI_COMM_WORLD, *load_data_handle_cpy);
-	starpu_data_unregister(*load_data_handle_cpy);
-	free(load_data_handle_cpy);
-	load_data_handle_cpy = NULL;
-
-	for (i = 0; i < nneighbors; i++)
-	{
-		starpu_mpi_cache_flush(MPI_COMM_WORLD, neighbor_load_data_handles[i]);
-		starpu_data_unregister(neighbor_load_data_handles[i]);
-	}
-	free(neighbor_load_data_handles);
-	neighbor_load_data_handles = NULL;
-
-	nneighbors = 0;
-	free(neighbor_ids);
-	neighbor_ids = NULL;
-
-	for (i = 0; i < world_size; i++)
-	{
-		starpu_mpi_cache_flush(MPI_COMM_WORLD, data_movements_handles[i]);
-		data_movements_reallocate_tables(data_movements_handles[i], 0);
-		starpu_data_unregister(data_movements_handles[i]);
-	}
-	free(data_movements_handles);
-	data_movements_handles = NULL;
-
-	STARPU_PTHREAD_MUTEX_DESTROY(&load_data_mutex);
-	STARPU_PTHREAD_COND_DESTROY(&load_data_cond);
-	free(user_itf);
-	user_itf = NULL;
-
-	return 0;
-}
-
-/******************************************************************************
- *                                  Policy                                    *
- *****************************************************************************/
-
-struct load_balancer_policy load_heat_propagation_policy =
-{
-	.init = init_heat,
-	.deinit = deinit_heat,
-	.submitted_task_entry_point = submitted_task_heat,
-	.finished_task_entry_point = finished_task_heat,
-	.policy_name = "heat"
-};
-
-#endif

+ 0 - 224
nmad/src/mpi/starpu_mpi_comm.c

@@ -1,224 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2011-2016  Université de Bordeaux
- * Copyright (C) 2014 INRIA
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <starpu_mpi.h>
-#include <starpu_mpi_private.h>
-#include <mpi/starpu_mpi_comm.h>
-#include <common/list.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-struct _starpu_mpi_comm
-{
-	MPI_Comm comm;
-	struct _starpu_mpi_envelope *envelope;
-	MPI_Request request;
-	int posted;
-
-#ifdef STARPU_SIMGRID
-	MPI_Status status;
-	starpu_pthread_queue_t queue;
-	unsigned done;
-#endif
-};
-struct _starpu_mpi_comm_hashtable
-{
-	UT_hash_handle hh;
-	MPI_Comm comm;
-};
-
-static starpu_pthread_mutex_t _starpu_mpi_comms_mutex;
-struct _starpu_mpi_comm_hashtable *_starpu_mpi_comms_cache;
-struct _starpu_mpi_comm **_starpu_mpi_comms;
-int _starpu_mpi_comm_nb;
-int _starpu_mpi_comm_allocated;
-int _starpu_mpi_comm_tested;
-
-void _starpu_mpi_comm_init(MPI_Comm comm)
-{
-	_STARPU_MPI_DEBUG(10, "allocating for %d communicators\n", _starpu_mpi_comm_allocated);
-	_starpu_mpi_comm_allocated=10;
-	_STARPU_MPI_CALLOC(_starpu_mpi_comms, _starpu_mpi_comm_allocated, sizeof(struct _starpu_mpi_comm *));
-	_starpu_mpi_comm_nb=0;
-	_starpu_mpi_comm_tested=0;
-	_starpu_mpi_comms_cache = NULL;
-	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_comms_mutex, NULL);
-
-	_starpu_mpi_comm_register(comm);
-}
-
-void _starpu_mpi_comm_shutdown()
-{
-	int i;
-	for(i=0 ; i<_starpu_mpi_comm_nb ; i++)
-	{
-		struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm;
-		free(_comm->envelope);
-#ifdef STARPU_SIMGRID
-		starpu_pthread_queue_unregister(&wait, &_comm->queue);
-		starpu_pthread_queue_destroy(&_comm->queue);
-#endif
-		free(_comm);
-	}
-	free(_starpu_mpi_comms);
-
-	struct _starpu_mpi_comm_hashtable *entry, *tmp;
-	HASH_ITER(hh, _starpu_mpi_comms_cache, entry, tmp)
-	{
-		HASH_DEL(_starpu_mpi_comms_cache, entry);
-		free(entry);
-	}
-
-	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_comms_mutex);
-}
-
-void _starpu_mpi_comm_register(MPI_Comm comm)
-{
-	struct _starpu_mpi_comm_hashtable *found;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_comms_mutex);
-	HASH_FIND(hh, _starpu_mpi_comms_cache, &comm, sizeof(MPI_Comm), found);
-	if (found)
-	{
-		_STARPU_MPI_DEBUG(10, "comm %ld (%ld) already registered\n", (long int)comm, (long int)MPI_COMM_WORLD);
-	}
-	else
-	{
-		if (_starpu_mpi_comm_nb == _starpu_mpi_comm_allocated)
-		{
-			_starpu_mpi_comm_allocated *= 2;
-			_STARPU_MPI_DEBUG(10, "reallocating for %d communicators\n", _starpu_mpi_comm_allocated);
-			_STARPU_MPI_REALLOC(_starpu_mpi_comms, _starpu_mpi_comm_allocated * sizeof(struct _starpu_mpi_comm *));
-		}
-		_STARPU_MPI_DEBUG(10, "registering comm %ld (%ld) number %d\n", (long int)comm, (long int)MPI_COMM_WORLD, _starpu_mpi_comm_nb);
-		struct _starpu_mpi_comm *_comm;
-		_STARPU_MPI_CALLOC(_comm, 1, sizeof(struct _starpu_mpi_comm));
-		_comm->comm = comm;
-		_STARPU_MPI_CALLOC(_comm->envelope, 1,sizeof(struct _starpu_mpi_envelope));
-		_comm->posted = 0;
-		_starpu_mpi_comms[_starpu_mpi_comm_nb] = _comm;
-		_starpu_mpi_comm_nb++;
-		struct _starpu_mpi_comm_hashtable *entry;
-		_STARPU_MPI_MALLOC(entry, sizeof(*entry));
-		entry->comm = comm;
-		HASH_ADD(hh, _starpu_mpi_comms_cache, comm, sizeof(entry->comm), entry);
-
-#ifdef STARPU_SIMGRID
-		starpu_pthread_queue_init(&_comm->queue);
-		starpu_pthread_queue_register(&wait, &_comm->queue);
-		_comm->done = 0;
-#endif
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_comms_mutex);
-}
-
-void _starpu_mpi_comm_post_recv()
-{
-	int i;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_comms_mutex);
-	for(i=0 ; i<_starpu_mpi_comm_nb ; i++)
-	{
-		struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm;
-		if (_comm->posted == 0)
-		{
-			_STARPU_MPI_DEBUG(3, "Posting a receive to get a data envelop on comm %d %ld\n", i, (long int)_comm->comm);
-			_STARPU_MPI_COMM_FROM_DEBUG(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, _STARPU_MPI_TAG_ENVELOPE, _comm->comm);
-			MPI_Irecv(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, _comm->comm, &_comm->request);
-#ifdef STARPU_SIMGRID
-			_starpu_mpi_simgrid_wait_req(&_comm->request, &_comm->status, &_comm->queue, &_comm->done);
-#endif
-			_comm->posted = 1;
-		}
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_comms_mutex);
-}
-
-int _starpu_mpi_comm_test_recv(MPI_Status *status, struct _starpu_mpi_envelope **envelope, MPI_Comm *comm)
-{
-	int i=_starpu_mpi_comm_tested;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_comms_mutex);
-	while (1)
-	{
-		struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm;
-
-		if (_comm->posted)
-		{
-			int flag, res;
-			/* test whether an envelope has arrived. */
-#ifdef STARPU_SIMGRID
-			res = _starpu_mpi_simgrid_mpi_test(&_comm->done, &flag);
-			memcpy(status, &_comm->status, sizeof(*status));
-#else
-			res = MPI_Test(&_comm->request, &flag, status);
-#endif
-			STARPU_ASSERT(res == MPI_SUCCESS);
-			if (flag)
-			{
-				_comm->posted = 0;
-				_starpu_mpi_comm_tested++;
-				if (_starpu_mpi_comm_tested == _starpu_mpi_comm_nb)
-					_starpu_mpi_comm_tested = 0;
-				*envelope = _comm->envelope;
-				*comm = _comm->comm;
-				STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_comms_mutex);
-				return 1;
-			}
-		}
-		i++;
-		if (i == _starpu_mpi_comm_nb)
-		{
-			i=0;
-		}
-		if (i == _starpu_mpi_comm_tested)
-		{
-			// We have tested all the requests, none has completed
-			STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_comms_mutex);
-			return 0;
-		}
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_comms_mutex);
-	return 0;
-}
-
-void _starpu_mpi_comm_cancel_recv()
-{
-	int i;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_comms_mutex);
-	for(i=0 ; i<_starpu_mpi_comm_nb ; i++)
-	{
-		struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm;
-		if (_comm->posted == 1)
-		{
-			MPI_Cancel(&_comm->request);
-#ifndef STARPU_SIMGRID
-			{
-				MPI_Status status;
-				MPI_Wait(&_comm->request, &status);
-			}
-#endif
-			_comm->posted = 0;
-		}
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_comms_mutex);
-}
-
-#endif /* STARPU_USE_MPI_MPI */

+ 0 - 43
nmad/src/mpi/starpu_mpi_comm.h

@@ -1,43 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_COMM_H__
-#define __STARPU_MPI_COMM_H__
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <mpi.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-void _starpu_mpi_comm_init(MPI_Comm comm);
-void _starpu_mpi_comm_shutdown();
-void _starpu_mpi_comm_register(MPI_Comm comm);
-void _starpu_mpi_comm_post_recv();
-int _starpu_mpi_comm_test_recv(MPI_Status *status, struct _starpu_mpi_envelope **envelope, MPI_Comm *comm);
-void _starpu_mpi_comm_cancel_recv();
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // STARPU_USE_MPI_MPI
-#endif // __STARPU_MPI_COMM_H__

+ 0 - 124
nmad/src/mpi/starpu_mpi_early_data.c

@@ -1,124 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010-2014, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <starpu_mpi.h>
-#include <mpi/starpu_mpi_early_data.h>
-#include <starpu_mpi_private.h>
-#include <common/uthash.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-struct _starpu_mpi_early_data_handle_hashlist
-{
-	struct _starpu_mpi_early_data_handle_list list;
-	UT_hash_handle hh;
-	struct _starpu_mpi_node_tag node_tag;
-};
-
-/** stores data which have been received by MPI but have not been requested by the application */
-static starpu_pthread_mutex_t _starpu_mpi_early_data_handle_mutex;
-static struct _starpu_mpi_early_data_handle_hashlist *_starpu_mpi_early_data_handle_hashmap = NULL;
-static int _starpu_mpi_early_data_handle_hashmap_count = 0;
-
-void _starpu_mpi_early_data_init(void)
-{
-	_starpu_mpi_early_data_handle_hashmap = NULL;
-	_starpu_mpi_early_data_handle_hashmap_count = 0;
-	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_early_data_handle_mutex, NULL);
-}
-
-void _starpu_mpi_early_data_check_termination(void)
-{
-	STARPU_ASSERT_MSG(_starpu_mpi_early_data_handle_hashmap_count == 0, "Number of unexpected received messages left is not zero (but %d), did you forget to post a receive corresponding to a send?", _starpu_mpi_early_data_handle_hashmap_count);
-}
-
-void _starpu_mpi_early_data_shutdown(void)
-{
-	struct _starpu_mpi_early_data_handle_hashlist *current, *tmp;
-	HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap, current, tmp)
-	{
-		STARPU_ASSERT(_starpu_mpi_early_data_handle_list_empty(&current->list));
-		HASH_DEL(_starpu_mpi_early_data_handle_hashmap, current);
-		free(current);
-	}
-	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_early_data_handle_mutex);
-}
-
-struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm)
-{
-	struct _starpu_mpi_early_data_handle* early_data_handle;
-	_STARPU_MPI_CALLOC(early_data_handle, 1, sizeof(struct _starpu_mpi_early_data_handle));
-	STARPU_PTHREAD_MUTEX_INIT(&early_data_handle->req_mutex, NULL);
-	STARPU_PTHREAD_COND_INIT(&early_data_handle->req_cond, NULL);
-	early_data_handle->env = envelope;
-	early_data_handle->node_tag.comm = comm;
-	early_data_handle->node_tag.rank = source;
-	early_data_handle->node_tag.data_tag = envelope->data_tag;
-	return early_data_handle;
-}
-
-struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag)
-{
-	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
-	struct _starpu_mpi_early_data_handle *early_data_handle;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
-	_STARPU_MPI_DEBUG(60, "Looking for early_data_handle with comm %ld source %d tag %d\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
-	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
-	if (hashlist == NULL)
-	{
-		early_data_handle = NULL;
-	}
-	else
-	{
-		if (_starpu_mpi_early_data_handle_list_empty(&hashlist->list))
-		{
-			early_data_handle = NULL;
-		}
-		else
-		{
-			_starpu_mpi_early_data_handle_hashmap_count --;
-			early_data_handle = _starpu_mpi_early_data_handle_list_pop_front(&hashlist->list);
-		}
-	}
-	_STARPU_MPI_DEBUG(60, "Found early_data_handle %p with comm %ld source %d tag %d\n", early_data_handle, (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
-	return early_data_handle;
-}
-
-void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle)
-{
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
-	_STARPU_MPI_DEBUG(60, "Trying to add early_data_handle %p with comm %ld source %d tag %d\n", early_data_handle, (long int)early_data_handle->node_tag.comm,
-			  early_data_handle->node_tag.rank, early_data_handle->node_tag.data_tag);
-
-	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
-	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &early_data_handle->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
-	if (hashlist == NULL)
-	{
-		_STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_early_data_handle_hashlist));
-		_starpu_mpi_early_data_handle_list_init(&hashlist->list);
-		hashlist->node_tag = early_data_handle->node_tag;
-		HASH_ADD(hh, _starpu_mpi_early_data_handle_hashmap, node_tag, sizeof(hashlist->node_tag), hashlist);
-	}
-	_starpu_mpi_early_data_handle_list_push_back(&hashlist->list, early_data_handle);
-	_starpu_mpi_early_data_handle_hashmap_count ++;
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
-}
-
-#endif // STARPU_USE_MPI_MPI

+ 0 - 59
nmad/src/mpi/starpu_mpi_early_data.h

@@ -1,59 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010-2014, 2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_EARLY_DATA_H__
-#define __STARPU_MPI_EARLY_DATA_H__
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <mpi.h>
-#include <common/config.h>
-#include <common/list.h>
-#include <starpu_mpi_private.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-LIST_TYPE(_starpu_mpi_early_data_handle,
-	  starpu_data_handle_t handle;
-	  struct _starpu_mpi_envelope *env;
-	  struct _starpu_mpi_req *req;
-	  void *buffer;
-	  int req_ready;
-	  struct _starpu_mpi_node_tag node_tag;
-	  starpu_pthread_mutex_t req_mutex;
-	  starpu_pthread_cond_t req_cond;
-);
-
-void _starpu_mpi_early_data_init(void);
-void _starpu_mpi_early_data_check_termination(void);
-void _starpu_mpi_early_data_shutdown(void);
-
-struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm) STARPU_ATTRIBUTE_MALLOC;
-struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag);
-void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /*  STARPU_USE_MPI_MPI */
-#endif /* __STARPU_MPI_EARLY_DATA_H__ */

+ 0 - 121
nmad/src/mpi/starpu_mpi_early_request.c

@@ -1,121 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010-2014, 2016-2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <starpu_mpi.h>
-#include <starpu_mpi_private.h>
-#include <mpi/starpu_mpi_early_request.h>
-#include <common/uthash.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-/** stores application requests for which data have not been received yet */
-struct _starpu_mpi_early_request_hashlist
-{
-	struct _starpu_mpi_req_list list;
-	UT_hash_handle hh;
-	struct _starpu_mpi_node_tag node_tag;
-};
-
-static starpu_pthread_mutex_t _starpu_mpi_early_request_mutex;
-struct _starpu_mpi_early_request_hashlist *_starpu_mpi_early_request_hash;
-int _starpu_mpi_early_request_hash_count;
-
-void _starpu_mpi_early_request_init()
-{
-	_starpu_mpi_early_request_hash = NULL;
-	_starpu_mpi_early_request_hash_count = 0;
-	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_early_request_mutex, NULL);
-}
-
-void _starpu_mpi_early_request_shutdown()
-{
-	struct _starpu_mpi_early_request_hashlist *entry, *tmp;
-	HASH_ITER(hh, _starpu_mpi_early_request_hash, entry, tmp)
-	{
-		STARPU_ASSERT(_starpu_mpi_req_list_empty(&entry->list));
-		HASH_DEL(_starpu_mpi_early_request_hash, entry);
-		free(entry);
-	}
-	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_early_request_mutex);
-}
-
-int _starpu_mpi_early_request_count()
-{
-	return _starpu_mpi_early_request_hash_count;
-}
-
-void _starpu_mpi_early_request_check_termination()
-{
-	STARPU_ASSERT_MSG(_starpu_mpi_early_request_count() == 0, "Number of early requests left is not zero");
-}
-
-struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int data_tag, int source, MPI_Comm comm)
-{
-	struct _starpu_mpi_node_tag node_tag;
-	struct _starpu_mpi_req *found;
-	struct _starpu_mpi_early_request_hashlist *hashlist;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
-	memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
-	node_tag.comm = comm;
-	node_tag.rank = source;
-	node_tag.data_tag = data_tag;
-
-	_STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %d\n", (long int)node_tag.comm, node_tag.rank, node_tag.data_tag);
-	HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
-	if (hashlist == NULL)
-	{
-		found = NULL;
-	}
-	else
-	{
-		if (_starpu_mpi_req_list_empty(&hashlist->list))
-		{
-			found = NULL;
-		}
-		else
-		{
-			found = _starpu_mpi_req_list_pop_front(&hashlist->list);
-			_starpu_mpi_early_request_hash_count --;
-		}
-	}
-	_STARPU_MPI_DEBUG(100, "Found early_request %p with comm %ld source %d tag %d\n", found, (long int)node_tag.comm, node_tag.rank, node_tag.data_tag);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
-	return found;
-}
-
-void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req)
-{
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
-	_STARPU_MPI_DEBUG(100, "Adding request %p with comm %ld source %d tag %d in the application request hashmap\n", req, (long int)req->node_tag.comm, req->node_tag.rank, req->node_tag.data_tag);
-
-	struct _starpu_mpi_early_request_hashlist *hashlist;
-	HASH_FIND(hh, _starpu_mpi_early_request_hash, &req->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
-	if (hashlist == NULL)
-	{
-		_STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_early_request_hashlist));
-		_starpu_mpi_req_list_init(&hashlist->list);
-		hashlist->node_tag = req->node_tag;
-		HASH_ADD(hh, _starpu_mpi_early_request_hash, node_tag, sizeof(hashlist->node_tag), hashlist);
-	}
-	_starpu_mpi_req_list_push_back(&hashlist->list, req);
-	_starpu_mpi_early_request_hash_count ++;
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
-}
-
-#endif // STARPU_USE_MPI_MPI

+ 0 - 47
nmad/src/mpi/starpu_mpi_early_request.h

@@ -1,47 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010-2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_EARLY_REQUEST_H__
-#define __STARPU_MPI_EARLY_REQUEST_H__
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <mpi.h>
-#include <common/config.h>
-#include <common/list.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-void _starpu_mpi_early_request_init(void);
-void _starpu_mpi_early_request_shutdown(void);
-int _starpu_mpi_early_request_count(void);
-void _starpu_mpi_early_request_check_termination(void);
-
-void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req);
-struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int data_tag, int source, MPI_Comm comm);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* STARPU_USE_MPI_MPI */
-#endif /* __STARPU_MPI_EARLY_REQUEST_H__ */

文件差異過大導致無法顯示
+ 0 - 1640
nmad/src/mpi/starpu_mpi_mpi.c


+ 0 - 153
nmad/src/mpi/starpu_mpi_sync_data.c

@@ -1,153 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <starpu_mpi.h>
-#include <mpi/starpu_mpi_sync_data.h>
-#include <starpu_mpi_private.h>
-#include <common/uthash.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-struct _starpu_mpi_sync_data_handle_hashlist
-{
-	struct _starpu_mpi_req_list list;
-	UT_hash_handle hh;
-	struct _starpu_mpi_node_tag node_tag;
-};
-
-/** stores data which have been received by MPI but have not been requested by the application */
-static starpu_pthread_mutex_t _starpu_mpi_sync_data_handle_mutex;
-static struct _starpu_mpi_sync_data_handle_hashlist *_starpu_mpi_sync_data_handle_hashmap = NULL;
-static int _starpu_mpi_sync_data_handle_hashmap_count = 0;
-
-void _starpu_mpi_sync_data_init(void)
-{
-	_starpu_mpi_sync_data_handle_hashmap = NULL;
-	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_sync_data_handle_mutex, NULL);
-	_starpu_mpi_sync_data_handle_hashmap_count = 0;
-}
-
-void _starpu_mpi_sync_data_shutdown(void)
-{
-	struct _starpu_mpi_sync_data_handle_hashlist *current, *tmp;
-	HASH_ITER(hh, _starpu_mpi_sync_data_handle_hashmap, current, tmp)
-	{
-		STARPU_ASSERT(_starpu_mpi_req_list_empty(&current->list));
-		HASH_DEL(_starpu_mpi_sync_data_handle_hashmap, current);
-		free(current);
-	}
-	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_sync_data_handle_mutex);
-}
-
-#ifdef STARPU_VERBOSE
-static
-void _starpu_mpi_sync_data_handle_display_hash(struct _starpu_mpi_node_tag *node_tag)
-{
-	struct _starpu_mpi_sync_data_handle_hashlist *hashlist;
-	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
-
-	if (hashlist == NULL)
-	{
-		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %d does not exist\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
-	}
-	else if (_starpu_mpi_req_list_empty(&hashlist->list))
-	{
-		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %d is empty\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
-	}
-	else
-	{
-		struct _starpu_mpi_req *cur;
-		for (cur = _starpu_mpi_req_list_begin(&hashlist->list) ;
-		     cur != _starpu_mpi_req_list_end(&hashlist->list);
-		     cur = _starpu_mpi_req_list_next(cur))
-		{
-			_STARPU_MPI_DEBUG(60, "Element for comm %ld source %d and tag %d: %p\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag, cur);
-		}
-	}
-}
-#endif
-
-void _starpu_mpi_sync_data_check_termination(void)
-{
-	STARPU_ASSERT_MSG(_starpu_mpi_sync_data_handle_hashmap_count == 0, "Number of sync received messages left is not zero, did you forget to post a receive corresponding to a send?");
-}
-
-int _starpu_mpi_sync_data_count(void)
-{
-	return _starpu_mpi_sync_data_handle_hashmap_count;
-}
-
-struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int data_tag, int source, MPI_Comm comm)
-{
-	struct _starpu_mpi_req *req;
-	struct _starpu_mpi_node_tag node_tag;
-	struct _starpu_mpi_sync_data_handle_hashlist *found;
-
-	memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
-	node_tag.comm = comm;
-	node_tag.rank = source;
-	node_tag.data_tag = data_tag;
-
-	_STARPU_MPI_DEBUG(60, "Looking for sync_data_handle with comm %ld source %d tag %d in the hashmap\n", (long int)comm, source, data_tag);
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex);
-	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &node_tag, sizeof(struct _starpu_mpi_node_tag), found);
-	if (found == NULL)
-	{
-		req = NULL;
-	}
-	else
-	{
-		if (_starpu_mpi_req_list_empty(&found->list))
-		{
-			req = NULL;
-		}
-		else
-		{
-			req = _starpu_mpi_req_list_pop_front(&found->list);
-			_starpu_mpi_sync_data_handle_hashmap_count --;
-		}
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_sync_data_handle_mutex);
-	_STARPU_MPI_DEBUG(60, "Found sync_data_handle %p with comm %ld source %d tag %d in the hashmap\n", req, (long int)comm, source, data_tag);
-	return req;
-}
-
-void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *sync_req)
-{
-	struct _starpu_mpi_sync_data_handle_hashlist *hashlist;
-
-	_STARPU_MPI_DEBUG(2000, "Adding sync_req %p with comm %ld source %d tag %d in the hashmap\n", sync_req, (long int)sync_req->node_tag.comm, sync_req->node_tag.rank, sync_req->node_tag.data_tag);
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex);
-	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &sync_req->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
-	if (hashlist == NULL)
-	{
-		_STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_sync_data_handle_hashlist));
-		_starpu_mpi_req_list_init(&hashlist->list);
-		hashlist->node_tag = sync_req->node_tag;
-		HASH_ADD(hh, _starpu_mpi_sync_data_handle_hashmap, node_tag, sizeof(hashlist->node_tag), hashlist);
-	}
-	_starpu_mpi_req_list_push_back(&hashlist->list, sync_req);
-	_starpu_mpi_sync_data_handle_hashmap_count ++;
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_sync_data_handle_mutex);
-#ifdef STARPU_VERBOSE
-	_starpu_mpi_sync_data_handle_display_hash(&sync_req->node_tag);
-#endif
-}
-
-#endif // STARPU_USE_MPI_MPI

+ 0 - 46
nmad/src/mpi/starpu_mpi_sync_data.h

@@ -1,46 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_SYNC_DATA_H__
-#define __STARPU_MPI_SYNC_DATA_H__
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <mpi.h>
-#include <common/config.h>
-#include <common/list.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-void _starpu_mpi_sync_data_init(void);
-void _starpu_mpi_sync_data_check_termination(void);
-void _starpu_mpi_sync_data_shutdown(void);
-
-struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int data_tag, int source, MPI_Comm comm);
-void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *req);
-int _starpu_mpi_sync_data_count();
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* STARPU_USE_MPI_MPI */
-#endif /* __STARPU_MPI_SYNC_DATA_H__ */

+ 0 - 122
nmad/src/mpi/starpu_mpi_tag.c

@@ -1,122 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2011-2015, 2017  Université de Bordeaux
- * Copyright (C) 2014 INRIA
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <starpu_mpi.h>
-#include <starpu_mpi_private.h>
-#include <common/uthash.h>
-#include <common/starpu_spinlock.h>
-#include <datawizard/coherency.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-/* Entry in the `registered_tag_handles' hash table.  */
-struct handle_tag_entry
-{
-	UT_hash_handle hh;
-	int tag;
-	starpu_data_handle_t handle;
-};
-
-/* Hash table mapping host tags to data handles.  */
-static struct handle_tag_entry *registered_tag_handles;
-static struct _starpu_spinlock    registered_tag_handles_lock;
-
-void _starpu_mpi_tag_init(void)
-{
-	_starpu_spin_init(&registered_tag_handles_lock);
-}
-
-void _starpu_mpi_tag_shutdown(void)
-{
-     	struct handle_tag_entry *tag_entry, *tag_tmp;
-
-	_starpu_spin_destroy(&registered_tag_handles_lock);
-
-	HASH_ITER(hh, registered_tag_handles, tag_entry, tag_tmp)
-	{
-		HASH_DEL(registered_tag_handles, tag_entry);
-		free(tag_entry);
-	}
-
-	registered_tag_handles = NULL;
-}
-
-starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int tag)
-{
-	struct handle_tag_entry *ret;
-
-	_starpu_spin_lock(&registered_tag_handles_lock);
-	HASH_FIND_INT(registered_tag_handles, &tag, ret);
-	_starpu_spin_unlock(&registered_tag_handles_lock);
-
-	if (ret)
-	{
-		return ret->handle;
-	}
-	else
-	{
-		return NULL;
-	}
-}
-
-void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int tag)
-{
-	struct handle_tag_entry *entry;
-	if (tag == -1)
-		/* No tag for this data, probably a temporary data not to be communicated */
-		return;
-	_STARPU_MPI_MALLOC(entry, sizeof(*entry));
-
-	STARPU_ASSERT_MSG(!(_starpu_mpi_tag_get_data_handle_from_tag(tag)),
-			  "There is already a data handle %p registered with the tag %d\n", _starpu_mpi_tag_get_data_handle_from_tag(tag), tag);
-
-	_STARPU_MPI_DEBUG(42, "Adding handle %p with tag %d in hashtable\n", handle, tag);
-
-	entry->handle = handle;
-	entry->tag = tag;
-
-	_starpu_spin_lock(&registered_tag_handles_lock);
-	HASH_ADD_INT(registered_tag_handles, tag, entry);
-	_starpu_spin_unlock(&registered_tag_handles_lock);
-}
-
-int _starpu_mpi_tag_data_release(starpu_data_handle_t handle)
-{
-	int tag = starpu_mpi_data_get_tag(handle);
-
-	_STARPU_MPI_DEBUG(42, "Removing handle %p with tag %d from hashtable\n", handle, tag);
-
-	if (tag != -1)
-	{
-		struct handle_tag_entry *tag_entry;
-
-		_starpu_spin_lock(&registered_tag_handles_lock);
-		HASH_FIND_INT(registered_tag_handles, &(((struct _starpu_mpi_data *)(handle->mpi_data))->node_tag.data_tag), tag_entry);
-		STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %d isn't in the hashmap !",handle,tag);
-
-		HASH_DEL(registered_tag_handles, tag_entry);
-
-		_starpu_spin_unlock(&registered_tag_handles_lock);
-
-		free(tag_entry);
-	}
-	return 0;
-}
-
-#endif // STARPU_USE_MPI_MPI

+ 0 - 43
nmad/src/mpi/starpu_mpi_tag.h

@@ -1,43 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_TAG_H__
-#define __STARPU_MPI_TAG_H__
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <mpi.h>
-
-#ifdef STARPU_USE_MPI_MPI
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-void _starpu_mpi_tag_init(void);
-void _starpu_mpi_tag_shutdown(void);
-
-void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int tag);
-int _starpu_mpi_tag_data_release(starpu_data_handle_t handle);
-starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int tag);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // STARPU_USE_MPI_MPI
-#endif // __STARPU_MPI_TAG_H__

+ 0 - 767
nmad/src/nmad/starpu_mpi_nmad.c

@@ -1,767 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010-2014, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <limits.h>
-#include <starpu_mpi.h>
-#include <starpu_mpi_datatype.h>
-#include <starpu_mpi_private.h>
-#include <starpu_mpi_cache.h>
-#include <starpu_profiling.h>
-#include <starpu_mpi_stats.h>
-#include <starpu_mpi_cache.h>
-#include <starpu_mpi_select_node.h>
-#include <starpu_mpi_init.h>
-#include <common/config.h>
-#include <common/thread.h>
-#include <datawizard/coherency.h>
-#include <core/task.h>
-#include <core/topology.h>
-
-#ifdef STARPU_USE_MPI_NMAD
-
-#include <nm_sendrecv_interface.h>
-#include <nm_mpi_nmad.h>
-
-static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,nm_sr_event_t event);
-#ifdef STARPU_VERBOSE
-static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type);
-#endif
-static void _starpu_mpi_handle_new_request(void *arg);
-
-static void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req);
-static void _starpu_mpi_add_sync_point_in_fxt(void);
-
-static int mpi_thread_cpuid = -1;
-int _starpu_mpi_fake_world_size = -1;
-int _starpu_mpi_fake_world_rank = -1;
-
-/* Progression thread, and the mutex/condition used to publish changes of the `running` flag */
-static starpu_pthread_t progress_thread;
-static starpu_pthread_cond_t progress_cond;
-static starpu_pthread_mutex_t progress_mutex;
-static volatile int running = 0;
-
-extern struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count);
-
-/* Number of requests posted by the application that have not terminated yet: incremented when a request is created, decremented once it has been completed (and its callback, if any, executed) */
-
-static volatile int pending_request = 0;
-
-#define REQ_FINALIZED 0x1
-
-PUK_LFSTACK_TYPE(callback,	struct _starpu_mpi_req *req;);
-static callback_lfstack_t callback_stack = NULL;
-
-static starpu_sem_t callback_sem;
-
-/* Allocate a zero-filled request, store it in *req and give every field
- * its initial value. */
-void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
-{
-	struct _starpu_mpi_req *r;
-
-	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
-	r = *req;
-
-	/* Data being communicated */
-	r->data_handle = NULL;
-	r->prio = 0;
-	r->completed = 0;
-
-	r->datatype = 0;
-	r->datatype_name = NULL;
-	r->ptr = NULL;
-	r->count = -1;
-	r->registered_datatype = -1;
-
-	/* Peer, tag and communicator */
-	r->node_tag.rank = -1;
-	r->node_tag.data_tag = -1;
-	r->node_tag.comm = 0;
-
-	r->func = NULL;
-
-	r->status = NULL;
-	r->flag = NULL;
-
-	r->ret = -1;
-	piom_cond_init(&r->req_cond, 0);
-
-	r->request_type = UNKNOWN_REQ;
-
-	/* Progress flags (completed was already cleared above) */
-	r->submitted = 0;
-	r->posted = 0;
-
-	/* Completion handling */
-	r->sync = 0;
-	r->detached = -1;
-	r->callback = NULL;
-	r->callback_arg = NULL;
-
-	r->sequential_consistency = 1;
-	r->pre_sync_jobid = -1;
-	r->post_sync_jobid = -1;
-
-#ifdef STARPU_SIMGRID
-	starpu_pthread_queue_init(&r->queue);
-	starpu_pthread_queue_register(&wait, &r->queue);
-	r->done = 0;
-#endif
-}
-
-/* Destroy the condition embedded in the request, then free the request
- * itself. req must not be used after this call. */
-void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
-{
-	piom_cond_destroy(&(req->req_cond));
-	free(req);
-}
-
-/********************************************************/
-/*                                                      */
-/*  Send/Receive functionalities                        */
-/*                                                      */
-/********************************************************/
-
-/* Acquire callback used when communications are faked: arg is the data
- * handle that was just acquired, and it is released right away. */
-static void nop_acquire_cb(void *arg)
-{
-	starpu_data_release(arg);
-}
-
-/* Common entry point for posting a send or a receive.
- *
- * Builds a request describing the communication (peer srcdst, data_tag,
- * comm, priority, completion callback, ...) and asks StarPU to acquire
- * data_handle in main memory with access mode `mode`; the request is
- * actually submitted from the acquisition callback, through req->func.
- *
- * Returns the new request, or NULL when communications are faked
- * (_starpu_mpi_fake_world_size != -1).
- *
- * is_internal_req and count are not read by this implementation.
- */
-struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
-						       int srcdst, int data_tag, MPI_Comm comm,
-						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
-						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-						       enum starpu_data_access_mode mode,
-						       int sequential_consistency,
-						       int is_internal_req,
-						       starpu_ssize_t count)
-{
-
-	struct _starpu_mpi_req *req;
-
-	if (_starpu_mpi_fake_world_size != -1)
-	{
-		/* Don't actually do the communication: only acquire the data and
-		 * release it from the callback, no request is created */
-		starpu_data_acquire_on_node_cb_sequential_consistency(data_handle, STARPU_MAIN_RAM, mode, nop_acquire_cb, data_handle, sequential_consistency);
-		return NULL;
-	}
-
-	_STARPU_MPI_LOG_IN();
-	/* One more request that the progression thread has to wait for */
-	STARPU_ATOMIC_ADD( &pending_request, 1);
-
-	/* Initialize the request structure */
-	_starpu_mpi_request_init(&req);
-	req->request_type = request_type;
-	/* prio_list is sorted by increasing values */
-	req->prio = prio;
-	req->data_handle = data_handle;
-	req->node_tag.rank = srcdst;
-	req->node_tag.data_tag = data_tag;
-	req->node_tag.comm = comm;
-	req->detached = detached;
-	req->sync = sync;
-	req->callback = callback;
-	req->callback_arg = arg;
-	req->func = func;
-	req->sequential_consistency = sequential_consistency;
-	/* Fills req->session and req->gate from the communicator and the peer rank */
-	nm_mpi_nmad_dest(&req->session, &req->gate, comm, req->node_tag.rank);
-
-	/* Asynchronously request StarPU to fetch the data in main memory: when
-	 * it is available in main memory, _starpu_mpi_handle_new_request(req) is called and
-	 * the request is actually submitted */
-	starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_handle_new_request, (void *)req, sequential_consistency, &req->pre_sync_jobid, &req->post_sync_jobid);
-
-	_STARPU_MPI_LOG_OUT();
-	return req;
-}
-
-/********************************************************/
-/*                                                      */
-/*  Send functionalities                                */
-/*                                                      */
-/********************************************************/
-
-/* Post the send of the data described by req (req->ptr, req->datatype,
- * req->count) to req->gate with tag req->node_tag.data_tag, then register
- * the completion monitoring for the request. */
-static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
-{
-	_STARPU_MPI_LOG_IN();
-
-	_STARPU_MPI_DEBUG(30, "post MPI isend request %p type %s tag %d src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
-
-	_starpu_mpi_comm_amounts_inc(req->node_tag.comm, req->node_tag.rank, req->datatype, req->count);
-
-	_STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag, 0);
-
-	/* Describe the buffer, then build the send request with its priority */
-	struct nm_data_s data;
-	nm_mpi_nmad_data(&data, (void*)req->ptr, req->datatype, req->count);
-	nm_sr_send_init(req->session, &(req->data_request));
-	nm_sr_send_pack_data(req->session, &(req->data_request), &data);
-	nm_sr_send_set_priority(req->session, &req->data_request, req->prio);
-
-	/* req->sync selects nm_sr_send_issend instead of nm_sr_send_isend */
-	if (req->sync == 0)
-	{
-		req->ret = nm_sr_send_isend(req->session, &(req->data_request), req->gate, req->node_tag.data_tag);
-		STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Isend returning %d", req->ret);
-	}
-	else
-	{
-		req->ret = nm_sr_send_issend(req->session, &(req->data_request), req->gate, req->node_tag.data_tag);
-		STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Issend returning %d", req->ret);
-	}
-
-	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag, starpu_data_get_size(req->data_handle), req->pre_sync_jobid);
-
-	/* Ask to be notified when the request is finalized */
-	_starpu_mpi_handle_pending_request(req);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-/* Prepare and post a send request.
- *
- * When the handle has a registered datatype, the data is sent in place as a
- * single element of that datatype. Otherwise the data is packed into a
- * buffer and its size is sent first (same peer, same tag), so two sends are
- * posted; req->waited holds the number of sends whose completion has to be
- * observed before the request is considered terminated.
- */
-void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
-{
-	_starpu_mpi_datatype_allocate(req->data_handle, req);
-
-	if (req->registered_datatype == 1)
-	{
-		req->waited = 1;
-		req->count = 1;
-		req->ptr = starpu_data_get_local_ptr(req->data_handle);
-	}
-	else
-	{
-		starpu_ssize_t psize = -1;
-		int ret;
-		req->waited = 2;
-
-		// Do not pack the data, just try to find out the size
-		starpu_data_pack(req->data_handle, NULL, &psize);
-
-		if (psize != -1)
-		{
-			// We already know the size of the data, let's send it to overlap with the packing of the data
-			// (arguments are cast to long so that they match the %ld conversions)
-			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", (long)psize, (long)sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
-			req->count = psize;
-			ret = nm_sr_isend(req->session,req->gate, req->node_tag.data_tag,&req->count, sizeof(req->count), &req->size_req);
-			STARPU_ASSERT_MSG(ret == NM_ESUCCESS, "when sending size, nm_sr_isend returning %d", ret);
-		}
-
-		// Pack the data
-		starpu_data_pack(req->data_handle, &req->ptr, &req->count);
-		if (psize == -1)
-		{
-			// We know the size now, let's send it
-			_STARPU_MPI_DEBUG(1, "Sending size %ld (%ld %s) with tag %d to node %d (second call to pack)\n", (long)req->count, (long)sizeof(req->count), "MPI_BYTE", req->node_tag.data_tag, req->node_tag.rank);
-			ret = nm_sr_isend(req->session,req->gate, req->node_tag.data_tag,&req->count, sizeof(req->count), &req->size_req);
-			STARPU_ASSERT_MSG(ret == NM_ESUCCESS, "when sending size, nm_sr_isend returning %d", ret);
-		}
-		else
-		{
-			// We check the size returned with the 2 calls to pack is the same
-			STARPU_ASSERT_MSG(req->count == psize, "Calls to pack_data returned different sizes %ld != %ld", (long)req->count, (long)psize);
-		}
-
-		// We can send the data now
-	}
-	_starpu_mpi_isend_data_func(req);
-}
-
-/********************************************************/
-/*                                                      */
-/*  Receive functionalities                             */
-/*                                                      */
-/********************************************************/
-
-/* Post the receive of the data described by req (req->ptr, req->datatype,
- * req->count) from req->gate with tag req->node_tag.data_tag, then register
- * the completion monitoring for the request. */
-static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
-{
-	_STARPU_MPI_LOG_IN();
-
-	_STARPU_MPI_DEBUG(20, "post MPI irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
-
-	_STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
-
-	/* Equivalent of the former call:
-	 * req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request); */
-	struct nm_data_s data;
-	nm_mpi_nmad_data(&data, (void*)req->ptr, req->datatype, req->count);
-	nm_sr_recv_init(req->session, &(req->data_request));
-	nm_sr_recv_unpack_data(req->session, &(req->data_request), &data);
-	nm_sr_recv_irecv(req->session, &(req->data_request), req->gate, req->node_tag.data_tag, NM_TAG_MASK_FULL);
-
-	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag);
-
-	/* Ask to be notified when the request is finalized */
-	_starpu_mpi_handle_pending_request(req);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-/* Argument of the callback run once the size of a packed message has been
- * received. */
-struct _starpu_mpi_irecv_size_callback
-{
-	/* temporary variable handle registered over req->count */
-	starpu_data_handle_t handle;
-	/* receive request waiting for its size */
-	struct _starpu_mpi_req *req;
-};
-
-/* Called once the size of the incoming message has been received in
- * req->count: drop the temporary handle, allocate a buffer of that size and
- * post the receive of the data itself. arg is a heap-allocated
- * struct _starpu_mpi_irecv_size_callback which is freed here. */
-static void _starpu_mpi_irecv_size_callback(void *arg)
-{
-	struct _starpu_mpi_irecv_size_callback *callback = (struct _starpu_mpi_irecv_size_callback *)arg;
-
-	starpu_data_unregister(callback->handle);
-	callback->req->ptr = malloc(callback->req->count);
-	STARPU_ASSERT_MSG(callback->req->ptr, "cannot allocate message of size %ld", callback->req->count);
-	_starpu_mpi_irecv_data_func(callback->req);
-	free(callback);
-}
-
-/* Prepare and post a receive request.
- *
- * When the handle has a registered datatype, the data is received in place
- * as a single element of that datatype. Otherwise the size of the packed
- * message is received first into req->count, through a detached internal
- * receive on a temporary variable handle; the data itself is received from
- * _starpu_mpi_irecv_size_callback().
- */
-void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
-{
-	_STARPU_MPI_LOG_IN();
-
-	_starpu_mpi_datatype_allocate(req->data_handle, req);
-	if (req->registered_datatype == 1)
-	{
-		req->count = 1;
-		req->ptr = starpu_data_get_local_ptr(req->data_handle);
-		_starpu_mpi_irecv_data_func(req);
-	}
-	else
-	{
-		struct _starpu_mpi_irecv_size_callback *callback;
-
-		/* Checked allocation: the result is dereferenced right below */
-		_STARPU_MPI_MALLOC(callback, sizeof(struct _starpu_mpi_irecv_size_callback));
-		callback->req = req;
-		starpu_variable_data_register(&callback->handle, 0, (uintptr_t)&(callback->req->count), sizeof(callback->req->count));
-		_STARPU_MPI_DEBUG(4, "Receiving size with tag %d from node %d\n", req->node_tag.data_tag, req->node_tag.rank);
-		_starpu_mpi_irecv_common(callback->handle, req->node_tag.rank, req->node_tag.data_tag, req->node_tag.comm, 1, 0, _starpu_mpi_irecv_size_callback, callback,1,0,0);
-	}
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-/********************************************************/
-/*                                                      */
-/*  Wait functionalities                                */
-/*                                                      */
-/********************************************************/
-
-/* Fill the MPI_Status pointed to by STATUS from the request PUBLIC_REQ.
- * Both arguments are parenthesized so that any pointer expression can be
- * passed. */
-#define _starpu_mpi_req_status(PUBLIC_REQ,STATUS) do {			\
-	(STATUS)->MPI_SOURCE=(PUBLIC_REQ)->node_tag.rank; /**< field name mandatory by spec */ \
-	(STATUS)->MPI_TAG=(PUBLIC_REQ)->node_tag.data_tag;    /**< field name mandatory by spec */ \
-	(STATUS)->MPI_ERROR=(PUBLIC_REQ)->ret;  /**< field name mandatory by spec */ \
-	(STATUS)->size=(PUBLIC_REQ)->count;       /**< size of data received */ \
-	(STATUS)->cancelled=0;  /**< whether request was cancelled */	\
-} while(0)
-
-/* Block until the non-detached request *public_req has terminated, fill
- * status unless it is MPI_STATUS_IGNORE, then destroy the request and set
- * *public_req to NULL. Always returns MPI_SUCCESS. */
-int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
-{
-	_STARPU_MPI_LOG_IN();
-	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_wait needs a valid starpu_mpi_req");
-	struct _starpu_mpi_req *req = *public_req;
-	STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Wait cannot be called on a detached request");
-
-	/* Checking req->completed alone is not enough: test_locked is needed to
-	 * avoid a race, otherwise req_cond could still be in use by the
-	 * signalling thread when the request is freed below */
-	while (!req->completed || ! piom_cond_test_locked(&(req->req_cond),REQ_FINALIZED))
-	{
-		piom_cond_wait(&(req->req_cond),REQ_FINALIZED);
-	}
-
-	if (status!=MPI_STATUS_IGNORE)
-		_starpu_mpi_req_status(req,status);
-
-	_starpu_mpi_request_destroy(req);
-	*public_req = NULL;
-	_STARPU_MPI_LOG_OUT();
-	return MPI_SUCCESS;
-}
-
-/********************************************************/
-/*                                                      */
-/*  Test functionalities                                */
-/*                                                      */
-/********************************************************/
-
-/* Non-blocking completion test of the non-detached request *public_req.
- * *flag is set to non-zero when the request has terminated; in that case
- * status is filled (unless it is MPI_STATUS_IGNORE), the request is
- * destroyed and *public_req is set to NULL. Always returns MPI_SUCCESS. */
-int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
-{
-	_STARPU_MPI_LOG_IN();
-	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_test needs a valid starpu_mpi_req");
-	struct _starpu_mpi_req *req = *public_req;
-	STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request");
-	_STARPU_MPI_DEBUG(2, "Test request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
-
-	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
-
-	/* Checking req->completed alone is not enough: test_locked is needed to
-	 * avoid a race, otherwise req_cond could still be in use by the
-	 * signalling thread when the request is freed below */
-	*flag = req->completed && piom_cond_test_locked(&(req->req_cond),REQ_FINALIZED);
-	if (*flag && status!=MPI_STATUS_IGNORE)
-		_starpu_mpi_req_status(req,status);
-
-	_STARPU_MPI_TRACE_UTESTING_END(req->node_tag.rank, req->node_tag.data_tag);
-
-	if(*flag)
-	{
-		_starpu_mpi_request_destroy(req);
-		*public_req = NULL;
-	}
-	_STARPU_MPI_LOG_OUT();
-	return MPI_SUCCESS;
-}
-
-/********************************************************/
-/*                                                      */
-/*  Barrier functionalities                             */
-/*                                                      */
-/********************************************************/
-
-/* Run MPI_Barrier() on comm, assert that it succeeded and return its
- * result. */
-int _starpu_mpi_barrier(MPI_Comm comm)
-{
-	int barrier_ret;
-
-	_STARPU_MPI_LOG_IN();
-
-	barrier_ret = MPI_Barrier(comm);
-	STARPU_ASSERT_MSG(barrier_ret == MPI_SUCCESS, "MPI_Barrier returning %d", barrier_ret);
-
-	_STARPU_MPI_LOG_OUT();
-
-	return barrier_ret;
-}
-
-/********************************************************/
-/*                                                      */
-/*  Progression                                         */
-/*                                                      */
-/********************************************************/
-
-#ifdef STARPU_VERBOSE
-/* Name of a request type, for debug messages only. */
-static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type)
-{
-	if (request_type == SEND_REQ)
-		return "SEND_REQ";
-	if (request_type == RECV_REQ)
-		return "RECV_REQ";
-	if (request_type == WAIT_REQ)
-		return "WAIT_REQ";
-	if (request_type == TEST_REQ)
-		return "TEST_REQ";
-	if (request_type == BARRIER_REQ)
-		return "BARRIER_REQ";
-
-	return "unknown request type";
-}
-#endif
-
-/* Completion handler of a request, called with the event reported for one
- * of its communications.
- *
- * For send/receive requests: releases the packed buffer or the datatype and
- * releases the data handle. Then either hands the request over to the
- * progression thread when a user callback has to be run, or terminates it
- * here (destroy if detached, signal the waiter otherwise).
- *
- * NOTE(review): on the early return below _STARPU_MPI_LOG_OUT() is not
- * called.
- */
-static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,nm_sr_event_t event)
-{
-	_STARPU_MPI_LOG_IN();
-
-	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
-
-	if (req->request_type == RECV_REQ || req->request_type == SEND_REQ)
-	{
-		if (req->registered_datatype == 0)
-		{
-			if (req->request_type == SEND_REQ)
-			{
-				req->waited--;
-				// We need to make sure the communication for sending the size
-				// has completed, as MPI can re-order messages, let's count
-				// received messages.
-				// FIXME concurrent access.
-				STARPU_ASSERT_MSG(event == NM_SR_EVENT_FINALIZED, "Callback with event %d", event);
-				if(req->waited>0)
-					return;
-
-			}
-			if (req->request_type == RECV_REQ)
-				// req->ptr is freed by starpu_data_unpack
-				starpu_data_unpack(req->data_handle, req->ptr, req->count);
-			else
-				free(req->ptr);
-		}
-		else
-		{
-			_starpu_mpi_datatype_free(req->data_handle, &req->datatype);
-		}
-		starpu_data_release(req->data_handle);
-	}
-
-	/* Execute the specified callback, if any */
-	if (req->callback)
-	{
-		/* NOTE(review): the result of padico_malloc is not checked */
-		struct callback_lfstack_cell_s* c = padico_malloc(sizeof(struct callback_lfstack_cell_s));
-		c->req = req;
-		/* The main thread can exit without waiting for the end of
-		 * detached requests. The callback thread must then be kept
-		 * alive as long as such requests have a callback to run. */
-
-		callback_lfstack_push(&callback_stack, c);
-		starpu_sem_post(&callback_sem);
-	}
-	else
-	{
-		if(req->detached)
-		{
-			_starpu_mpi_request_destroy(req);
-			// a detached request won't be waited for or tested (and freed there).
-		}
-		else
-		{
-			/* tell anyone potentially waiting on the request that it is
-			 * terminated now (should be done after the callback)*/
-			req->completed = 1;
-			piom_cond_signal(&req->req_cond, REQ_FINALIZED);
-		}
-		int pending_remaining = STARPU_ATOMIC_ADD(&pending_request, -1);
-		/* Last pending request while shutting down: wake the progression
-		 * thread up so that it can exit */
-		if (!running && !pending_remaining)
-			starpu_sem_post(&callback_sem);
-	}
-	_STARPU_MPI_LOG_OUT();
-}
-
-/* Event monitor registered with nm_sr_request_monitor(): ref is the
- * struct _starpu_mpi_req attached to the communication with
- * nm_sr_request_set_ref(). event_info is not used. */
-void _starpu_mpi_handle_request_termination_callback(nm_sr_event_t event, const nm_sr_event_info_t*event_info, void*ref)
-{
-	_starpu_mpi_handle_request_termination(ref,event);
-}
-
-/* Register the termination callback on the communication(s) of req: on the
- * size message too when a send posted one (req->waited > 1), and always on
- * the data message. */
-static void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req)
-{
-	if(req->request_type == SEND_REQ && req->waited>1)
-	{
-		nm_sr_request_set_ref(&(req->size_req), req);
-		nm_sr_request_monitor(req->session, &(req->size_req), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
-	}
-	/* The size message must be monitored first: the callback registered
-	 * below may directly free a detached request (when req->waited>1 the
-	 * request is only freed by the second callback). */
-	nm_sr_request_set_ref(&(req->data_request), req);
-
-	nm_sr_request_monitor(req->session, &(req->data_request), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
-}
-
-/* Data acquisition callback: arg is the struct _starpu_mpi_req whose data
- * is now available in main memory; submit it by calling req->func. */
-static void _starpu_mpi_handle_new_request(void *arg)
-{
-	_STARPU_MPI_LOG_IN();
-	struct _starpu_mpi_req *req = arg;
-	STARPU_ASSERT_MSG(req, "Invalid request");
-
-	/* submit the request to MPI */
-	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
-	req->func(req);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-/* Body of the progression thread.
- *
- * Initializes MPI and the StarPU-MPI sub-modules, signals the thread which
- * called _starpu_mpi_progress_init() that it is ready, then runs the user
- * callbacks of terminated requests until it is asked to stop (running == 0)
- * and no request is pending any more. arg is the heap-allocated
- * struct _starpu_mpi_argc_argv, freed before returning. Always returns NULL.
- */
-static void *_starpu_mpi_progress_thread_func(void *arg)
-{
-	struct _starpu_mpi_argc_argv *argc_argv = (struct _starpu_mpi_argc_argv *) arg;
-
-	starpu_pthread_setname("MPI");
-
-#ifndef STARPU_SIMGRID
-	if (mpi_thread_cpuid >= 0)
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
-	_starpu_mpi_do_initialize(argc_argv);
-	if (mpi_thread_cpuid >= 0)
-		/* In case MPI changed the binding */
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
-#endif
-
-	/* Values different from -1 make communications be faked, see
-	 * _starpu_mpi_isend_irecv_common() */
-	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
-	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
-
-#ifdef STARPU_SIMGRID
-	/* Now that MPI is set up, let the rest of simgrid get initialized */
-	char **argv_cpy;
-	_STARPU_MPI_MALLOC(argv_cpy, *(argc_argv->argc) * sizeof(char*));
-	int i;
-	for (i = 0; i < *(argc_argv->argc); i++)
-		argv_cpy[i] = strdup((*(argc_argv->argv))[i]);
-	MSG_process_create_with_arguments("main", smpi_simulated_main_, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
-	/* And set TSD for us */
-	void **tsd;
-	_STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*));
-	if (!smpi_process_set_user_data)
-	{
-		_STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n");
-	}
-	smpi_process_set_user_data(tsd);
-#endif
-
-#ifdef STARPU_USE_FXT
-	_starpu_fxt_wait_initialisation();
-#endif //STARPU_USE_FXT
-
-	{
-		_STARPU_MPI_TRACE_START(argc_argv->rank, argc_argv->world_size);
-#ifdef STARPU_USE_FXT
-		starpu_profiling_set_id(argc_argv->rank);
-#endif //STARPU_USE_FXT
-	}
-
-	_starpu_mpi_add_sync_point_in_fxt();
-	_starpu_mpi_comm_amounts_init(argc_argv->comm);
-	_starpu_mpi_cache_init(argc_argv->comm);
-	_starpu_mpi_select_node_init();
-	_starpu_mpi_datatype_init();
-
-	/* notify the main thread that the progression thread is ready */
-	STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
-	running = 1;
-	STARPU_PTHREAD_COND_SIGNAL(&progress_cond);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
-
-	/* Callback loop: each iteration runs at most one user callback */
-	while (1)
-	{
-		struct callback_lfstack_cell_s* c = callback_lfstack_pop(&callback_stack);
-		int err=0;
-
-		if(running || pending_request>0)
-		{/* shall we block ? */
-			err = starpu_sem_wait(&callback_sem);
-			//running pending_request can change while waiting
-		}
-		if(c==NULL)
-		{
-			/* Nothing was available before waiting, look again */
-			c = callback_lfstack_pop(&callback_stack);
-			if (c == NULL)
-			{
-				if(running && pending_request>0)
-				{
-					/* c is NULL here, so this assertion always fails:
-					 * being woken up without any callback while still
-					 * running is treated as an error */
-					STARPU_ASSERT_MSG(c!=NULL, "Callback thread awakened without callback ready with error %d.",err);
-				}
-				else
-				{
-					/* Leave only once every request has terminated */
-					if (pending_request==0)
-						break;
-				}
-				continue;
-			}
-		}
-
-
-		c->req->callback(c->req->callback_arg);
-		if (c->req->detached)
-		{
-			_starpu_mpi_request_destroy(c->req);
-		}
-		else
-		{
-			/* we signal that the request is completed.*/
-			c->req->completed=1;
-			piom_cond_signal(&(c->req->req_cond), REQ_FINALIZED);
-		}
-		STARPU_ATOMIC_ADD( &pending_request, -1);
-
-		/* NOTE(review): the cell was allocated with padico_malloc() in
-		 * _starpu_mpi_handle_request_termination() but is released with
-		 * free() -- confirm that both allocators are compatible */
-		free(c);
-
-	}
-	STARPU_ASSERT_MSG(callback_lfstack_pop(&callback_stack)==NULL, "List of callback not empty.");
-	STARPU_ASSERT_MSG(pending_request==0, "Request still pending.");
-
-	if (argc_argv->initialize_mpi)
-	{
-		_STARPU_MPI_DEBUG(3, "Calling MPI_Finalize()\n");
-		MPI_Finalize();
-	}
-
-	starpu_sem_destroy(&callback_sem);
-	free(argc_argv);
-	return NULL;
-}
-
-/********************************************************/
-/*                                                      */
-/*  (De)Initialization methods                          */
-/*                                                      */
-/********************************************************/
-
-// #ifdef STARPU_MPI_ACTIVITY
-// static int hookid = - 1;
-// #endif /* STARPU_MPI_ACTIVITY */
-
-/* When FxT tracing is enabled, synchronize all the nodes of MPI_COMM_WORLD
- * with a barrier and record in the trace a random key chosen by rank 0 and
- * broadcast to everybody. Does nothing without STARPU_USE_FXT. */
-static void _starpu_mpi_add_sync_point_in_fxt(void)
-{
-#ifdef STARPU_USE_FXT
-	int rank;
-	int worldsize;
-	int ret;
-
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
-
-	ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier returning %s", _starpu_mpi_get_mpi_error_code(ret));
-
-	/* We generate a "unique" key so that we can make sure that different
-	 * FxT traces come from the same MPI run. */
-	int random_number;
-
-	/* XXX perhaps we don't want to generate a new seed if the application
-	 * specified some reproducible behaviour ? */
-	if (rank == 0)
-	{
-		srand(time(NULL));
-		random_number = rand();
-	}
-
-	/* The other ranks get the value of rank 0 from the broadcast */
-	ret = MPI_Bcast(&random_number, 1, MPI_INT, 0, MPI_COMM_WORLD);
-	STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Bcast returning %s", _starpu_mpi_get_mpi_error_code(ret));
-
-	_STARPU_MPI_TRACE_BARRIER(rank, worldsize, random_number);
-
-	_STARPU_MPI_DEBUG(3, "unique key %x\n", random_number);
-#endif
-}
-
-/* Create the progression thread and wait until it has signalled that it is
- * ready (running != 0). argc_argv is handed over to the thread, which frees
- * it when it exits. Always returns 0. */
-int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
-{
-        STARPU_PTHREAD_MUTEX_INIT(&progress_mutex, NULL);
-        STARPU_PTHREAD_COND_INIT(&progress_cond, NULL);
-
-	starpu_sem_init(&callback_sem, 0, 0);
-	running = 0;
-	/* CPU to bind the progression thread to, -1 (the default) for no binding */
-	mpi_thread_cpuid = starpu_get_env_number_default("STARPU_MPI_THREAD_CPUID", -1);
-
-	STARPU_PTHREAD_CREATE(&progress_thread, NULL, _starpu_mpi_progress_thread_func, argc_argv);
-
-	/* Wait for the progression thread to be ready */
-        STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
-        while (!running)
-                STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex);
-        STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
-
-        return 0;
-}
-
-/* Ask the progression thread to stop, wait for it to exit and destroy the
- * mutex and condition it used.
- *
- * value is kept for interface compatibility and is not written: the thread
- * return value is joined into a local void *, instead of passing the
- * address of the int * parameter where a void ** is expected. The
- * progression thread always returns NULL anyway.
- */
-void _starpu_mpi_progress_shutdown(int *value)
-{
-	void *thread_ret = NULL;
-
-	/* kill the progression thread */
-	STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
-	running = 0;
-	STARPU_PTHREAD_COND_BROADCAST(&progress_cond);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
-
-	/* the thread may be blocked on the callback semaphore: wake it up so
-	 * that it notices that running changed */
-	starpu_sem_post(&callback_sem);
-
-	starpu_pthread_join(progress_thread, &thread_ret);
-	(void) thread_ret;
-	(void) value;
-
-	STARPU_PTHREAD_MUTEX_DESTROY(&progress_mutex);
-	STARPU_PTHREAD_COND_DESTROY(&progress_cond);
-}
-
-#endif /* STARPU_USE_MPI_NMAD*/

+ 0 - 410
nmad/src/starpu_mpi.c

@@ -1,410 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010-2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2016  Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <limits.h>
-#include <starpu_mpi.h>
-#include <starpu_mpi_datatype.h>
-#include <starpu_mpi_private.h>
-#include <starpu_mpi_cache.h>
-#include <starpu_profiling.h>
-#include <starpu_mpi_stats.h>
-#include <starpu_mpi_cache.h>
-#include <starpu_mpi_select_node.h>
-#include <starpu_mpi_init.h>
-#include <common/config.h>
-#include <common/thread.h>
-#include <datawizard/interfaces/data_interface.h>
-#include <datawizard/coherency.h>
-#include <core/simgrid.h>
-#include <core/task.h>
-#include <core/topology.h>
-#include <core/workers.h>
-
-#if defined(STARPU_USE_MPI_MPI)
-#include <mpi/starpu_mpi_comm.h>
-#include <mpi/starpu_mpi_tag.h>
-#endif
-
-/* Common path of every send variant in this file: forwards to
- * _starpu_mpi_isend_irecv_common as a SEND_REQ using
- * _starpu_mpi_isend_size_func.  The handle is accessed in STARPU_RW mode
- * when STARPU_MPI_PEDANTIC_ISEND is defined, in STARPU_R mode otherwise.
- * The two trailing arguments of the common call are always 0 for sends.
- * Returns whatever _starpu_mpi_isend_irecv_common returns. */
-static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
-							int dest, int data_tag, MPI_Comm comm,
-							unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
-							int sequential_consistency)
-{
-	return _starpu_mpi_isend_irecv_common(data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func,
-#ifdef STARPU_MPI_PEDANTIC_ISEND
-					      STARPU_RW,
-#else
-					      STARPU_R,
-#endif
-					      sequential_consistency, 0, 0);
-}
-
-/* Submit a non-detached, non-synchronous send of data_handle to dest with
- * priority prio and store the resulting request in *public_req.
- * public_req must not be NULL.  Always returns 0. */
-int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int data_tag, int prio, MPI_Comm comm)
-{
-	const unsigned detached_flag = 0;
-	const unsigned sync_flag = 0;
-	const int sequential_consistency = 1;
-	struct _starpu_mpi_req *req;
-
-	_STARPU_MPI_LOG_IN();
-	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_isend needs a valid starpu_mpi_req");
-
-	_STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, data_tag, 0);
-	req = _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, detached_flag, sync_flag, prio, NULL, NULL, sequential_consistency);
-	_STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, data_tag, 0);
-
-	STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common");
-	*public_req = req;
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/* Same as starpu_mpi_isend_prio() with priority 0. */
-int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int data_tag, MPI_Comm comm)
-{
-	const int default_prio = 0;
-
-	return starpu_mpi_isend_prio(data_handle, public_req, dest, data_tag, default_prio, comm);
-}
-
-/* Submit a detached, non-synchronous send of data_handle to dest with
- * priority prio; callback/arg are forwarded to the common send path.
- * The request returned by that path is not kept.  Always returns 0. */
-int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
-{
-	const unsigned detached_flag = 1;
-	const unsigned sync_flag = 0;
-	const int sequential_consistency = 1;
-
-	_STARPU_MPI_LOG_IN();
-	_starpu_mpi_isend_common(data_handle, dest, data_tag, comm, detached_flag, sync_flag, prio, callback, arg, sequential_consistency);
-	_STARPU_MPI_LOG_OUT();
-
-	return 0;
-}
-
-/* Same as starpu_mpi_isend_detached_prio() with priority 0. */
-int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
-{
-	const int default_prio = 0;
-
-	return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, default_prio, comm, callback, arg);
-}
-
-/* Blocking send built from starpu_mpi_isend_prio() followed by
- * starpu_mpi_wait().  The status is zeroed before use and discarded; the
- * return values of both calls are ignored.  Always returns 0. */
-int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int data_tag, int prio, MPI_Comm comm)
-{
-	MPI_Status wait_status;
-	starpu_mpi_req pending;
-
-	_STARPU_MPI_LOG_IN();
-	memset(&wait_status, 0, sizeof(MPI_Status));
-
-	starpu_mpi_isend_prio(data_handle, &pending, dest, data_tag, prio, comm);
-	starpu_mpi_wait(&pending, &wait_status);
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/* Same as starpu_mpi_send_prio() with priority 0. */
-int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int data_tag, MPI_Comm comm)
-{
-	const int default_prio = 0;
-
-	return starpu_mpi_send_prio(data_handle, dest, data_tag, default_prio, comm);
-}
-
-/* Submit a non-detached send with the `sync` flag of the common send path
- * set to 1, and store the resulting request in *public_req.
- * public_req must not be NULL.  Always returns 0. */
-int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int data_tag, int prio, MPI_Comm comm)
-{
-	const unsigned detached_flag = 0;
-	const unsigned sync_flag = 1;
-	const int sequential_consistency = 1;
-	struct _starpu_mpi_req *req;
-
-	_STARPU_MPI_LOG_IN();
-	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_issend needs a valid starpu_mpi_req");
-
-	req = _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, detached_flag, sync_flag, prio, NULL, NULL, sequential_consistency);
-
-	STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common");
-	*public_req = req;
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/* Same as starpu_mpi_issend_prio() with priority 0. */
-int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int data_tag, MPI_Comm comm)
-{
-	const int default_prio = 0;
-
-	return starpu_mpi_issend_prio(data_handle, public_req, dest, data_tag, default_prio, comm);
-}
-
-/* Submit a detached send with the `sync` flag of the common send path set
- * to 1; callback/arg are forwarded.  Always returns 0. */
-int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
-{
-	const unsigned detached_flag = 1;
-	const unsigned sync_flag = 1;
-	const int sequential_consistency = 1;
-
-	_STARPU_MPI_LOG_IN();
-	_starpu_mpi_isend_common(data_handle, dest, data_tag, comm, detached_flag, sync_flag, prio, callback, arg, sequential_consistency);
-	_STARPU_MPI_LOG_OUT();
-
-	return 0;
-}
-
-/* Same as starpu_mpi_issend_detached_prio() with priority 0. */
-int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
-{
-	const int default_prio = 0;
-
-	return starpu_mpi_issend_detached_prio(data_handle, dest, data_tag, default_prio, comm, callback, arg);
-}
-
-/* Common path of every receive variant: forwards to
- * _starpu_mpi_isend_irecv_common as a RECV_REQ with priority 0, using
- * _starpu_mpi_irecv_size_func and STARPU_W access on the handle. */
-struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count)
-{
-	const int recv_prio = 0;
-
-	return _starpu_mpi_isend_irecv_common(data_handle, source, data_tag, comm,
-					      detached, sync, recv_prio,
-					      callback, arg,
-					      RECV_REQ, _starpu_mpi_irecv_size_func, STARPU_W,
-					      sequential_consistency, is_internal_req, count);
-}
-
-/* Submit a non-detached receive of data_handle from source and store the
- * resulting request in *public_req.  public_req must not be NULL.
- * Always returns 0. */
-int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int data_tag, MPI_Comm comm)
-{
-	const unsigned detached_flag = 0;
-	const unsigned sync_flag = 0;
-	const int sequential_consistency = 1;
-	const int is_internal_req = 0;
-	struct _starpu_mpi_req *req;
-
-	_STARPU_MPI_LOG_IN();
-	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_irecv needs a valid starpu_mpi_req");
-
-	_STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(source, data_tag);
-	req = _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, detached_flag, sync_flag, NULL, NULL, sequential_consistency, is_internal_req, 0);
-	_STARPU_MPI_TRACE_IRECV_COMPLETE_END(source, data_tag);
-
-	STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_irecv_common");
-	*public_req = req;
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/* Submit a detached receive of data_handle from source; callback/arg are
- * forwarded to the common receive path, with sequential consistency set
- * to 1.  Always returns 0. */
-int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
-{
-	const unsigned detached_flag = 1;
-	const unsigned sync_flag = 0;
-	const int sequential_consistency = 1;
-	const int is_internal_req = 0;
-
-	_STARPU_MPI_LOG_IN();
-	_starpu_mpi_irecv_common(data_handle, source, data_tag, comm, detached_flag, sync_flag, callback, arg, sequential_consistency, is_internal_req, 0);
-	_STARPU_MPI_LOG_OUT();
-
-	return 0;
-}
-
-/* Detached receive like starpu_mpi_irecv_detached(), except that the
- * caller chooses the sequential_consistency value passed to the common
- * receive path.  Always returns 0. */
-int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
-{
-	const unsigned detached_flag = 1;
-	const unsigned sync_flag = 0;
-	const int is_internal_req = 0;
-
-	_STARPU_MPI_LOG_IN();
-	_starpu_mpi_irecv_common(data_handle, source, data_tag, comm, detached_flag, sync_flag, callback, arg, sequential_consistency, is_internal_req, 0);
-	_STARPU_MPI_LOG_OUT();
-
-	return 0;
-}
-
-/* Blocking receive built from starpu_mpi_irecv() followed by
- * starpu_mpi_wait() on the caller-provided status.  The return values of
- * both calls are ignored.  Always returns 0. */
-int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, MPI_Status *status)
-{
-	starpu_mpi_req pending;
-
-	_STARPU_MPI_LOG_IN();
-
-	starpu_mpi_irecv(data_handle, &pending, source, data_tag, comm);
-	starpu_mpi_wait(&pending, status);
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/* Public wrapper: returns the result of _starpu_mpi_wait(). */
-int starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
-{
-	int ret = _starpu_mpi_wait(public_req, status);
-
-	return ret;
-}
-
-/* Public wrapper: returns the result of _starpu_mpi_test(). */
-int starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
-{
-	int ret = _starpu_mpi_test(public_req, flag, status);
-
-	return ret;
-}
-
-/* Public wrapper: returns the result of _starpu_mpi_barrier(). */
-int starpu_mpi_barrier(MPI_Comm comm)
-{
-	int ret = _starpu_mpi_barrier(comm);
-
-	return ret;
-}
-
-/* Release the MPI-specific state attached to data_handle.  Installed as the
- * handle's unregister hook by starpu_mpi_data_register_comm(). */
-void _starpu_mpi_data_clear(starpu_data_handle_t data_handle)
-{
-#if defined(STARPU_USE_MPI_MPI)
-	_starpu_mpi_tag_data_release(data_handle);
-#endif
-	/* must run before the free below: it still dereferences mpi_data */
-	_starpu_mpi_cache_data_clear(data_handle);
-	free(data_handle->mpi_data);
-}
-
-/* Attach (or update) the MPI tag, owner rank and communicator of
- * data_handle.
- *
- * On the first call for a handle the per-handle MPI state is allocated and
- * initialised (tag -1, rank -1, MPI_COMM_WORLD), the cache state is set up
- * and _starpu_mpi_data_clear is installed as unregister hook.
- * A tag or rank equal to -1 leaves the corresponding field untouched; comm
- * is only recorded together with a rank different from -1. */
-void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm)
-{
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-
-	if (!mpi_data)
-	{
-		/* first registration of this handle */
-		_STARPU_CALLOC(mpi_data, 1, sizeof(struct _starpu_mpi_data));
-		mpi_data->magic = 42;
-		mpi_data->node_tag.data_tag = -1;
-		mpi_data->node_tag.rank = -1;
-		mpi_data->node_tag.comm = MPI_COMM_WORLD;
-		data_handle->mpi_data = mpi_data;
-#if defined(STARPU_USE_MPI_MPI)
-		_starpu_mpi_tag_data_register(data_handle, tag);
-#endif
-		_starpu_mpi_cache_data_init(data_handle);
-		_starpu_data_set_unregister_hook(data_handle, _starpu_mpi_data_clear);
-	}
-
-	if (tag != -1)
-		mpi_data->node_tag.data_tag = tag;
-
-	if (rank == -1)
-		return;
-
-	_STARPU_MPI_TRACE_DATA_SET_RANK(data_handle, rank);
-	mpi_data->node_tag.rank = rank;
-	mpi_data->node_tag.comm = comm;
-#if defined(STARPU_USE_MPI_MPI)
-	_starpu_mpi_comm_register(comm);
-#endif
-}
-
-/* Set the owner rank and communicator of handle, leaving its tag as is
- * (a tag of -1 means "do not change" for the register function). */
-void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm)
-{
-	const int keep_tag = -1;
-
-	starpu_mpi_data_register_comm(handle, keep_tag, rank, comm);
-}
-
-/* Set the tag of handle, leaving its rank and communicator as is
- * (a rank of -1 means "do not change" for the register function). */
-void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag)
-{
-	const int keep_rank = -1;
-
-	starpu_mpi_data_register_comm(handle, tag, keep_rank, MPI_COMM_WORLD);
-}
-
-/* Return the owner rank recorded for data (-1 if none was set).
- * The handle must already carry MPI state. */
-int starpu_mpi_data_get_rank(starpu_data_handle_t data)
-{
-	struct _starpu_mpi_data *mpi_data;
-
-	STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data);
-	mpi_data = (struct _starpu_mpi_data *)(data->mpi_data);
-	return mpi_data->node_tag.rank;
-}
-
-/* Return the tag recorded for data (-1 if none was set).
- * The handle must already carry MPI state. */
-int starpu_mpi_data_get_tag(starpu_data_handle_t data)
-{
-	struct _starpu_mpi_data *mpi_data;
-
-	STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data);
-	mpi_data = (struct _starpu_mpi_data *)(data->mpi_data);
-	return mpi_data->node_tag.data_tag;
-}
-
-/* Make sure, without blocking, that `node` gets a copy of data_handle.
- *
- * Nothing is done when `node` already owns the data.  Otherwise:
- *  - on `node`, a detached receive from the owner is posted unless the
- *    receive cache says the data was already received; callback/arg are
- *    handed to that receive only;
- *  - on the owner, a detached send to `node` is posted (without callback)
- *    unless the send cache says it was already sent.
- * Other ranks do nothing.
- *
- * The handle must have both a rank and a tag; otherwise _STARPU_ERROR is
- * raised.  The error messages name the setter to use (they used to repeat
- * starpu_mpi_data_register() twice). */
-void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg)
-{
-	int me, rank, tag;
-
-	rank = starpu_mpi_data_get_rank(data_handle);
-	if (rank == -1)
-	{
-		_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register() or starpu_mpi_data_set_rank_comm()\n");
-	}
-
-	starpu_mpi_comm_rank(comm, &me);
-	if (node == rank)
-		return;
-
-	tag = starpu_mpi_data_get_tag(data_handle);
-	if (tag == -1)
-	{
-		_STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register() or starpu_mpi_data_set_tag()\n");
-	}
-
-	if (me == node)
-	{
-		/* receiving side */
-		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_received = _starpu_mpi_cache_received_data_set(data_handle);
-		if (already_received == 0)
-		{
-			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
-			starpu_mpi_irecv_detached(data_handle, rank, tag, comm, callback, arg);
-		}
-	}
-	else if (me == rank)
-	{
-		/* owning side */
-		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
-		if (already_sent == 0)
-		{
-			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
-			starpu_mpi_isend_detached(data_handle, node, tag, comm, NULL, NULL);
-		}
-	}
-}
-
-/* Blocking counterpart of starpu_mpi_get_data_on_node_detached(): the
- * destination node uses starpu_mpi_recv() and the owner starpu_mpi_send(),
- * each skipped when the corresponding cache reports the transfer as
- * already done.  Nothing happens when `node` is the owner or when the
- * calling rank is neither of the two. */
-void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node)
-{
-	int me;
-	int owner;
-	int data_tag;
-
-	owner = starpu_mpi_data_get_rank(data_handle);
-	if (owner == -1)
-	{
-		_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
-	}
-
-	starpu_mpi_comm_rank(comm, &me);
-	if (node == owner)
-		return;
-
-	data_tag = starpu_mpi_data_get_tag(data_handle);
-	if (data_tag == -1)
-	{
-		_STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
-	}
-
-	if (me == node)
-	{
-		/* receiving side */
-		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, owner, node);
-		if (_starpu_mpi_cache_received_data_set(data_handle) == 0)
-		{
-			MPI_Status status;
-			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, owner);
-			starpu_mpi_recv(data_handle, owner, data_tag, comm, &status);
-		}
-		return;
-	}
-
-	if (me == owner)
-	{
-		/* owning side */
-		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, owner, node);
-		if (_starpu_mpi_cache_sent_data_set(data_handle, node) == 0)
-		{
-			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
-			starpu_mpi_send(data_handle, node, data_tag, comm);
-		}
-	}
-}
-
-/* Request, without blocking and without completion callback, a copy of
- * data_handle on every rank of comm, one rank after the other. */
-void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle)
-{
-	int nb_nodes;
-	int node = 0;
-
-	starpu_mpi_comm_size(comm, &nb_nodes);
-#ifdef STARPU_DEVEL
-#warning TODO: use binary communication tree to optimize broadcast
-#endif
-	while (node < nb_nodes)
-	{
-		starpu_mpi_get_data_on_node_detached(comm, data_handle, node, NULL, NULL);
-		node++;
-	}
-}
-
-/* Change the owner of data to new_rank: submit the transfer towards
- * new_rank, record the new owner, then flush the communication cache for
- * this handle.  Does nothing when new_rank already owns the data. */
-void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_rank)
-{
-	int current_owner = starpu_mpi_data_get_rank(data);
-
-	if (new_rank != current_owner)
-	{
-		/* First submit data migration if it's not already on destination */
-		starpu_mpi_get_data_on_node_detached(comm, data, new_rank, NULL, NULL);
-
-		/* And note new owner */
-		starpu_mpi_data_set_rank_comm(data, new_rank, comm);
-
-		/* Flush cache in all other nodes */
-		/* TODO: Ideally we'd transmit the knowledge of who owns it */
-		starpu_mpi_cache_flush(comm, data);
-	}
-}
-
-/* Alternate between waiting for all tasks and running a barrier on comm
- * until a round reports 0 for both.  Always returns 0.
- * NOTE(review): the barrier's return value is used as a "something was
- * still pending" indicator -- confirm against _starpu_mpi_barrier. */
-int starpu_mpi_wait_for_all(MPI_Comm comm)
-{
-	int waited_tasks;
-	int barrier_ret;
-
-	do
-	{
-		waited_tasks = _starpu_task_wait_for_all_and_return_nb_waited_tasks();
-		barrier_ret = _starpu_mpi_barrier(comm);
-	}
-	while (waited_tasks || barrier_ret);
-
-	return 0;
-}

+ 0 - 398
nmad/src/starpu_mpi_cache.c

@@ -1,398 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2011-2017  Université de Bordeaux
- * Copyright (C) 2014 INRIA
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <common/uthash.h>
-#include <datawizard/coherency.h>
-
-#include <starpu_mpi_cache.h>
-#include <starpu_mpi_cache_stats.h>
-#include <starpu_mpi_private.h>
-
-/* Element of the _cache_data hash table: one entry per data handle that
- * currently has something recorded in the communication cache. */
-struct _starpu_data_entry
-{
-	UT_hash_handle hh;
-	starpu_data_handle_t data_handle;
-};
-
-/* Protects the cache state below and the per-handle cache fields */
-static starpu_pthread_mutex_t _cache_mutex;
-/* uthash head of the handles known to the cache */
-static struct _starpu_data_entry *_cache_data = NULL;
-/* Whether we are allowed to keep copies of remote data. */
-int _starpu_cache_enabled=1;
-/* Communicator given to _starpu_mpi_cache_init() and its size */
-static MPI_Comm _starpu_cache_comm;
-static int _starpu_cache_comm_size;
-
-static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle);
-
-/* Return 1 when the communication cache flag is exactly 1, 0 otherwise. */
-int starpu_mpi_cache_is_enabled()
-{
-	if (_starpu_cache_enabled == 1)
-		return 1;
-	return 0;
-}
-
-/* Enable (enabled == 1) or disable (any other value) the communication
- * cache.  Disabling a currently enabled cache first flushes every cached
- * data and shuts the cache down.  Always returns 0. */
-int starpu_mpi_cache_set(int enabled)
-{
-	if (enabled == 1)
-	{
-		_starpu_cache_enabled = 1;
-		return 0;
-	}
-
-	if (_starpu_cache_enabled != 0)
-	{
-		/* the cache must be emptied before it is switched off */
-		starpu_mpi_cache_flush_all_data(_starpu_cache_comm);
-		_starpu_mpi_cache_shutdown();
-	}
-	_starpu_cache_enabled = 0;
-	return 0;
-}
-
-/* Initialise the communication cache for comm.
- * STARPU_MPI_CACHE selects the mode; when starpu_get_env_number() returns
- * -1 the cache is enabled.  When the cache is disabled a warning is
- * displayed and nothing else is set up. */
-void _starpu_mpi_cache_init(MPI_Comm comm)
-{
-	int requested = starpu_get_env_number("STARPU_MPI_CACHE");
-
-	_starpu_cache_enabled = (requested == -1) ? 1 : requested;
-
-	if (_starpu_cache_enabled == 0)
-	{
-		_STARPU_DISP("Warning: StarPU MPI Communication cache is disabled\n");
-		return;
-	}
-
-	_starpu_cache_comm = comm;
-	starpu_mpi_comm_size(comm, &_starpu_cache_comm_size);
-	_starpu_mpi_cache_stats_init();
-	STARPU_PTHREAD_MUTEX_INIT(&_cache_mutex, NULL);
-}
-
-/* Tear the communication cache down: every hash entry is removed and freed,
- * the cache mutex is destroyed and the statistics module is shut down.
- * Does nothing when the cache is disabled. */
-void _starpu_mpi_cache_shutdown()
-{
-	if (_starpu_cache_enabled == 0)
-		return;
-
-	struct _starpu_data_entry *entry, *tmp;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	/* tmp is the iterator's look-ahead, so entry can be deleted here */
-	HASH_ITER(hh, _cache_data, entry, tmp)
-	{
-		HASH_DEL(_cache_data, entry);
-		free(entry);
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-	STARPU_PTHREAD_MUTEX_DESTROY(&_cache_mutex);
-	/* NOTE(review): presumably NULL at this point since every entry was
-	 * removed above -- confirm against uthash's HASH_DEL */
-	free(_cache_data);
-	_starpu_mpi_cache_stats_shutdown();
-}
-
-/* Drop everything the cache holds for data_handle.  When the cache flag is
- * 1, the send/receive flags are reset and the hash entry of the handle, if
- * any, is removed.  The cache_sent array is freed in every case. */
-void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle)
-{
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-
-	if (_starpu_cache_enabled == 1)
-	{
-		struct _starpu_data_entry *entry;
-		STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-		_starpu_mpi_cache_flush_nolock(data_handle);
-		HASH_FIND_PTR(_cache_data, &data_handle, entry);
-		if (entry != NULL)
-		{
-			HASH_DEL(_cache_data, entry);
-			free(entry);
-		}
-		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-	}
-
-	/* only allocated by _starpu_mpi_cache_data_init() when the cache was
-	 * enabled at that time */
-	free(mpi_data->cache_sent);
-}
-
-/* Initialise the cache fields of data_handle: the "received" flag is
- * cleared and a "sent" flag array with one zeroed slot per rank of the
- * cache communicator is allocated.  Does nothing when the cache is
- * disabled. */
-void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle)
-{
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-	int node;
-
-	if (_starpu_cache_enabled == 0)
-		return;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	mpi_data->cache_received = 0;
-	_STARPU_MALLOC(mpi_data->cache_sent, _starpu_cache_comm_size*sizeof(mpi_data->cache_sent[0]));
-	node = 0;
-	while (node < _starpu_cache_comm_size)
-	{
-		mpi_data->cache_sent[node] = 0;
-		node++;
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-}
-
-/* Insert data_handle in the cache hash table unless it is already there.
- * No locking is done here.  Does nothing when the cache is disabled. */
-static void _starpu_mpi_cache_data_add_nolock(starpu_data_handle_t data_handle)
-{
-	struct _starpu_data_entry *entry;
-
-	if (_starpu_cache_enabled == 0)
-		return;
-
-	HASH_FIND_PTR(_cache_data, &data_handle, entry);
-	if (entry != NULL)
-		return;
-
-	_STARPU_MPI_MALLOC(entry, sizeof(*entry));
-	entry->data_handle = data_handle;
-	HASH_ADD_PTR(_cache_data, data_handle, entry);
-}
-
-/* Remove and free the hash entry of data_handle, if there is one.
- * No locking is done here.  Does nothing when the cache is disabled. */
-static void _starpu_mpi_cache_data_remove_nolock(starpu_data_handle_t data_handle)
-{
-	struct _starpu_data_entry *entry;
-
-	if (_starpu_cache_enabled == 0)
-		return;
-
-	HASH_FIND_PTR(_cache_data, &data_handle, entry);
-	if (!entry)
-		return;
-
-	HASH_DEL(_cache_data, entry);
-	free(entry);
-}
-
-/**************************************
- * Received cache
- **************************************/
-/* Forget that data_handle was received: when its "received" flag is set,
- * the flag is cleared, an invalidation of the handle is submitted, its hash
- * entry is removed and the statistics are decremented for the owner rank.
- * Does nothing when the cache is disabled. */
-void _starpu_mpi_cache_received_data_clear(starpu_data_handle_t data_handle)
-{
-	/* evaluated even when the cache is disabled, so the handle must
-	 * carry MPI state in every case */
-	int mpi_rank = starpu_mpi_data_get_rank(data_handle);
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-
-	if (_starpu_cache_enabled == 0)
-		return;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	STARPU_ASSERT(mpi_data->magic == 42);
-	STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", mpi_rank, _starpu_cache_comm_size);
-
-	if (mpi_data->cache_received == 1)
-	{
-#ifdef STARPU_DEVEL
-#  warning TODO: Somebody else will write to the data, so discard our cached copy if any. starpu_mpi could just remember itself.
-#endif
-		_STARPU_MPI_DEBUG(2, "Clearing receive cache for data %p\n", data_handle);
-		mpi_data->cache_received = 0;
-		starpu_data_invalidate_submit(data_handle);
-		_starpu_mpi_cache_data_remove_nolock(data_handle);
-		_starpu_mpi_cache_stats_dec(mpi_rank, data_handle);
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-}
-
-/* Note that data_handle is being received and report whether it already
- * was: returns the previous value of the "received" flag (0 when the data
- * had not been received yet, in which case the flag is now set, the handle
- * is added to the hash table and the statistics are incremented).
- * Returns 0 when the cache is disabled. */
-int _starpu_mpi_cache_received_data_set(starpu_data_handle_t data_handle)
-{
-	int mpi_rank = starpu_mpi_data_get_rank(data_handle);
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-	int previous;
-
-	if (_starpu_cache_enabled == 0)
-		return 0;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	STARPU_ASSERT(mpi_data->magic == 42);
-	STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", mpi_rank, _starpu_cache_comm_size);
-
-	previous = mpi_data->cache_received;
-	if (previous != 0)
-	{
-		_STARPU_MPI_DEBUG(2, "Do not receive data %p from node %d as it is already available\n", data_handle, mpi_rank);
-	}
-	else
-	{
-		_STARPU_MPI_DEBUG(2, "Noting that data %p has already been received by %d\n", data_handle, mpi_rank);
-		mpi_data->cache_received = 1;
-		_starpu_mpi_cache_data_add_nolock(data_handle);
-		_starpu_mpi_cache_stats_inc(mpi_rank, data_handle);
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-
-	return previous;
-}
-
-/* Return the current "received" flag of data_handle without modifying it;
- * 0 when the cache is disabled. */
-int _starpu_mpi_cache_received_data_get(starpu_data_handle_t data_handle)
-{
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-	int flag;
-
-	if (_starpu_cache_enabled == 0)
-		return 0;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	STARPU_ASSERT(mpi_data->magic == 42);
-	flag = mpi_data->cache_received;
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-
-	return flag;
-}
-
-/* Public wrapper around _starpu_mpi_cache_received_data_get(). */
-int starpu_mpi_cached_receive(starpu_data_handle_t data_handle)
-{
-	int flag = _starpu_mpi_cache_received_data_get(data_handle);
-
-	return flag;
-}
-
-/**************************************
- * Send cache
- **************************************/
-/* Clear the "sent" flag of data_handle for every rank of the handle's
- * communicator; the hash entry of the handle is removed each time a set
- * flag is found.  Does nothing when the cache is disabled. */
-void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data_handle)
-{
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-	int nb_nodes;
-	int node;
-
-	if (_starpu_cache_enabled == 0)
-		return;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	starpu_mpi_comm_size(mpi_data->node_tag.comm, &nb_nodes);
-	for (node = 0; node < nb_nodes; node++)
-	{
-		if (mpi_data->cache_sent[node] != 1)
-			continue;
-
-		_STARPU_MPI_DEBUG(2, "Clearing send cache for data %p\n", data_handle);
-		mpi_data->cache_sent[node] = 0;
-		_starpu_mpi_cache_data_remove_nolock(data_handle);
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-}
-
-/* Note that data_handle is being sent to dest and report whether it
- * already was: returns the previous value of the "sent" flag for dest
- * (0 when it had not been sent yet, in which case the flag is now set and
- * the handle is added to the hash table).
- * Returns 0 when the cache is disabled. */
-int _starpu_mpi_cache_sent_data_set(starpu_data_handle_t data_handle, int dest)
-{
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-	int previous;
-
-	if (_starpu_cache_enabled == 0)
-		return 0;
-
-	STARPU_MPI_ASSERT_MSG(dest < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", dest, _starpu_cache_comm_size);
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	previous = mpi_data->cache_sent[dest];
-	if (previous != 0)
-	{
-		_STARPU_MPI_DEBUG(2, "Do not send data %p to node %d as it has already been sent\n", data_handle, dest);
-	}
-	else
-	{
-		mpi_data->cache_sent[dest] = 1;
-		_starpu_mpi_cache_data_add_nolock(data_handle);
-		_STARPU_MPI_DEBUG(2, "Noting that data %p has already been sent to %d\n", data_handle, dest);
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-
-	return previous;
-}
-
-/* Return the current "sent" flag of data_handle for dest without
- * modifying it; 0 when the cache is disabled. */
-int _starpu_mpi_cache_sent_data_get(starpu_data_handle_t data_handle, int dest)
-{
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-	int flag;
-
-	if (_starpu_cache_enabled == 0)
-		return 0;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	STARPU_MPI_ASSERT_MSG(dest < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", dest, _starpu_cache_comm_size);
-	flag = mpi_data->cache_sent[dest];
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-
-	return flag;
-}
-
-/* Public wrapper around _starpu_mpi_cache_sent_data_get(). */
-int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest)
-{
-	int flag = _starpu_mpi_cache_sent_data_get(data_handle, dest);
-
-	return flag;
-}
-
-/* Reset every cache flag of data_handle (all "sent" flags over the
- * handle's communicator, then the "received" flag) and decrement the
- * statistics for each flag that was set.  The hash table is not touched
- * and no locking is done here.  Does nothing when the cache is disabled. */
-static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle)
-{
-	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
-	int nb_nodes;
-	int node;
-
-	if (_starpu_cache_enabled == 0)
-		return;
-
-	starpu_mpi_comm_size(mpi_data->node_tag.comm, &nb_nodes);
-	for (node = 0; node < nb_nodes; node++)
-	{
-		if (mpi_data->cache_sent[node] != 1)
-			continue;
-
-		_STARPU_MPI_DEBUG(2, "Clearing send cache for data %p\n", data_handle);
-		mpi_data->cache_sent[node] = 0;
-		_starpu_mpi_cache_stats_dec(node, data_handle);
-	}
-
-	if (mpi_data->cache_received != 1)
-		return;
-
-	int owner = starpu_mpi_data_get_rank(data_handle);
-	_STARPU_MPI_DEBUG(2, "Clearing received cache for data %p\n", data_handle);
-	mpi_data->cache_received = 0;
-	_starpu_mpi_cache_stats_dec(owner, data_handle);
-}
-
-/* Locked variant of _starpu_mpi_cache_flush_nolock(); does nothing when
- * the cache is disabled. */
-void _starpu_mpi_cache_flush(starpu_data_handle_t data_handle)
-{
-	if (_starpu_cache_enabled != 0)
-	{
-		STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-		_starpu_mpi_cache_flush_nolock(data_handle);
-		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-	}
-}
-
-/* Reset the cache flags of data_handle and, when the handle has an owner
- * rank that is not the calling rank in comm, submit an invalidation of
- * the local copy.  No locking is done here. */
-static void _starpu_mpi_cache_flush_and_invalidate_nolock(MPI_Comm comm, starpu_data_handle_t data_handle)
-{
-	int my_rank;
-	int owner;
-
-	_starpu_mpi_cache_flush_nolock(data_handle);
-
-	starpu_mpi_comm_rank(comm, &my_rank);
-	owner = starpu_mpi_data_get_rank(data_handle);
-	if (owner == my_rank || owner == -1)
-		return;
-
-	starpu_data_invalidate_submit(data_handle);
-}
-
-/* Flush the cache state of data_handle, invalidate the local copy when
- * another rank owns it, and drop the handle from the hash table.
- * Does nothing when the cache is disabled. */
-void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
-{
-	if (_starpu_cache_enabled != 0)
-	{
-		STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-		_starpu_mpi_cache_flush_and_invalidate_nolock(comm, data_handle);
-		_starpu_mpi_cache_data_remove_nolock(data_handle);
-		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-	}
-}
-
-/* Apply the flush-and-invalidate treatment to every handle recorded in the
- * cache hash table, emptying the table on the way.
- * Does nothing when the cache is disabled. */
-void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
-{
-	struct _starpu_data_entry *entry, *tmp;
-
-	if (_starpu_cache_enabled == 0)
-		return;
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	/* tmp is the iterator's look-ahead, so entry can be deleted here */
-	HASH_ITER(hh, _cache_data, entry, tmp)
-	{
-		_starpu_mpi_cache_flush_and_invalidate_nolock(comm, entry->data_handle);
-		HASH_DEL(_cache_data, entry);
-		free(entry);
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
-}

+ 0 - 59
nmad/src/starpu_mpi_cache.h

@@ -1,59 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2011-2014, 2017  Université de Bordeaux
- * Copyright (C) 2014 INRIA
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_CACHE_H__
-#define __STARPU_MPI_CACHE_H__
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <mpi.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-extern int _starpu_cache_enabled;
-void _starpu_mpi_cache_init(MPI_Comm comm);
-void _starpu_mpi_cache_shutdown();
-void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle);
-void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle);
-
-/*
- * _set: if the data is already noted as received, return non-zero;
- * otherwise note it as received and return 0.
- * _get: return the current flag without changing it.
- * _clear: forget that the data was received.
- */
-int _starpu_mpi_cache_received_data_set(starpu_data_handle_t data);
-int _starpu_mpi_cache_received_data_get(starpu_data_handle_t data);
-void _starpu_mpi_cache_received_data_clear(starpu_data_handle_t data);
-
-/*
- * _set: if the data is already noted as sent to dest, return non-zero;
- * otherwise note it as sent and return 0.
- * _get: return the current flag for dest without changing it.
- * _clear: forget that the data was sent, for every destination.
- */
-int _starpu_mpi_cache_sent_data_set(starpu_data_handle_t data, int dest);
-int _starpu_mpi_cache_sent_data_get(starpu_data_handle_t data, int dest);
-void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data);
-
-/* Reset all the cache flags of the given data */
-void _starpu_mpi_cache_flush(starpu_data_handle_t data_handle);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __STARPU_MPI_CACHE_H__

+ 0 - 62
nmad/src/starpu_mpi_cache_stats.c

@@ -1,62 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi_cache_stats.h>
-#include <common/config.h>
-#include <stdio.h>
-#include <starpu_mpi_private.h>
-
-static int stats_enabled=0; /* set from STARPU_MPI_CACHE_STATS at init; 0 silences the statistics */
-
-/* Read STARPU_MPI_CACHE_STATS (statistics stay off when
- * starpu_get_env_number() returns -1) and display a warning when the
- * statistics are switched on. */
-void _starpu_mpi_cache_stats_init()
-{
-	int requested = starpu_get_env_number("STARPU_MPI_CACHE_STATS");
-
-	stats_enabled = (requested == -1) ? 0 : requested;
-
-	if (stats_enabled != 0)
-	{
-		_STARPU_DISP("Warning: StarPU is executed with STARPU_MPI_CACHE_STATS=1, which slows down a bit\n");
-	}
-}
-
-/* Nothing to release: the function returns immediately whether or not the
- * statistics are enabled. */
-void _starpu_mpi_cache_stats_shutdown()
-{
-	if (!stats_enabled)
-		return;
-}
-
-/* Print one "[communication cache]" line giving the size of data_handle:
- * a "+ ... to" line when count is 1, a "- ... from" line for any other
- * count.  Does nothing when the statistics are disabled. */
-void _starpu_mpi_cache_stats_update(unsigned dst, starpu_data_handle_t data_handle, int count)
-{
-	size_t nbytes;
-
-	if (stats_enabled == 0)
-		return;
-
-	nbytes = starpu_data_get_size(data_handle);
-
-	if (count != 1) // count == -1
-	{
-		_STARPU_MPI_MSG("[communication cache] - %10ld from %d\n", (long)nbytes, dst);
-	}
-	else
-	{
-		_STARPU_MPI_MSG("[communication cache] + %10ld to   %d\n", (long)nbytes, dst);
-	}
-}
-

+ 0 - 41
nmad/src/starpu_mpi_cache_stats.h

@@ -1,41 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_CACHE_STATS_H__
-#define __STARPU_MPI_CACHE_STATS_H__
-
-#include <starpu.h>
-#include <stdlib.h>
-#include <mpi.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/* Start-up / tear-down of the communication cache statistics */
-void _starpu_mpi_cache_stats_init();
-void _starpu_mpi_cache_stats_shutdown();
-
-/* Report the size of data_handle for node dst; count is +1 or -1 (see the
- * two helper macros below) */
-void _starpu_mpi_cache_stats_update(unsigned dst, starpu_data_handle_t data_handle, int count);
-
-/* Shorthands passing +1 and -1 as count */
-#define _starpu_mpi_cache_stats_inc(dst, data_handle) _starpu_mpi_cache_stats_update(dst, data_handle, +1)
-#define _starpu_mpi_cache_stats_dec(dst, data_handle) _starpu_mpi_cache_stats_update(dst, data_handle, -1)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __STARPU_MPI_CACHE_STATS_H__

+ 0 - 156
nmad/src/starpu_mpi_collective.c

@@ -1,156 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <mpi.h>
-#include <starpu.h>
-#include <starpu_mpi.h>
-#include <starpu_mpi_private.h>
-
-struct _callback_arg
-{
-	void (*callback)(void *);
-	void *arg;
-	int nb;
-	int count;
-};
-
-static
-void _callback_collective(void *arg)
-{
-	struct _callback_arg *callback_arg = arg;
-	callback_arg->nb ++;
-	if (callback_arg->nb == callback_arg->count)
-	{
-		callback_arg->callback(callback_arg->arg);
-		free(callback_arg);
-	}
-}
-
-static
-int _callback_set(int rank, starpu_data_handle_t *data_handles, int count, int root, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg, void (**callback_func)(void *), struct _callback_arg **callback_arg)
-{
-	void (*callback)(void *);
-
-	callback = (rank == root) ? scallback : rcallback;
-	if (*callback)
-	{
-		int x;
-
-		*callback_func = _callback_collective;
-
-		_STARPU_MPI_MALLOC(*callback_arg, sizeof(struct _callback_arg));
-		(*callback_arg)->count = 0;
-		(*callback_arg)->nb = 0;
-		(*callback_arg)->callback = (rank == root) ? scallback : rcallback;
-		(*callback_arg)->arg = (rank == root) ? sarg : rarg;
-
-		for(x = 0; x < count ; x++)
-		{
-			if (data_handles[x])
-			{
-				int owner = starpu_mpi_data_get_rank(data_handles[x]);
-				int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
-				STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
-				if ((rank == root) && (owner != root))
-				{
-					(*callback_arg)->count ++;
-				}
-				if ((rank != root) && (owner == rank))
-				{
-					(*callback_arg)->count ++;
-				}
-			}
-		}
-
-		if (!(*callback_arg)->count)
-		{
-			free(*callback_arg);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
-{
-	int rank;
-	int x;
-	struct _callback_arg *callback_arg = NULL;
-	void (*callback_func)(void *) = NULL;
-
-	starpu_mpi_comm_rank(comm, &rank);
-
-	x = _callback_set(rank, data_handles, count, root, scallback, sarg, rcallback, rarg, &callback_func, &callback_arg);
-	if (x == 1)
-		return 0;
-
-	for(x = 0; x < count ; x++)
-	{
-		if (data_handles[x])
-		{
-			int owner = starpu_mpi_data_get_rank(data_handles[x]);
-			int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
-			STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
-			if ((rank == root) && (owner != root))
-			{
-				//fprintf(stderr, "[%d] Sending data[%d] to %d\n", rank, x, owner);
-				starpu_mpi_isend_detached(data_handles[x], owner, data_tag, comm, callback_func, callback_arg);
-			}
-			if ((rank != root) && (owner == rank))
-			{
-				//fprintf(stderr, "[%d] Receiving data[%d] from %d\n", rank, x, root);
-				starpu_mpi_irecv_detached(data_handles[x], root, data_tag, comm, callback_func, callback_arg);
-			}
-		}
-	}
-	return 0;
-}
-
-int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
-{
-	int rank;
-	int x;
-	struct _callback_arg *callback_arg = NULL;
-	void (*callback_func)(void *) = NULL;
-
-	starpu_mpi_comm_rank(comm, &rank);
-
-	x = _callback_set(rank, data_handles, count, root, scallback, sarg, rcallback, rarg, &callback_func, &callback_arg);
-	if (x == 1)
-		return 0;
-
-	for(x = 0; x < count ; x++)
-	{
-		if (data_handles[x])
-		{
-			int owner = starpu_mpi_data_get_rank(data_handles[x]);
-			int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
-			STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
-			if ((rank == root) && (owner != root))
-			{
-				//fprintf(stderr, "[%d] Receiving data[%d] from %d\n", rank, x, owner);
-				starpu_mpi_irecv_detached(data_handles[x], owner, data_tag, comm, callback_func, callback_arg);
-			}
-			if ((rank != root) && (owner == rank))
-			{
-				//fprintf(stderr, "[%d] Sending data[%d] to %d\n", rank, x, root);
-				starpu_mpi_isend_detached(data_handles[x], root, data_tag, comm, callback_func, callback_arg);
-			}
-		}
-	}
-	return 0;
-}

+ 0 - 323
nmad/src/starpu_mpi_datatype.c

@@ -1,323 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2011, 2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi_datatype.h>
-#include <common/uthash.h>
-#include <datawizard/coherency.h>
-
-struct _starpu_mpi_datatype_funcs
-{
-	enum starpu_data_interface_id id;
-	starpu_mpi_datatype_allocate_func_t allocate_datatype_func;
-	starpu_mpi_datatype_free_func_t free_datatype_func;
-	UT_hash_handle hh;
-};
-
-static starpu_pthread_mutex_t _starpu_mpi_datatype_funcs_table_mutex;
-static struct _starpu_mpi_datatype_funcs *_starpu_mpi_datatype_funcs_table = NULL;
-
-void _starpu_mpi_datatype_init(void)
-{
-	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_datatype_funcs_table_mutex, NULL);
-}
-
-void _starpu_mpi_datatype_shutdown(void)
-{
-	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_datatype_funcs_table_mutex);
-}
-
-/*
- * 	Matrix
- */
-
-static void handle_to_datatype_matrix(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	int ret;
-
-	unsigned nx = starpu_matrix_get_nx(data_handle);
-	unsigned ny = starpu_matrix_get_ny(data_handle);
-	unsigned ld = starpu_matrix_get_local_ld(data_handle);
-	size_t elemsize = starpu_matrix_get_elemsize(data_handle);
-
-	ret = MPI_Type_vector(ny, nx*elemsize, ld*elemsize, MPI_BYTE, datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed");
-
-	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
-}
-
-/*
- * 	Block
- */
-
-static void handle_to_datatype_block(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	int ret;
-
-	unsigned nx = starpu_block_get_nx(data_handle);
-	unsigned ny = starpu_block_get_ny(data_handle);
-	unsigned nz = starpu_block_get_nz(data_handle);
-	unsigned ldy = starpu_block_get_local_ldy(data_handle);
-	unsigned ldz = starpu_block_get_local_ldz(data_handle);
-	size_t elemsize = starpu_block_get_elemsize(data_handle);
-
-	MPI_Datatype datatype_2dlayer;
-	ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_2dlayer);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed");
-
-	ret = MPI_Type_commit(&datatype_2dlayer);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
-
-	ret = MPI_Type_hvector(nz, 1, ldz*elemsize, datatype_2dlayer, datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed");
-
-	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
-}
-
-/*
- * 	Vector
- */
-
-static void handle_to_datatype_vector(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	int ret;
-
-	unsigned nx = starpu_vector_get_nx(data_handle);
-	size_t elemsize = starpu_vector_get_elemsize(data_handle);
-
-	ret = MPI_Type_contiguous(nx*elemsize, MPI_BYTE, datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed");
-
-	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
-}
-
-/*
- * 	Variable
- */
-
-static void handle_to_datatype_variable(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	int ret;
-
-	size_t elemsize = starpu_variable_get_elemsize(data_handle);
-
-	ret = MPI_Type_contiguous(elemsize, MPI_BYTE, datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed");
-
-	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
-}
-
-/*
- * 	Void
- */
-
-static void handle_to_datatype_void(starpu_data_handle_t data_handle STARPU_ATTRIBUTE_UNUSED, MPI_Datatype *datatype)
-{
-	int ret;
-
-	ret = MPI_Type_contiguous(0, MPI_BYTE, datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed");
-
-	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
-}
-
-/*
- *	Generic
- */
-
-static starpu_mpi_datatype_allocate_func_t handle_to_datatype_funcs[STARPU_MAX_INTERFACE_ID] =
-{
-	[STARPU_MATRIX_INTERFACE_ID]	= handle_to_datatype_matrix,
-	[STARPU_BLOCK_INTERFACE_ID]	= handle_to_datatype_block,
-	[STARPU_VECTOR_INTERFACE_ID]	= handle_to_datatype_vector,
-	[STARPU_CSR_INTERFACE_ID]	= NULL, /* Sent through pack/unpack operations */
-	[STARPU_BCSR_INTERFACE_ID]	= NULL, /* Sent through pack/unpack operations */
-	[STARPU_VARIABLE_INTERFACE_ID]	= handle_to_datatype_variable,
-	[STARPU_VOID_INTERFACE_ID]	= handle_to_datatype_void,
-	[STARPU_MULTIFORMAT_INTERFACE_ID] = NULL,
-};
-
-void _starpu_mpi_datatype_allocate(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req)
-{
-	enum starpu_data_interface_id id = starpu_data_get_interface_id(data_handle);
-
-	if (id < STARPU_MAX_INTERFACE_ID)
-	{
-		starpu_mpi_datatype_allocate_func_t func = handle_to_datatype_funcs[id];
-		if (func)
-		{
-			func(data_handle, &req->datatype);
-			req->registered_datatype = 1;
-		}
-		else
-		{
-			/* The datatype is predefined by StarPU but it will be sent as a memory area */
-			req->datatype = MPI_BYTE;
-			req->registered_datatype = 0;
-		}
-	}
-	else
-	{
-		struct _starpu_mpi_datatype_funcs *table;
-		STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex);
-		HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table);
-		STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
-		if (table)
-		{
-			STARPU_ASSERT_MSG(table->allocate_datatype_func, "Handle To Datatype Function not defined for StarPU data interface %d", id);
-			table->allocate_datatype_func(data_handle, &req->datatype);
-			req->registered_datatype = 1;
-		}
-		else
-		{
-			/* The datatype is not predefined by StarPU */
-			req->datatype = MPI_BYTE;
-			req->registered_datatype = 0;
-		}
-	}
-#ifdef STARPU_VERBOSE
-	{
-		char datatype_name[MPI_MAX_OBJECT_NAME];
-		int datatype_name_len;
-		MPI_Type_get_name(req->datatype, datatype_name, &datatype_name_len);
-		if (datatype_name_len == 0)
-			req->datatype_name = strdup("User defined datatype");
-		else
-			req->datatype_name = strdup(datatype_name);
-	}
-#endif
-}
-
-static void _starpu_mpi_handle_free_simple_datatype(MPI_Datatype *datatype)
-{
-	MPI_Type_free(datatype);
-}
-
-static void _starpu_mpi_handle_free_complex_datatype(MPI_Datatype *datatype)
-{
-	int num_ints, num_adds, num_datatypes, combiner;
-
-	MPI_Type_get_envelope(*datatype, &num_ints, &num_adds, &num_datatypes, &combiner);
-	if (combiner != MPI_COMBINER_NAMED)
-	{
-		int *array_of_ints;
-		MPI_Aint *array_of_adds;
-		MPI_Datatype *array_of_datatypes;
-		int i;
-
-		_STARPU_MPI_MALLOC(array_of_ints, num_ints * sizeof(int));
-		_STARPU_MPI_MALLOC(array_of_adds, num_adds * sizeof(MPI_Aint));
-		_STARPU_MPI_MALLOC(array_of_datatypes, num_datatypes * sizeof(MPI_Datatype));
-
-		MPI_Type_get_contents(*datatype, num_ints, num_adds, num_datatypes, array_of_ints, array_of_adds, array_of_datatypes);
-		for(i=0 ; i<num_datatypes ; i++)
-		{
-			_starpu_mpi_handle_free_complex_datatype(&array_of_datatypes[i]);
-		}
-		MPI_Type_free(datatype);
-		free(array_of_ints);
-		free(array_of_adds);
-		free(array_of_datatypes);
-	}
-}
-
-static starpu_mpi_datatype_free_func_t handle_free_datatype_funcs[STARPU_MAX_INTERFACE_ID] =
-{
-	[STARPU_MATRIX_INTERFACE_ID]	= _starpu_mpi_handle_free_simple_datatype,
-	[STARPU_BLOCK_INTERFACE_ID]	= _starpu_mpi_handle_free_complex_datatype,
-	[STARPU_VECTOR_INTERFACE_ID]	= _starpu_mpi_handle_free_simple_datatype,
-	[STARPU_CSR_INTERFACE_ID]	= NULL,  /* Sent through pack/unpack operations */
-	[STARPU_BCSR_INTERFACE_ID]	= NULL,  /* Sent through pack/unpack operations */
-	[STARPU_VARIABLE_INTERFACE_ID]	= _starpu_mpi_handle_free_simple_datatype,
-	[STARPU_VOID_INTERFACE_ID]      = _starpu_mpi_handle_free_simple_datatype,
-	[STARPU_MULTIFORMAT_INTERFACE_ID] = NULL,
-};
-
-void _starpu_mpi_datatype_free(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	enum starpu_data_interface_id id = starpu_data_get_interface_id(data_handle);
-
-	if (id < STARPU_MAX_INTERFACE_ID)
-	{
-		starpu_mpi_datatype_free_func_t func = handle_free_datatype_funcs[id];
-		if (func)
-			func(datatype);
-	}
-	else
-	{
-		struct _starpu_mpi_datatype_funcs *table;
-		STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex);
-		HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table);
-		STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
-		if (table)
-		{
-			STARPU_ASSERT_MSG(table->free_datatype_func, "Free Datatype Function not defined for StarPU data interface %d", id);
-			table->free_datatype_func(datatype);
-		}
-
-	}
-	/* else the datatype is not predefined by StarPU */
-}
-
-int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func)
-{
-	enum starpu_data_interface_id id = starpu_data_get_interface_id(handle);
-	struct _starpu_mpi_datatype_funcs *table;
-
-	STARPU_ASSERT_MSG(id >= STARPU_MAX_INTERFACE_ID, "Cannot redefine the MPI datatype for a predefined StarPU datatype");
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex);
-	HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table);
-	if (table)
-	{
-		table->allocate_datatype_func = allocate_datatype_func;
-		table->free_datatype_func = free_datatype_func;
-	}
-	else
-	{
-		_STARPU_MPI_MALLOC(table, sizeof(struct _starpu_mpi_datatype_funcs));
-		table->id = id;
-		table->allocate_datatype_func = allocate_datatype_func;
-		table->free_datatype_func = free_datatype_func;
-		HASH_ADD_INT(_starpu_mpi_datatype_funcs_table, id, table);
-	}
-	STARPU_ASSERT_MSG(handle->ops->handle_to_pointer, "The data interface must define the operation 'handle_to_pointer'\n");
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
-	return 0;
-}
-
-int starpu_mpi_datatype_unregister(starpu_data_handle_t handle)
-{
-	enum starpu_data_interface_id id = starpu_data_get_interface_id(handle);
-	struct _starpu_mpi_datatype_funcs *table;
-
-	STARPU_ASSERT_MSG(id >= STARPU_MAX_INTERFACE_ID, "Cannot redefine the MPI datatype for a predefined StarPU datatype");
-
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex);
-	HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table);
-	if (table)
-	{
-		HASH_DEL(_starpu_mpi_datatype_funcs_table, table);
-		free(table);
-	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
-	return 0;
-}

+ 0 - 39
nmad/src/starpu_mpi_datatype.h

@@ -1,39 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2011  Université de Bordeaux
- * Copyright (C) 2010, 2012, 2013, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_DATATYPE_H__
-#define __STARPU_MPI_DATATYPE_H__
-
-#include <starpu_mpi.h>
-#include <starpu_mpi_private.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-void _starpu_mpi_datatype_init(void);
-void _starpu_mpi_datatype_shutdown(void);
-
-void _starpu_mpi_datatype_allocate(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req);
-void _starpu_mpi_datatype_free(starpu_data_handle_t data_handle, MPI_Datatype *datatype);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __STARPU_MPI_DATATYPE_H__

+ 0 - 303
nmad/src/starpu_mpi_fortran.c

@@ -1,303 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2016, 2017  CNRS
- * Copyright (C) 2016  Inria
- * Copyright (C) 2017  Université de Bordeaux
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <starpu_mpi.h>
-#include <common/config.h>
-#include "starpu_mpi_private.h"
-
-#ifdef HAVE_MPI_COMM_F2C
-/* Fortran related functions */
-struct _starpu_mpi_argc_argv *fstarpu_mpi_argcv_alloc(int argc, int initialize_mpi, int comm_present, MPI_Fint comm)
-{
-	struct _starpu_mpi_argc_argv *argcv;
-	_STARPU_MPI_CALLOC(argcv, 1,sizeof(*argcv));
-	argcv->initialize_mpi = initialize_mpi;
-	if (comm_present)
-	{
-		argcv->comm = MPI_Comm_f2c(comm);
-	}
-	else
-	{
-		argcv->comm = MPI_COMM_WORLD;
-	}
-	argcv->fargc = argc;
-	argcv->argc = &argcv->fargc;
-	_STARPU_MPI_CALLOC(argcv->fargv, argc, sizeof(char *));
-	argcv->argv = &argcv->fargv;
-	return argcv;
-}
-
-void fstarpu_mpi_argcv_set_arg(struct _starpu_mpi_argc_argv *argcv, int i, int len, char *_s)
-{
-	STARPU_ASSERT(len >= 0);
-	STARPU_ASSERT(i >= 0 && i < argcv->fargc);
-	char *s;
-	_STARPU_MPI_MALLOC(s, len+1);
-	memcpy(s, _s, len);
-	s[len] = '\0';
-	argcv->fargv[i] = s;
-}
-
-void fstarpu_mpi_argcv_free(struct _starpu_mpi_argc_argv *argcv)
-{
-	if (argcv->fargv != NULL)
-	{
-		int i;
-		for (i=0; i<argcv->fargc; i++)
-		{
-			free(argcv->fargv[i]);
-		}
-		free(argcv->fargv);
-	}
-	free(argcv);
-}
-
-starpu_mpi_req *fstarpu_mpi_req_alloc(void)
-{
-	void *ptr;
-	_STARPU_MPI_CALLOC(ptr, 1, sizeof(starpu_mpi_req));
-	return ptr;
-}
-
-void fstarpu_mpi_req_free(starpu_mpi_req *req)
-{
-	free(req);
-}
-
-MPI_Status *fstarpu_mpi_status_alloc(void)
-{
-	void *ptr;
-	_STARPU_MPI_CALLOC(ptr, 1, sizeof(MPI_Status));
-	return ptr;
-}
-
-void fstarpu_mpi_status_free(MPI_Status *status)
-{
-	free(status);
-}
-
-int fstarpu_mpi_barrier(MPI_Fint comm)
-{
-	return starpu_mpi_barrier(MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const)
-{
-	return starpu_mpi_irecv_detached_sequential_consistency(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), callback, arg, seq_const);
-}
-
-int fstarpu_mpi_init_c(struct _starpu_mpi_argc_argv *argcv)
-{
-	return starpu_mpi_init_comm(argcv->argc, argcv->argv, argcv->initialize_mpi, argcv->comm);
-}
-
-void fstarpu_mpi_get_data_on_node(MPI_Fint comm, starpu_data_handle_t data_handle, int node)
-{
-	starpu_mpi_get_data_on_node(MPI_Comm_f2c(comm), data_handle, node);
-}
-
-void fstarpu_mpi_get_data_on_node_detached(MPI_Fint comm, starpu_data_handle_t data_handle, int node, void (*callback)(void *), void *arg)
-{
-	starpu_mpi_get_data_on_node_detached(MPI_Comm_f2c(comm), data_handle, node, callback, arg);
-}
-
-void fstarpu_mpi_redux_data(MPI_Fint comm, starpu_data_handle_t data_handle)
-{
-	starpu_mpi_redux_data(MPI_Comm_f2c(comm), data_handle);
-}
-void fstarpu_mpi_redux_data_prio(MPI_Fint comm, starpu_data_handle_t data_handle, int prio)
-{
-	starpu_mpi_redux_data_prio(MPI_Comm_f2c(comm), data_handle, prio);
-}
-
-/* scatter/gather */
-int fstarpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
-{
-	return starpu_mpi_scatter_detached(data_handles, cnt, root, MPI_Comm_f2c(comm), scallback, sarg, rcallback, rarg);
-}
-
-int fstarpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
-{
-	return starpu_mpi_gather_detached(data_handles, cnt, root, MPI_Comm_f2c(comm), scallback, sarg, rcallback, rarg);
-}
-
-/* isend/irecv detached unlock tag */
-int fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
-{
-	return starpu_mpi_isend_detached_unlock_tag(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), *starpu_tag);
-}
-int fstarpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dst, int mpi_tag, int prio, MPI_Fint comm, starpu_tag_t *starpu_tag)
-{
-	return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dst, mpi_tag, prio, MPI_Comm_f2c(comm), *starpu_tag);
-}
-
-int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
-{
-	return starpu_mpi_irecv_detached_unlock_tag(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), *starpu_tag);
-}
-
-/* isend/irecv array detached unlock tag */
-int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data_handle_t *data_handles, int *dsts, int *mpi_tags, int *prio, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
-{
-	MPI_Comm comms[array_size];
-	int i;
-	for (i = 0; i < array_size; i++)
-	{
-		comms[i] = MPI_Comm_f2c(_comms[i]);
-	}
-	int ret = starpu_mpi_isend_array_detached_unlock_tag_prio((unsigned)array_size, data_handles, dsts, mpi_tags, prio, comms, *starpu_tag);
-	return ret;
-}
-int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, int *mpi_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
-{
-	return fstarpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handles, dsts, mpi_tags, NULL, _comms, starpu_tag);
-}
-
-int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, int *mpi_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
-{
-	MPI_Comm comms[array_size];
-	int i;
-	for (i = 0; i < array_size; i++)
-	{
-		comms[i] = MPI_Comm_f2c(_comms[i]);
-	}
-	int ret = starpu_mpi_irecv_array_detached_unlock_tag((unsigned)array_size, data_handles, srcs, mpi_tags, comms, *starpu_tag);
-	return ret;
-}
-
-/* isend/irecv */
-int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, MPI_Fint comm)
-{
-	return starpu_mpi_isend(data_handle, req, dst, mpi_tag, MPI_Comm_f2c(comm));
-}
-int fstarpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, int prio, MPI_Fint comm)
-{
-	return starpu_mpi_isend_prio(data_handle, req, dst, mpi_tag, prio, MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, int mpi_tag, MPI_Fint comm)
-{
-	return starpu_mpi_irecv(data_handle, req, src, mpi_tag, MPI_Comm_f2c(comm));
-}
-
-/* send/recv */
-int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm)
-{
-	return starpu_mpi_send(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm));
-}
-int fstarpu_mpi_send_prio(starpu_data_handle_t data_handle, int dst, int mpi_tag, int prio, MPI_Fint comm)
-{
-	return starpu_mpi_send_prio(data_handle, dst, mpi_tag, prio, MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, MPI_Status *status)
-{
-	return starpu_mpi_recv(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), status);
-}
-
-/* isend/irecv detached */
-int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
-{
-	return starpu_mpi_isend_detached(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
-}
-int fstarpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dst, int mpi_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
-{
-	return starpu_mpi_isend_detached_prio(data_handle, dst, mpi_tag, prio, MPI_Comm_f2c(comm), callback, arg);
-}
-
-int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
-{
-	return starpu_mpi_irecv_detached(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
-}
-
-/* issend / issend detached */
-int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, MPI_Fint comm)
-{
-	return starpu_mpi_issend(data_handle, req, dst, mpi_tag, MPI_Comm_f2c(comm));
-}
-int fstarpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, int prio, MPI_Fint comm)
-{
-	return starpu_mpi_issend_prio(data_handle, req, dst, mpi_tag, prio, MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
-{
-	return starpu_mpi_issend_detached(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
-}
-int fstarpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dst, int mpi_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
-{
-	return starpu_mpi_issend_detached_prio(data_handle, dst, mpi_tag, prio, MPI_Comm_f2c(comm), callback, arg);
-}
-
-/* cache */
-void fstarpu_mpi_cache_flush(MPI_Fint comm, starpu_data_handle_t data_handle)
-{
-	return starpu_mpi_cache_flush(MPI_Comm_f2c(comm), data_handle);
-}
-
-void fstarpu_mpi_cache_flush_all_data(MPI_Fint comm)
-{
-	return starpu_mpi_cache_flush_all_data(MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_comm_size(MPI_Fint comm, int *size)
-{
-	return starpu_mpi_comm_size(MPI_Comm_f2c(comm), size);
-}
-
-int fstarpu_mpi_comm_rank(MPI_Fint comm, int *rank)
-{
-	return starpu_mpi_comm_rank(MPI_Comm_f2c(comm), rank);
-}
-
-MPI_Fint fstarpu_mpi_world_comm()
-{
-	return MPI_Comm_c2f(MPI_COMM_WORLD);
-}
-
-void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, int tag, int rank, MPI_Fint comm)
-{
-	return starpu_mpi_data_register_comm(handle, tag, rank, MPI_Comm_f2c(comm));
-}
-
-void fstarpu_mpi_data_register(starpu_data_handle_t handle, int tag, int rank)
-{
-	return starpu_mpi_data_register_comm(handle, tag, rank, MPI_COMM_WORLD);
-}
-
-void fstarpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Fint comm)
-{
-	return starpu_mpi_data_set_rank_comm(handle, rank, MPI_Comm_f2c(comm));
-}
-
-void fstarpu_mpi_data_set_rank(starpu_data_handle_t handle, int rank)
-{
-	return starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD);
-}
-
-void fstarpu_mpi_data_migrate(MPI_Fint comm, starpu_data_handle_t handle, int rank)
-{
-	return starpu_mpi_data_migrate(MPI_Comm_f2c(comm), handle, rank);
-}
-
-int fstarpu_mpi_wait_for_all(MPI_Fint comm)
-{
-	return starpu_mpi_wait_for_all(MPI_Comm_f2c(comm));
-}
-#endif

+ 0 - 157
nmad/src/starpu_mpi_fxt.h

@@ -1,157 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2012, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_FXT_H__
-#define __STARPU_MPI_FXT_H__
-
-#include <starpu.h>
-#include <common/config.h>
-#include <common/fxt.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#define _STARPU_MPI_FUT_START				0x5201
-#define _STARPU_MPI_FUT_STOP				0x5202
-#define _STARPU_MPI_FUT_BARRIER				0x5203
-#define _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN		0x5204
-#define _STARPU_MPI_FUT_ISEND_SUBMIT_END		0x5205
-#define _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN		0x5206
-#define _STARPU_MPI_FUT_IRECV_SUBMIT_END		0x5207
-#define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN		0x5208
-#define _STARPU_MPI_FUT_ISEND_COMPLETE_END		0x5209
-#define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN		0x5210
-#define _STARPU_MPI_FUT_IRECV_COMPLETE_END		0x5211
-#define _STARPU_MPI_FUT_SLEEP_BEGIN			0x5212
-#define _STARPU_MPI_FUT_SLEEP_END			0x5213
-#define _STARPU_MPI_FUT_DTESTING_BEGIN			0x5214
-#define _STARPU_MPI_FUT_DTESTING_END			0x5215
-#define _STARPU_MPI_FUT_UTESTING_BEGIN			0x5216
-#define _STARPU_MPI_FUT_UTESTING_END			0x5217
-#define _STARPU_MPI_FUT_UWAIT_BEGIN			0x5218
-#define _STARPU_MPI_FUT_UWAIT_END			0x5219
-#define _STARPU_MPI_FUT_DATA_SET_RANK			0x521a
-#define _STARPU_MPI_FUT_IRECV_TERMINATED		0x521b
-#define _STARPU_MPI_FUT_ISEND_TERMINATED		0x521c
-#define _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN		0x521d
-#define _STARPU_MPI_FUT_TESTING_DETACHED_END		0x521e
-#define _STARPU_MPI_FUT_TEST_BEGIN			0x521f
-#define _STARPU_MPI_FUT_TEST_END			0x5220
-
-#ifdef STARPU_USE_FXT
-#define _STARPU_MPI_TRACE_START(rank, worldsize)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_START, (rank), (worldsize), _starpu_gettid());
-#define _STARPU_MPI_TRACE_STOP(rank, worldsize)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_STOP, (rank), (worldsize), _starpu_gettid());
-#define _STARPU_MPI_TRACE_BARRIER(rank, worldsize, key)	\
-	FUT_DO_PROBE4(_STARPU_MPI_FUT_BARRIER, (rank), (worldsize), (key), _starpu_gettid());
-#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(dest, mpi_tag, size)	\
-	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN, (dest), (mpi_tag), (size), _starpu_gettid());
-#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(dest, mpi_tag, size, jobid)	\
-	FUT_DO_PROBE5(_STARPU_MPI_FUT_ISEND_SUBMIT_END, (dest), (mpi_tag), (size), (jobid), _starpu_gettid());
-#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_SUBMIT_END, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, mpi_tag, size)	\
-	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN, (dest), (mpi_tag), (size), _starpu_gettid());
-#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(type, rank, mpi_tag)		\
-	if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN((rank), (mpi_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN((rank), (mpi_tag), 0); }
-#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, mpi_tag, size)	\
-	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_COMPLETE_END, (dest), (mpi_tag), (size), _starpu_gettid());
-#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_COMPLETE_END, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_COMPLETE_END(type, rank, mpi_tag)		\
-	if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_END((rank), (mpi_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_END((rank), (mpi_tag), 0); }
-#define _STARPU_MPI_TRACE_TERMINATED(req, rank, mpi_tag)		\
-	if ((req)->request_type == RECV_REQ) FUT_DO_PROBE4(_STARPU_MPI_FUT_IRECV_TERMINATED, (rank), (mpi_tag), (req)->post_sync_jobid, _starpu_gettid()); else \
-	if ((req)->request_type == SEND_REQ) FUT_DO_PROBE3(_STARPU_MPI_FUT_ISEND_TERMINATED, (rank), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_SLEEP_BEGIN()	\
-	FUT_DO_PROBE1(_STARPU_MPI_FUT_SLEEP_BEGIN, _starpu_gettid());
-#define _STARPU_MPI_TRACE_SLEEP_END()	\
-	FUT_DO_PROBE1(_STARPU_MPI_FUT_SLEEP_END, _starpu_gettid());
-#define _STARPU_MPI_TRACE_DTESTING_BEGIN()	\
-	FUT_DO_PROBE1(_STARPU_MPI_FUT_DTESTING_BEGIN,  _starpu_gettid());
-#define _STARPU_MPI_TRACE_DTESTING_END()	\
-	FUT_DO_PROBE1(_STARPU_MPI_FUT_DTESTING_END, _starpu_gettid());
-#define _STARPU_MPI_TRACE_UTESTING_BEGIN(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_UTESTING_BEGIN, (src), (mpi_tag),  _starpu_gettid());
-#define _STARPU_MPI_TRACE_UTESTING_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_UTESTING_END, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_UWAIT_BEGIN(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_UWAIT_BEGIN, (src), (mpi_tag),  _starpu_gettid());
-#define _STARPU_MPI_TRACE_UWAIT_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_UWAIT_END, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_DATA_SET_RANK(handle, rank)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_DATA_SET_RANK, (handle), (rank), _starpu_gettid());
-#if 0
-/* This is very expensive in the trace, only enable for debugging */
-#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()	\
-	FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_BEGIN, _starpu_gettid());
-#define _STARPU_MPI_TRACE_TESTING_DETACHED_END()	\
-	FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_END, _starpu_gettid());
-#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_BEGIN, (peer), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_TEST_END(peer, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_END, (peer), (mpi_tag), _starpu_gettid());
-#else
-#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()		do {} while(0)
-#define _STARPU_MPI_TRACE_TESTING_DETACHED_END()		do {} while(0)
-#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, mpi_tag)		do {} while(0)
-#define _STARPU_MPI_TRACE_TEST_END(peer, mpi_tag)		do {} while(0)
-#endif
-#define TRACE
-#else
-#define _STARPU_MPI_TRACE_START(a, b)				do {} while(0);
-#define _STARPU_MPI_TRACE_STOP(a, b)				do {} while(0);
-#define _STARPU_MPI_TRACE_BARRIER(a, b, c)			do {} while(0);
-#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(a, b, c)		do {} while(0);
-#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(a, b, c, d)		do {} while(0);
-#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(a, b)		do {} while(0);
-#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(a, b)		do {} while(0);
-#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(a, b, c)		do {} while(0);
-#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(a, b, c)		do {} while(0);
-#define _STARPU_MPI_TRACE_COMPLETE_END(a, b, c)			do {} while(0);
-#define _STARPU_MPI_TRACE_TERMINATED(a, b, c)			do {} while(0);
-#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(a, b, c)		do {} while(0);
-#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(a, b)		do {} while(0);
-#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(a, b)		do {} while(0);
-#define _STARPU_MPI_TRACE_SLEEP_BEGIN()				do {} while(0);
-#define _STARPU_MPI_TRACE_SLEEP_END()				do {} while(0);
-#define _STARPU_MPI_TRACE_DTESTING_BEGIN()			do {} while(0);
-#define _STARPU_MPI_TRACE_DTESTING_END()			do {} while(0);
-#define _STARPU_MPI_TRACE_UTESTING_BEGIN(a, b)			do {} while(0);
-#define _STARPU_MPI_TRACE_UTESTING_END(a, b)			do {} while(0);
-#define _STARPU_MPI_TRACE_UWAIT_BEGIN(a, b)			do {} while(0);
-#define _STARPU_MPI_TRACE_UWAIT_END(a, b)			do {} while(0);
-#define _STARPU_MPI_TRACE_DATA_SET_RANK(a, b)			do {} while(0);
-#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()		do {} while(0)
-#define _STARPU_MPI_TRACE_TESTING_DETACHED_END()		do {} while(0)
-#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, mpi_tag)		do {} while(0)
-#define _STARPU_MPI_TRACE_TEST_END(peer, mpi_tag)		do {} while(0)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif // __STARPU_MPI_FXT_H__

+ 0 - 120
nmad/src/starpu_mpi_helper.c

@@ -1,120 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2015, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2012, 2014, 2016  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <starpu_mpi_private.h>
-
-static void starpu_mpi_unlock_tag_callback(void *arg)
-{
-	starpu_tag_t *tagptr = arg;
-
-	starpu_tag_notify_from_apps(*tagptr);
-
-	free(tagptr);
-}
-
-int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
-{
-	starpu_tag_t *tagptr;
-	_STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t));
-	*tagptr = tag;
-
-	return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, prio, comm, starpu_mpi_unlock_tag_callback, tagptr);
-}
-int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int data_tag, MPI_Comm comm, starpu_tag_t tag)
-{
-	return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dest, data_tag, 0, comm, tag);
-}
-
-
-int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, starpu_tag_t tag)
-{
-	starpu_tag_t *tagptr;
-	_STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t));
-	*tagptr = tag;
-
-	return starpu_mpi_irecv_detached(data_handle, source, data_tag, comm, starpu_mpi_unlock_tag_callback, tagptr);
-}
-
-struct arg_array
-{
-	int array_size;
-	starpu_tag_t tag;
-};
-
-static void starpu_mpi_array_unlock_callback(void *_arg)
-{
-	struct arg_array *arg = _arg;
-
-	int remaining = STARPU_ATOMIC_ADD(&arg->array_size, -1);
-
-	if (remaining == 0)
-	{
-		starpu_tag_notify_from_apps(arg->tag);
-		free(arg);
-	}
-}
-
-int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size,
-		starpu_data_handle_t *data_handle, int *dest, int *data_tag, int *prio,
-		MPI_Comm *comm, starpu_tag_t tag)
-{
-	if (!array_size)
-		return 0;
-	struct arg_array *arg;
-	_STARPU_MPI_MALLOC(arg, sizeof(struct arg_array));
-
-	arg->array_size = array_size;
-	arg->tag = tag;
-
-	unsigned elem;
-	for (elem = 0; elem < array_size; elem++)
-	{
-		int p = 0;
-		if (prio)
-			p = prio[elem];
-		starpu_mpi_isend_detached_prio(data_handle[elem], dest[elem], data_tag[elem], p, comm[elem], starpu_mpi_array_unlock_callback, arg);
-	}
-
-	return 0;
-}
-int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size,
-		starpu_data_handle_t *data_handle, int *dest, int *data_tag,
-		MPI_Comm *comm, starpu_tag_t tag)
-{
-	return starpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handle, dest, data_tag, NULL, comm, tag);
-}
-
-
-int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *data_tag, MPI_Comm *comm, starpu_tag_t tag)
-{
-	if (!array_size)
-		return 0;
-	struct arg_array *arg;
-	_STARPU_MPI_MALLOC(arg, sizeof(struct arg_array));
-
-	arg->array_size = array_size;
-	arg->tag = tag;
-
-	unsigned elem;
-	for (elem = 0; elem < array_size; elem++)
-	{
-		starpu_mpi_irecv_detached(data_handle[elem], source[elem], data_tag[elem], comm[elem], starpu_mpi_array_unlock_callback, arg);
-	}
-
-	return 0;
-}

+ 0 - 237
nmad/src/starpu_mpi_init.c

@@ -1,237 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2016  Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <starpu_mpi.h>
-#include <starpu_mpi_datatype.h>
-#include <starpu_mpi_private.h>
-#include <starpu_mpi_cache.h>
-#include <starpu_profiling.h>
-#include <starpu_mpi_stats.h>
-#include <starpu_mpi_cache.h>
-#include <starpu_mpi_select_node.h>
-#include <common/config.h>
-#include <common/thread.h>
-#include <datawizard/interfaces/data_interface.h>
-#include <datawizard/coherency.h>
-#include <core/simgrid.h>
-#include <core/task.h>
-
-#if defined(STARPU_USE_MPI_MPI)
-#include <mpi/starpu_mpi_comm.h>
-#include <mpi/starpu_mpi_tag.h>
-#endif
-
-#ifdef STARPU_SIMGRID
-static int _mpi_world_size;
-static int _mpi_world_rank;
-#endif
-
-static void _starpu_mpi_print_thread_level_support(int thread_level, char *msg)
-{
-	switch (thread_level)
-	{
-		case MPI_THREAD_SERIALIZED:
-		{
-			_STARPU_DEBUG("MPI%s MPI_THREAD_SERIALIZED; Multiple threads may make MPI calls, but only one at a time.\n", msg);
-			break;
-		}
-		case MPI_THREAD_FUNNELED:
-		{
-			_STARPU_DISP("MPI%s MPI_THREAD_FUNNELED; The application can safely make calls to StarPU-MPI functions, but should not call directly MPI communication functions.\n", msg);
-			break;
-		}
-		case MPI_THREAD_SINGLE:
-		{
-			_STARPU_DISP("MPI%s MPI_THREAD_SINGLE; MPI does not have multi-thread support, this might cause problems. The application can make calls to StarPU-MPI functions, but not call directly MPI Communication functions.\n", msg);
-			break;
-		}
-	}
-}
-
-void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv)
-{
-	if (argc_argv->initialize_mpi)
-	{
-		int thread_support;
-		_STARPU_DEBUG("Calling MPI_Init_thread\n");
-		if (MPI_Init_thread(argc_argv->argc, argc_argv->argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS)
-		{
-			_STARPU_ERROR("MPI_Init_thread failed\n");
-		}
-		_starpu_mpi_print_thread_level_support(thread_support, "_Init_thread level =");
-	}
-	else
-	{
-		int provided;
-		MPI_Query_thread(&provided);
-		_starpu_mpi_print_thread_level_support(provided, " has been initialized with");
-	}
-
-	MPI_Comm_rank(argc_argv->comm, &argc_argv->rank);
-	MPI_Comm_size(argc_argv->comm, &argc_argv->world_size);
-	MPI_Comm_set_errhandler(argc_argv->comm, MPI_ERRORS_RETURN);
-
-#ifdef STARPU_SIMGRID
-	_mpi_world_size = argc_argv->world_size;
-	_mpi_world_rank = argc_argv->rank;
-#endif
-}
-
-static
-int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
-{
-	struct _starpu_mpi_argc_argv *argc_argv;
-	_STARPU_MALLOC(argc_argv, sizeof(struct _starpu_mpi_argc_argv));
-	argc_argv->initialize_mpi = initialize_mpi;
-	argc_argv->argc = argc;
-	argc_argv->argv = argv;
-	argc_argv->comm = comm;
-
-#ifdef STARPU_SIMGRID
-	/* Call MPI_Init_thread as early as possible, to initialize simgrid
-	 * before working with mutexes etc. */
-	_starpu_mpi_do_initialize(argc_argv);
-#endif
-
-	return _starpu_mpi_progress_init(argc_argv);
-}
-
-#ifdef STARPU_SIMGRID
-/* This is called before application's main, to initialize SMPI before we can
- * create MSG processes to run application's main */
-int _starpu_mpi_simgrid_init(int argc, char *argv[])
-{
-	return _starpu_mpi_initialize(&argc, &argv, 1, MPI_COMM_WORLD);
-}
-#endif
-
-int starpu_mpi_init_comm(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv STARPU_ATTRIBUTE_UNUSED, int initialize_mpi STARPU_ATTRIBUTE_UNUSED, MPI_Comm comm STARPU_ATTRIBUTE_UNUSED)
-{
-#ifdef STARPU_SIMGRID
-	_starpu_mpi_wait_for_initialization();
-	return 0;
-#else
-	return _starpu_mpi_initialize(argc, argv, initialize_mpi, comm);
-#endif
-}
-
-int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi)
-{
-	return starpu_mpi_init_comm(argc, argv, initialize_mpi, MPI_COMM_WORLD);
-}
-
-int starpu_mpi_initialize(void)
-{
-#ifdef STARPU_SIMGRID
-	return 0;
-#else
-	return _starpu_mpi_initialize(NULL, NULL, 0, MPI_COMM_WORLD);
-#endif
-}
-
-int starpu_mpi_initialize_extended(int *rank, int *world_size)
-{
-#ifdef STARPU_SIMGRID
-	*world_size = _mpi_world_size;
-	*rank = _mpi_world_rank;
-	return 0;
-#else
-	int ret;
-
-	ret = _starpu_mpi_initialize(NULL, NULL, 1, MPI_COMM_WORLD);
-	if (ret == 0)
-	{
-		_STARPU_DEBUG("Calling MPI_Comm_rank\n");
-		MPI_Comm_rank(MPI_COMM_WORLD, rank);
-		MPI_Comm_size(MPI_COMM_WORLD, world_size);
-	}
-	return ret;
-#endif
-}
-
-int starpu_mpi_shutdown(void)
-{
-	int value;
-	int rank, world_size;
-
-	/* We need to get the rank before calling MPI_Finalize to pass to _starpu_mpi_comm_amounts_display() */
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
-
-	/* kill the progression thread */
-	_starpu_mpi_progress_shutdown(&value);
-
-	_STARPU_MPI_TRACE_STOP(rank, world_size);
-
-	_starpu_mpi_comm_amounts_display(stderr, rank);
-	_starpu_mpi_comm_amounts_shutdown();
-	_starpu_mpi_cache_shutdown(world_size);
-#if defined(STARPU_USE_MPI_MPI)
-	_starpu_mpi_tag_shutdown();
-	_starpu_mpi_comm_shutdown();
-#endif
-
-	return 0;
-}
-
-int starpu_mpi_comm_size(MPI_Comm comm, int *size)
-{
-	if (_starpu_mpi_fake_world_size != -1)
-	{
-		*size = _starpu_mpi_fake_world_size;
-		return 0;
-	}
-#ifdef STARPU_SIMGRID
-	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
-	*size = _mpi_world_size;
-	return 0;
-#else
-	return MPI_Comm_size(comm, size);
-#endif
-}
-
-int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
-{
-	if (_starpu_mpi_fake_world_rank != -1)
-	{
-		*rank = _starpu_mpi_fake_world_rank;
-		return 0;
-	}
-#ifdef STARPU_SIMGRID
-	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
-	*rank = _mpi_world_rank;
-	return 0;
-#else
-	return MPI_Comm_rank(comm, rank);
-#endif
-}
-
-int starpu_mpi_world_size(void)
-{
-	int size;
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
-	return size;
-}
-
-int starpu_mpi_world_rank(void)
-{
-	int rank;
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	return rank;
-}

+ 0 - 35
nmad/src/starpu_mpi_init.h

@@ -1,35 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2012-2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_INIT_H__
-#define __STARPU_MPI_INIT_H__
-
-#include <starpu.h>
-#include <starpu_mpi.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __STARPU_MPI_INIT_H__

+ 0 - 0
nmad/src/starpu_mpi_private.c


部分文件因文件數量過多而無法顯示