浏览代码

deleted mpi again

Andra Hugo 12 年之前
父节点
当前提交
75124c59d8
共有 71 个文件被更改,包括 0 次插入9388 次删除
  1. 0 1
      mpi/.gitignore
  2. 0 29
      mpi/Makefile.am
  3. 0 206
      mpi/examples/Makefile.am
  4. 0 251
      mpi/examples/cholesky/mpi_cholesky.c
  5. 0 106
      mpi/examples/cholesky/mpi_cholesky.h
  6. 0 174
      mpi/examples/cholesky/mpi_cholesky_codelets.c
  7. 0 27
      mpi/examples/cholesky/mpi_cholesky_codelets.h
  8. 0 117
      mpi/examples/cholesky/mpi_cholesky_distributed.c
  9. 0 236
      mpi/examples/cholesky/mpi_cholesky_kernels.c
  10. 0 40
      mpi/examples/cholesky/mpi_cholesky_models.c
  11. 0 27
      mpi/examples/cholesky/mpi_cholesky_models.h
  12. 0 75
      mpi/examples/complex/mpi_complex.c
  13. 0 42
      mpi/examples/mpi_lu/mpi_lu-double.h
  14. 0 42
      mpi/examples/mpi_lu/mpi_lu-float.h
  15. 0 19
      mpi/examples/mpi_lu/pdlu.c
  16. 0 19
      mpi/examples/mpi_lu/pdlu_kernels.c
  17. 0 577
      mpi/examples/mpi_lu/plu_example.c
  18. 0 19
      mpi/examples/mpi_lu/plu_example_double.c
  19. 0 19
      mpi/examples/mpi_lu/plu_example_float.c
  20. 0 394
      mpi/examples/mpi_lu/plu_solve.c
  21. 0 19
      mpi/examples/mpi_lu/plu_solve_double.c
  22. 0 19
      mpi/examples/mpi_lu/plu_solve_float.c
  23. 0 19
      mpi/examples/mpi_lu/pslu.c
  24. 0 19
      mpi/examples/mpi_lu/pslu_kernels.c
  25. 0 870
      mpi/examples/mpi_lu/pxlu.c
  26. 0 65
      mpi/examples/mpi_lu/pxlu.h
  27. 0 444
      mpi/examples/mpi_lu/pxlu_kernels.c
  28. 0 32
      mpi/examples/mpi_lu/pxlu_kernels.h
  29. 0 19
      mpi/examples/mpi_lu/slu_kernels.c
  30. 0 106
      mpi/examples/perf.sh
  31. 0 156
      mpi/examples/reduction/mpi_reduction.c
  32. 0 66
      mpi/examples/reduction/mpi_reduction_kernels.c
  33. 0 228
      mpi/examples/scatter_gather/mpi_scatter_gather.c
  34. 0 159
      mpi/examples/stencil/stencil5.c
  35. 0 70
      mpi/include/starpu_mpi.h
  36. 0 29
      mpi/libstarpumpi.pc.in
  37. 0 51
      mpi/src/Makefile.am
  38. 0 867
      mpi/src/starpu_mpi.c
  39. 0 78
      mpi/src/starpu_mpi_collective.c
  40. 0 149
      mpi/src/starpu_mpi_datatype.c
  41. 0 33
      mpi/src/starpu_mpi_datatype.h
  42. 0 45
      mpi/src/starpu_mpi_fxt.h
  43. 0 104
      mpi/src/starpu_mpi_helper.c
  44. 0 632
      mpi/src/starpu_mpi_insert_task.c
  45. 0 99
      mpi/src/starpu_mpi_private.h
  46. 0 88
      mpi/src/starpu_mpi_stats.c
  47. 0 24
      mpi/src/starpu_mpi_stats.h
  48. 0 29
      mpi/starpumpi-1.0.pc.in
  49. 0 1
      mpi/tests/.gitignore
  50. 0 153
      mpi/tests/Makefile.am
  51. 0 148
      mpi/tests/block_interface.c
  52. 0 151
      mpi/tests/block_interface_pinned.c
  53. 0 22
      mpi/tests/helper.h
  54. 0 143
      mpi/tests/insert_task.c
  55. 0 165
      mpi/tests/insert_task_block.c
  56. 0 152
      mpi/tests/insert_task_cache.c
  57. 0 180
      mpi/tests/insert_task_owner.c
  58. 0 120
      mpi/tests/insert_task_owner2.c
  59. 0 99
      mpi/tests/insert_task_owner_data.c
  60. 0 80
      mpi/tests/mpi_detached_tag.c
  61. 0 79
      mpi/tests/mpi_irecv.c
  62. 0 97
      mpi/tests/mpi_irecv_detached.c
  63. 0 80
      mpi/tests/mpi_isend.c
  64. 0 98
      mpi/tests/mpi_isend_detached.c
  65. 0 86
      mpi/tests/mpi_test.c
  66. 0 92
      mpi/tests/multiple_send.c
  67. 0 76
      mpi/tests/pingpong.c
  68. 0 129
      mpi/tests/ring.c
  69. 0 133
      mpi/tests/ring_async.c
  70. 0 133
      mpi/tests/ring_async_implicit.c
  71. 0 32
      mpi/tests/ring_kernel.cu

+ 0 - 1
mpi/.gitignore

@@ -1 +0,0 @@
-/.deps

+ 0 - 29
mpi/Makefile.am

@@ -1,29 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2012  Université de Bordeaux 1
-# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-SUBDIRS=src tests examples
-
-pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = libstarpumpi.pc starpumpi-1.0.pc
-
-versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
-versinclude_HEADERS = 					\
-	include/starpu_mpi.h
-
-# Run the "showcheck" target in every sub-directory listed in SUBDIRS.
-# $(MAKE) is used instead of a literal "make" so that the sub-makes are run
-# with the same make program and inherit its command-line flags.
-showcheck:
-	for i in $(SUBDIRS) ; do \
-		$(MAKE) -C $$i showcheck ; \
-	done

+ 0 - 206
mpi/examples/Makefile.am

@@ -1,206 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2012  Université de Bordeaux 1
-# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-CC=$(MPICC)
-CCLD=$(MPICC)
-
-if STARPU_MPI_CHECK
-if STARPU_HAVE_AM111
-LOG_COMPILER	 	=	$(MPIEXEC) -np 2
-else
-TESTS_ENVIRONMENT 	=	$(MPIEXEC) -np 2
-endif
-TESTS			=	$(check_PROGRAMS)
-endif
-
-check_PROGRAMS =
-
-BUILT_SOURCES =
-
-CLEANFILES = *.gcno *.gcda *.linkinfo
-
-EXTRA_DIST = 					\
-	mpi_lu/mpi_lu-float.h		\
-	mpi_lu/mpi_lu-double.h		\
-	mpi_lu/plu_example.c		\
-	mpi_lu/plu_solve.c		\
-	mpi_lu/pxlu.h			\
-	mpi_lu/pxlu.c			\
-	mpi_lu/pxlu_kernels.h		\
-	mpi_lu/pxlu_kernels.c		\
-	cholesky/mpi_cholesky.h	\
-	cholesky/mpi_cholesky_models.h \
-	cholesky/mpi_cholesky_codelets.h \
-	../tests/helper.h
-
-examplebindir = $(libdir)/starpu/mpi
-
-examplebin_PROGRAMS =
-
-if STARPU_USE_CUDA
-# TODO define NVCCFLAGS
-NVCC ?= nvcc
-
-NVCCFLAGS += -I$(top_srcdir)/include/ -I$(top_builddir)/include
-
-.cu.cubin:
-	$(MKDIR_P) `dirname $@`
-	$(NVCC) -cubin $< -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS)
-
-.cu.o:
-	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I$(top_srcdir)/include/  -I$(top_builddir)/include/
-endif
-
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS)
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include
-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS)
-
-###################
-# Stencil example #
-###################
-if BUILD_EXAMPLES
-examplebin_PROGRAMS +=				\
-	stencil/stencil5
-
-stencil_stencil5_LDADD =		\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-
-check_PROGRAMS	+=	\
-	stencil/stencil5
-
-##################
-# MPI LU example #
-##################
-
-if !NO_BLAS_LIB
-
-examplebin_PROGRAMS += 			\
-	mpi_lu/plu_example_float	\
-	mpi_lu/plu_example_double
-
-mpi_lu_plu_example_float_LDADD =	\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
-	$(STARPU_LIBNUMA_LDFLAGS)				\
-	$(STARPU_BLAS_LDFLAGS)
-
-mpi_lu_plu_example_float_SOURCES =	\
-	mpi_lu/plu_example_float.c	\
-	mpi_lu/plu_solve_float.c	\
-	mpi_lu/pslu_kernels.c		\
-	mpi_lu/pslu.c			\
-	$(top_srcdir)/examples/common/blas.c
-
-mpi_lu_plu_example_double_LDADD =	\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
-	$(STARPU_LIBNUMA_LDFLAGS)				\
-	$(STARPU_BLAS_LDFLAGS)
-
-mpi_lu_plu_example_double_SOURCES =	\
-	mpi_lu/plu_example_double.c	\
-	mpi_lu/plu_solve_double.c  	\
-	mpi_lu/pdlu_kernels.c	    	\
-	mpi_lu/pdlu.c		    	\
-	$(top_srcdir)/examples/common/blas.c
-endif
-
-########################
-# MPI Cholesky example #
-########################
-
-if !NO_BLAS_LIB
-examplebin_PROGRAMS +=		\
-	cholesky/mpi_cholesky			\
-	cholesky/mpi_cholesky_distributed
-
-cholesky_mpi_cholesky_SOURCES	=		\
-	cholesky/mpi_cholesky.c		\
-	cholesky/mpi_cholesky_models.c		\
-	cholesky/mpi_cholesky_kernels.c	\
-	cholesky/mpi_cholesky_codelets.c	\
-	$(top_srcdir)/examples/common/blas.c
-
-cholesky_mpi_cholesky_LDADD =			\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
-	$(STARPU_BLAS_LDFLAGS)
-
-cholesky_mpi_cholesky_distributed_SOURCES =	\
-	cholesky/mpi_cholesky_distributed.c	\
-	cholesky/mpi_cholesky_models.c		\
-	cholesky/mpi_cholesky_kernels.c	\
-	cholesky/mpi_cholesky_codelets.c	\
-	$(top_srcdir)/examples/common/blas.c
-
-cholesky_mpi_cholesky_distributed_LDADD =	\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
-	$(STARPU_BLAS_LDFLAGS)
-
-check_PROGRAMS +=					\
-	cholesky/mpi_cholesky			\
-	cholesky/mpi_cholesky_distributed
-endif
-
-########################
-# Scatter Gather       #
-########################
-
-examplebin_PROGRAMS +=		\
-	scatter_gather/mpi_scatter_gather
-
-scatter_gather_mpi_scatter_gather_LDADD =	\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-
-check_PROGRAMS +=		\
-	scatter_gather/mpi_scatter_gather
-
-###################
-# Reduction       #
-###################
-
-examplebin_PROGRAMS +=		\
-	reduction/mpi_reduction
-
-reduction_mpi_reduction_SOURCES =		\
-	reduction/mpi_reduction.c		\
-	reduction/mpi_reduction_kernels.c
-
-reduction_mpi_reduction_LDADD =	\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-
-check_PROGRAMS +=		\
-	reduction/mpi_reduction
-
-###################
-# complex example #
-###################
-
-examplebin_PROGRAMS +=				\
-	complex/mpi_complex
-
-complex_mpi_complex_SOURCES =		\
-	complex/mpi_complex.c		\
-	$(top_srcdir)/examples/interface/complex_interface.c
-
-complex_mpi_complex_LDADD =		\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-
-check_PROGRAMS	+=	\
-	complex/mpi_complex
-endif
-
-
-showcheck:
-	-cat $(TEST_LOGS) /dev/null

+ 0 - 251
mpi/examples/cholesky/mpi_cholesky.c

@@ -1,251 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
- * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "mpi_cholesky.h"
-#include "mpi_cholesky_models.h"
-#include "mpi_cholesky_codelets.h"
-
-/* Returns the rank of the MPI node that owns the block of indexes (x, y).
- * The mapping only depends on the dblockx/dblocky settings; nb_nodes is not
- * used (an earlier mapping, (x+y) % nb_nodes, did use it). */
-int my_distrib(int x, int y, int nb_nodes)
-{
-	(void) nb_nodes;
-
-	return (y%dblocky)*dblockx + (x%dblockx);
-}
-
-/* Print every block of bmat, one block after the other.  Within a block only
- * the entries with i <= j are shown, the others are replaced by a dot.
- * NOTE(review): the header prints "x,y" while the data is read from
- * bmat[y][x]; this is kept exactly as it was -- confirm the intended order. */
-static void display_blocks(float ***bmat)
-{
-	unsigned i,j,x,y;
-
-	for(y=0 ; y<nblocks ; y++)
-	{
-		for(x=0 ; x<nblocks ; x++)
-		{
-			printf("Block %u,%u :\n", x, y);
-			for (j = 0; j < BLOCKSIZE; j++)
-			{
-				for (i = 0; i < BLOCKSIZE; i++)
-				{
-					if (i <= j)
-					{
-						printf("%2.2f\t", bmat[y][x][j +i*BLOCKSIZE]);
-					}
-					else
-					{
-						printf(".\t");
-					}
-				}
-				printf("\n");
-			}
-		}
-	}
-}
-
-/* Build the test matrix block by block, factorize it with dw_cholesky(),
- * then rebuild L*trans(L) locally and compare the blocks owned by this rank
- * against the original matrix.  Returns 0; a mismatch trips the final
- * assert(). */
-int main(int argc, char **argv)
-{
-	/* create a simple symmetric positive-definite matrix example
-	 *
-	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
-	 *
-	 * with "size" added to every diagonal entry (see the fill loop below)
-	 * */
-
-	float ***bmat;
-	int rank, nodes, ret;
-
-	parse_args(argc, argv);
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	starpu_mpi_initialize_extended(&rank, &nodes);
-	starpu_helper_cublas_init();
-
-	/* No distribution grid requested: pick dblockx x dblocky == nodes,
-	 * using the largest divisor of nodes that is <= sqrt(nodes). */
-	if (dblockx == (unsigned) -1 || dblocky == (unsigned) -1)
-	{
-		int factor;
-		dblockx = nodes;
-		dblocky = 1;
-		for(factor=sqrt(nodes) ; factor>1 ; factor--)
-		{
-			if (nodes % factor == 0)
-			{
-				dblockx = nodes/factor;
-				dblocky = factor;
-				break;
-			}
-		}
-	}
-
-	unsigned i,j,x,y;
-	/* sizeof(*ptr) keeps the element size right whatever the pointer depth */
-	bmat = malloc(nblocks * sizeof(*bmat));
-	STARPU_ASSERT(bmat);
-	for(x=0 ; x<nblocks ; x++)
-	{
-		bmat[x] = malloc(nblocks * sizeof(*bmat[x]));
-		STARPU_ASSERT(bmat[x]);
-		for(y=0 ; y<nblocks ; y++)
-		{
-			starpu_malloc((void **)&bmat[x][y], BLOCKSIZE*BLOCKSIZE*sizeof(float));
-			for (i = 0; i < BLOCKSIZE; i++)
-			{
-				for (j = 0; j < BLOCKSIZE; j++)
-				{
-					bmat[x][y][j +i*BLOCKSIZE] = (1.0f/(1.0f+(i+(x*BLOCKSIZE)+j+(y*BLOCKSIZE)))) + ((i+(x*BLOCKSIZE) == j+(y*BLOCKSIZE))?1.0f*size:0.0f);
-					//mat[j +i*size] = ((i == j)?1.0f*size:0.0f);
-				}
-			}
-		}
-	}
-
-
-	if (display)
-	{
-		printf("[%d] Input :\n", rank);
-		display_blocks(bmat);
-	}
-
-	/* Initialised so that the report at the end never reads an
-	 * indeterminate value if dw_cholesky() leaves them untouched. */
-	double timing = 0.0, flops = 0.0;
-	dw_cholesky(bmat, size, size/nblocks, nblocks, rank, nodes, &timing, &flops);
-
-	starpu_mpi_shutdown();
-
-	if (display)
-	{
-		printf("[%d] Results :\n", rank);
-		display_blocks(bmat);
-	}
-
-	/* Gather the blocks into one contiguous size x size matrix */
-	float *rmat = malloc(size*size*sizeof(float));
-	STARPU_ASSERT(rmat);
-	for(x=0 ; x<nblocks ; x++)
-	{
-		for(y=0 ; y<nblocks ; y++)
-		{
-			for (i = 0; i < BLOCKSIZE; i++)
-			{
-				for (j = 0; j < BLOCKSIZE; j++)
-				{
-					rmat[j+(y*BLOCKSIZE)+(i+(x*BLOCKSIZE))*size] = bmat[x][y][j +i*BLOCKSIZE];
-				}
-			}
-		}
-	}
-
-	fprintf(stderr, "[%d] compute explicit LLt ...\n", rank);
-	/* Clear the entries with i > j so that only one triangle remains */
-	for (j = 0; j < size; j++)
-	{
-		for (i = 0; i < size; i++)
-		{
-			if (i > j)
-			{
-				rmat[j+i*size] = 0.0f; // debug
-			}
-		}
-	}
-	float *test_mat = malloc(size*size*sizeof(float));
-	STARPU_ASSERT(test_mat);
-
-	SSYRK("L", "N", size, size, 1.0f,
-			rmat, size, 0.0f, test_mat, size);
-
-	fprintf(stderr, "[%d] comparing results ...\n", rank);
-	if (display)
-	{
-		for (j = 0; j < size; j++)
-		{
-			for (i = 0; i < size; i++)
-			{
-				if (i <= j)
-				{
-					printf("%2.2f\t", test_mat[j +i*size]);
-				}
-				else
-				{
-					printf(".\t");
-				}
-			}
-			printf("\n");
-		}
-	}
-
-	int correctness = 1;
-	for(x = 0; x < nblocks ;  x++)
-	{
-		for (y = 0; y < nblocks; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (mpi_rank == rank)
-			{
-				for (i = (size/nblocks)*x ; i < (size/nblocks)*x+(size/nblocks); i++)
-				{
-					for (j = (size/nblocks)*y ; j < (size/nblocks)*y+(size/nblocks); j++)
-					{
-						if (i <= j)
-						{
-							float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
-							/* fabsf(), not abs(): abs() takes an int, so the
-							 * difference used to be truncated and any error
-							 * below 1.0 went unnoticed.  The error is taken
-							 * relative to orig (always > 0) because the
-							 * diagonal entries are of the order of "size",
-							 * where single precision cannot resolve an
-							 * absolute 1e-5. */
-							float err = fabsf(test_mat[j +i*size] - orig) / orig;
-							if (err > 0.0001)
-							{
-								fprintf(stderr, "[%d] Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", rank, i, j, test_mat[j +i*size], orig, err);
-								correctness = 0;
-								flops = 0;
-								break;
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-
-	for(x=0 ; x<nblocks ; x++)
-	{
-		for(y=0 ; y<nblocks ; y++)
-		{
-			starpu_free((void *)bmat[x][y]);
-		}
-		free(bmat[x]);
-	}
-	free(bmat);
-	free(rmat);
-	free(test_mat);
-
-	starpu_helper_cublas_shutdown();
-	starpu_shutdown();
-
-	assert(correctness);
-
-	if (rank == 0)
-	{
-		/* Avoid dividing by a zero duration when no timing was reported */
-		double gflops = (timing > 0.0) ? (flops/timing/1000.0f) : 0.0;
-
-		fprintf(stdout, "Computation time (in ms): %2.2f\n", timing/1000);
-		fprintf(stdout, "Synthetic GFlops : %2.2f\n", gflops);
-	}
-
-	return 0;
-}

+ 0 - 106
mpi/examples/cholesky/mpi_cholesky.h

@@ -1,106 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __MPI_CHOLESKY_H__
-#define __MPI_CHOLESKY_H__
-
-#include <string.h>
-#include <math.h>
-#include <sys/time.h>
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <cublas.h>
-#endif
-
-#include <common/blas.h>
-#include <starpu.h>
-
-#define BLOCKSIZE	(size/nblocks)
-
-static unsigned size = 4*1024;
-static unsigned nblocks = 16;
-static unsigned nbigblocks = 2;
-static unsigned noprio = 0;
-static unsigned display = 0;
-static unsigned dblockx = -1;
-static unsigned dblocky = -1;
-
-void chol_cpu_codelet_update_u11(void **, void *);
-void chol_cpu_codelet_update_u21(void **, void *);
-void chol_cpu_codelet_update_u22(void **, void *);
-
-#ifdef STARPU_USE_CUDA
-void chol_cublas_codelet_update_u11(void *descr[], void *_args);
-void chol_cublas_codelet_update_u21(void *descr[], void *_args);
-void chol_cublas_codelet_update_u22(void *descr[], void *_args);
-#endif
-
-/* Parse the command line and update the file-scope settings declared above.
- *
- * Options taking a numeric value:
- *	-size, -dblockx, -dblocky, -nblocks, -nbigblocks
- * Flags:
- *	-no-prio, -display, -h (prints the usage line and keeps parsing)
- * Any other argument is ignored.
- *
- * An option that expects a value but is the last argument makes the program
- * exit with an error message instead of reading past the end of argv. */
-static void __attribute__((unused)) parse_args(int argc, char **argv)
-{
-	int i;
-	for (i = 1; i < argc; i++)
-	{
-		/* Setting that receives the value following the option, if any */
-		unsigned *target = NULL;
-
-		if (strcmp(argv[i], "-size") == 0)
-		{
-			target = &size;
-		}
-		else if (strcmp(argv[i], "-dblockx") == 0)
-		{
-			target = &dblockx;
-		}
-		else if (strcmp(argv[i], "-dblocky") == 0)
-		{
-			target = &dblocky;
-		}
-		else if (strcmp(argv[i], "-nblocks") == 0)
-		{
-			target = &nblocks;
-		}
-		else if (strcmp(argv[i], "-nbigblocks") == 0)
-		{
-			target = &nbigblocks;
-		}
-		else if (strcmp(argv[i], "-no-prio") == 0)
-		{
-			noprio = 1;
-		}
-		else if (strcmp(argv[i], "-display") == 0)
-		{
-			display = 1;
-		}
-		else if (strcmp(argv[i], "-h") == 0)
-		{
-			printf("usage : %s [-display] [-size size] [-nblocks nblocks]\n", argv[0]);
-		}
-
-		if (target)
-		{
-			if (i + 1 >= argc)
-			{
-				/* argv[argc] is NULL: do not hand it to strtol() */
-				fprintf(stderr, "%s: option %s requires a value\n", argv[0], argv[i]);
-				exit(EXIT_FAILURE);
-			}
-			*target = strtol(argv[++i], NULL, 10);
-		}
-	}
-	if (nblocks > size) nblocks = size;
-	/* BLOCKSIZE is size/nblocks: never let the divisor be zero */
-	if (nblocks == 0) nblocks = 1;
-}
-
-#endif // __MPI_CHOLESKY_H__

+ 0 - 174
mpi/examples/cholesky/mpi_cholesky_codelets.c

@@ -1,174 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "mpi_cholesky.h"
-#include "mpi_cholesky_models.h"
-#include "mpi_cholesky_codelets.h"
-
-/*
- *	Create the codelets
- */
-
-/* Codelet submitted on the diagonal block data_handles[k][k]: a single
- * buffer accessed in read-write mode. */
-static struct starpu_codelet cl11 =
-{
-	.model = &chol_model_11,
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {chol_cublas_codelet_update_u11, NULL},
-#endif
-	.cpu_funcs = {chol_cpu_codelet_update_u11, NULL},
-	.where = STARPU_CUDA|STARPU_CPU
-};
-
-/* Codelet taking two buffers: the first one is only read, the second one is
- * updated in place. */
-static struct starpu_codelet cl21 =
-{
-	.model = &chol_model_21,
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_RW},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {chol_cublas_codelet_update_u21, NULL},
-#endif
-	.cpu_funcs = {chol_cpu_codelet_update_u21, NULL},
-	.where = STARPU_CUDA|STARPU_CPU
-};
-
-/* Codelet taking three buffers: the first two are only read, the third one
- * is updated in place. */
-static struct starpu_codelet cl22 =
-{
-	.model = &chol_model_22,
-	.nbuffers = 3,
-	.modes = {STARPU_R, STARPU_R, STARPU_RW},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {chol_cublas_codelet_update_u22, NULL},
-#endif
-	.cpu_funcs = {chol_cpu_codelet_update_u22, NULL},
-	.where = STARPU_CUDA|STARPU_CPU
-};
-
-extern int my_distrib(int x, int y, int nb_nodes);
-
-/*
- *	Bootstrap the factorization: register one data handle per block,
- *	submit the whole task graph (DAG) and wait for its completion.
- *
- *	matA     nblocks x nblocks array of block pointers; matA[x][y] is
- *	         only read for the blocks that my_distrib() assigns to rank
- *	size     order of the whole matrix
- *	ld       leading dimension given when registering each block
- *	nblocks  number of blocks per dimension
- *	rank     MPI rank of the calling process
- *	nodes    number of MPI processes, forwarded to my_distrib()
- *	timing   if non-NULL, receives the elapsed time in microseconds
- *	flops    if non-NULL, receives the synthetic flop count size^3/3
- *
- *	Rank 0 additionally prints the timing and the synthetic GFlops.
- */
-void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblocks, int rank, int nodes, double *timing, double *flops)
-{
-	struct timeval start;
-	struct timeval end;
-	starpu_data_handle_t **data_handles;
-	/* unsigned like nblocks, to avoid signed/unsigned comparisons */
-	unsigned x, y;
-
-	/* create all the DAG nodes */
-	unsigned i,j,k;
-
-	data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *));
-	STARPU_ASSERT(data_handles);
-	for(x=0 ; x<nblocks ; x++)
-	{
-		data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle_t));
-		STARPU_ASSERT(data_handles[x]);
-	}
-
-	for(x = 0; x < nblocks ;  x++)
-	{
-		for (y = 0; y < nblocks; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (mpi_rank == rank)
-			{
-				//fprintf(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-				starpu_matrix_data_register(&data_handles[x][y], 0, (uintptr_t)matA[x][y],
-						ld, size/nblocks, size/nblocks, sizeof(float));
-			}
-			/* TODO: make better test to only registering what is needed */
-			else
-			{
-				/* I don't own that index, but will need it for my computations */
-				//fprintf(stderr, "[%d] Neighbour of data[%d][%d]\n", rank, x, y);
-				starpu_matrix_data_register(&data_handles[x][y], -1, (uintptr_t)NULL,
-						ld, size/nblocks, size/nblocks, sizeof(float));
-			}
-			if (data_handles[x][y])
-			{
-				starpu_data_set_rank(data_handles[x][y], mpi_rank);
-				starpu_data_set_tag(data_handles[x][y], (y*nblocks)+x);
-			}
-		}
-	}
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-	gettimeofday(&start, NULL);
-
-	for (k = 0; k < nblocks; k++)
-	{
-		int prio = STARPU_DEFAULT_PRIO;
-		if (!noprio) prio = STARPU_MAX_PRIO;
-
-		starpu_mpi_insert_task(MPI_COMM_WORLD, &cl11,
-				STARPU_PRIORITY, prio,
-				STARPU_RW, data_handles[k][k],
-				0);
-
-		for (j = k+1; j<nblocks; j++)
-		{
-			prio = STARPU_DEFAULT_PRIO;
-			if (!noprio&& (j == k+1)) prio = STARPU_MAX_PRIO;
-			starpu_mpi_insert_task(MPI_COMM_WORLD, &cl21,
-					STARPU_PRIORITY, prio,
-					STARPU_R, data_handles[k][k],
-					STARPU_RW, data_handles[k][j],
-					0);
-
-			for (i = k+1; i<nblocks; i++)
-			{
-				if (i <= j)
-				{
-					prio = STARPU_DEFAULT_PRIO;
-					if (!noprio && (i == k + 1) && (j == k +1) ) prio = STARPU_MAX_PRIO;
-					starpu_mpi_insert_task(MPI_COMM_WORLD, &cl22,
-							STARPU_PRIORITY, prio,
-							STARPU_R, data_handles[k][i],
-							STARPU_R, data_handles[k][j],
-							STARPU_RW, data_handles[i][j],
-							0);
-				}
-			}
-		}
-	}
-
-	starpu_task_wait_for_all();
-
-	for(x = 0; x < nblocks ;  x++)
-	{
-		for (y = 0; y < nblocks; y++)
-		{
-			if (data_handles[x][y])
-				starpu_data_unregister(data_handles[x][y]);
-		}
-		free(data_handles[x]);
-	}
-	free(data_handles);
-
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-	gettimeofday(&end, NULL);
-
-	double elapsed = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-	double flop = (1.0f*size*size*size)/3.0f;
-
-	/* Hand the measurements back to the caller: these out-parameters used
-	 * to be left untouched (a local "timing" shadowed the parameter), so
-	 * callers printed indeterminate values. */
-	if (timing) *timing = elapsed;
-	if (flops) *flops = flop;
-
-	if (rank == 0)
-	{
-		fprintf(stdout, "Computation time (in ms): %2.2f\n", elapsed/1000);
-		fprintf(stdout, "Synthetic GFlops : %2.2f\n", (flop/elapsed/1000.0f));
-	}
-}
-

+ 0 - 27
mpi/examples/cholesky/mpi_cholesky_codelets.h

@@ -1,27 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __MPI_CHOLESKY_CODELETS_H__
-#define __MPI_CHOLESKY_CODELETS_H__
-
-/*
- *	code to bootstrap the factorization
- *	and construct the DAG
- */
-void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblocks, int rank, int nodes, double *timing, double *flops);
-
-#endif /* __MPI_CHOLESKY_CODELETS_H__ */

+ 0 - 117
mpi/examples/cholesky/mpi_cholesky_distributed.c

@@ -1,117 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "mpi_cholesky.h"
-#include "mpi_cholesky_models.h"
-#include "mpi_cholesky_codelets.h"
-
-/* Returns the rank of the MPI node that owns the block of indexes (x, y).
- * The mapping only depends on the dblockx/dblocky settings; nb_nodes is not
- * used (an earlier mapping, (x+y) % nb_nodes, did use it). */
-int my_distrib(int x, int y, int nb_nodes)
-{
-	(void) nb_nodes;
-
-	return (y%dblocky)*dblockx + (x%dblockx);
-}
-
-/* Distributed variant: every rank only allocates and fills the blocks that
- * my_distrib() assigns to it, runs dw_cholesky() and reports the timing on
- * rank 0.  No correctness check is performed here. */
-int main(int argc, char **argv)
-{
-	/* create a simple symmetric positive-definite matrix example
-	 *
-	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
-	 *
-	 * with "size" added to every diagonal entry (see the fill loop below)
-	 * */
-
-	float ***bmat;
-	int rank, nodes, ret;
-
-	parse_args(argc, argv);
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	starpu_mpi_initialize_extended(&rank, &nodes);
-	starpu_helper_cublas_init();
-
-	/* No distribution grid requested: pick dblockx x dblocky == nodes,
-	 * using the largest divisor of nodes that is <= sqrt(nodes). */
-	if (dblockx == (unsigned) -1 || dblocky == (unsigned) -1)
-	{
-		int factor;
-		dblockx = nodes;
-		dblocky = 1;
-		for(factor=sqrt(nodes) ; factor>1 ; factor--)
-		{
-			if (nodes % factor == 0)
-			{
-				dblockx = nodes/factor;
-				dblocky = factor;
-				break;
-			}
-		}
-	}
-
-	unsigned i,j,x,y;
-	/* sizeof(*ptr) keeps the element size right whatever the pointer depth */
-	bmat = malloc(nblocks * sizeof(*bmat));
-	STARPU_ASSERT(bmat);
-	for(x=0 ; x<nblocks ; x++)
-	{
-		bmat[x] = malloc(nblocks * sizeof(*bmat[x]));
-		STARPU_ASSERT(bmat[x]);
-		for(y=0 ; y<nblocks ; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (mpi_rank == rank)
-			{
-				starpu_malloc((void **)&bmat[x][y], BLOCKSIZE*BLOCKSIZE*sizeof(float));
-				for (i = 0; i < BLOCKSIZE; i++)
-				{
-					for (j = 0; j < BLOCKSIZE; j++)
-					{
-						bmat[x][y][j +i*BLOCKSIZE] = (1.0f/(1.0f+(i+(x*BLOCKSIZE)+j+(y*BLOCKSIZE)))) + ((i+(x*BLOCKSIZE) == j+(y*BLOCKSIZE))?1.0f*size:0.0f);
-						//mat[j +i*size] = ((i == j)?1.0f*size:0.0f);
-					}
-				}
-			}
-			else
-			{
-				/* Not owned by this rank: leave a well-defined pointer
-				 * rather than an indeterminate one */
-				bmat[x][y] = NULL;
-			}
-		}
-	}
-
-	/* Initialised so that the report below never reads an indeterminate
-	 * value if dw_cholesky() leaves them untouched. */
-	double timing = 0.0, flops = 0.0;
-	dw_cholesky(bmat, size, size/nblocks, nblocks, rank, nodes, &timing, &flops);
-
-	starpu_mpi_shutdown();
-
-	if (rank == 0)
-	{
-		/* Avoid dividing by a zero duration when no timing was reported */
-		double gflops = (timing > 0.0) ? (flops/timing/1000.0f) : 0.0;
-
-		fprintf(stdout, "Computation time (in ms): %2.2f\n", timing/1000);
-		fprintf(stdout, "Synthetic GFlops : %2.2f\n", gflops);
-	}
-
-
-	for(x=0 ; x<nblocks ; x++)
-	{
-		for(y=0 ; y<nblocks ; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (mpi_rank == rank)
-			{
-				starpu_free((void *)bmat[x][y]);
-			}
-		}
-		free(bmat[x]);
-	}
-	free(bmat);
-
-	starpu_helper_cublas_shutdown();
-	starpu_shutdown();
-
-	return 0;
-}

+ 0 - 236
mpi/examples/cholesky/mpi_cholesky_kernels.c

@@ -1,236 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010, 2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include "mpi_cholesky.h"
-#include "common/blas.h"
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <cublas.h>
-#ifdef STARPU_HAVE_MAGMA
-#include "magma.h"
-#include "magma_lapack.h"
-#endif
-#endif
-
-/*
- *   U22
- */
-
-/* Shared body of the U22 kernels: a GEMM call on three matrix buffers.
- *	descr[0], descr[1]  input blocks ("left" and "right")
- *	descr[2]            block updated in place ("center")
- *	s                   0 selects the BLAS call, 1 the CUBLAS call (only
- *	                    when built with STARPU_USE_CUDA); any other value
- *	                    aborts
- * With alpha = -1 and beta = 1 this presumably computes
- * center <- center - left * trans(right) -- assuming SGEMM follows the
- * standard BLAS argument order. */
-static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
-{
-	//printf("22\n");
-	float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]);
-	unsigned ld_left = STARPU_MATRIX_GET_LD(descr[0]);
-
-	float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]);
-	unsigned ld_right = STARPU_MATRIX_GET_LD(descr[1]);
-
-	float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]);
-	unsigned ld_center = STARPU_MATRIX_GET_LD(descr[2]);
-
-	/* GEMM dimensions, in the order they are passed below */
-	unsigned m = STARPU_MATRIX_GET_NX(descr[2]);
-	unsigned n = STARPU_MATRIX_GET_NY(descr[2]);
-	unsigned k = STARPU_MATRIX_GET_NY(descr[0]);
-
-	if (s == 0)
-	{
-		SGEMM("N", "T", m, n, k, -1.0f, left, ld_left,
-			right, ld_right, 1.0f, center, ld_center);
-	}
-#ifdef STARPU_USE_CUDA
-	else if (s == 1)
-	{
-		cublasStatus st;
-
-		cublasSgemm('n', 't', m, n, k,
-				-1.0f, left, ld_left, right, ld_right,
-				 1.0f, center, ld_center);
-		st = cublasGetError();
-		if (STARPU_UNLIKELY(st != CUBLAS_STATUS_SUCCESS))
-			STARPU_CUBLAS_REPORT_ERROR(st);
-
-		/* wait for the kernel before the task is considered finished */
-		cudaStreamSynchronize(starpu_cuda_get_local_stream());
-	}
-#endif
-	else
-	{
-		STARPU_ABORT();
-	}
-}
-
-/* CPU entry point of the U22 codelet */
-void chol_cpu_codelet_update_u22(void *descr[], void *_args)
-{
-	chol_common_cpu_codelet_update_u22(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-/* CUDA entry point of the U22 codelet */
-void chol_cublas_codelet_update_u22(void *descr[], void *_args)
-{
-	chol_common_cpu_codelet_update_u22(descr, 1, _args);
-}
-#endif// STARPU_USE_CUDA
-
-/*
- * U21
- */
-
-/* Shared body of the U21 kernels: a triangular solve (TRSM) where descr[0]
- * is the read-only triangular block and descr[1] the block updated in place.
- *	s   0 selects the BLAS call, 1 the CUBLAS call (only when built with
- *	    STARPU_USE_CUDA); any other value aborts */
-static inline void chol_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args)
-{
-//	printf("21\n");
-	float *tri = (float *)STARPU_MATRIX_GET_PTR(descr[0]);
-	unsigned ld_tri = STARPU_MATRIX_GET_LD(descr[0]);
-
-	float *panel = (float *)STARPU_MATRIX_GET_PTR(descr[1]);
-	unsigned ld_panel = STARPU_MATRIX_GET_LD(descr[1]);
-
-	/* TRSM dimensions, in the order they are passed below */
-	unsigned m = STARPU_MATRIX_GET_NY(descr[1]);
-	unsigned n = STARPU_MATRIX_GET_NX(descr[1]);
-
-	if (s == 0)
-	{
-		STRSM("R", "L", "T", "N", m, n, 1.0f, tri, ld_tri, panel, ld_panel);
-	}
-#ifdef STARPU_USE_CUDA
-	else if (s == 1)
-	{
-		cublasStrsm('R', 'L', 'T', 'N', m, n, 1.0f, tri, ld_tri, panel, ld_panel);
-		/* wait for the kernel before the task is considered finished */
-		cudaStreamSynchronize(starpu_cuda_get_local_stream());
-	}
-#endif
-	else
-	{
-		STARPU_ABORT();
-	}
-}
-
-/* CPU entry point of the U21 codelet */
-void chol_cpu_codelet_update_u21(void *descr[], void *_args)
-{
-	chol_common_codelet_update_u21(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-/* CUDA entry point of the U21 codelet */
-void chol_cublas_codelet_update_u21(void *descr[], void *_args)
-{
-	chol_common_codelet_update_u21(descr, 1, _args);
-}
-#endif
-
-/*
- *	U11
- */
-
-/* Shared body of the U11 kernels: in-place factorization of the single
- * matrix buffer descr[0].
- *	s   0 selects the CPU (BLAS) loop, 1 the CUDA path (MAGMA when
- *	    STARPU_HAVE_MAGMA is defined, a CUBLAS loop otherwise; only when
- *	    built with STARPU_USE_CUDA); any other value aborts
- * NOTE(review): the CPU loop calls SSYR with "L" while the CUBLAS loop calls
- * cublasSsyr with 'U'; kept as is -- confirm that the difference is wanted. */
-static inline void chol_common_codelet_update_u11(void *descr[], int s, __attribute__((unused)) void *_args)
-{
-//	printf("11\n");
-	float *sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-	unsigned nx = STARPU_MATRIX_GET_NY(descr[0]);
-	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
-
-	unsigned z;
-
-	if (s == 0)
-	{
-		/*
-		 *	For each diagonal entry, in order:
-		 *	- alpha 11 <- lambda 11 = sqrt(alpha11)
-		 *	- alpha 21 <- l 21	= alpha 21 / lambda 11
-		 *	- A22 <- A22 - l21 trans(l21)
-		 */
-		for (z = 0; z < nx; z++)
-		{
-			unsigned remaining = nx - z - 1;
-			float *below = &sub11[(z+1)+z*ld];
-			float *trailing = &sub11[(z+1)+(z+1)*ld];
-			float lambda11 = sqrt(sub11[z+z*ld]);
-
-			sub11[z+z*ld] = lambda11;
-
-			STARPU_ASSERT(lambda11 != 0.0f);
-
-			SSCAL(remaining, 1.0f/lambda11, below, 1);
-
-			SSYR("L", remaining, -1.0f,
-						below, 1,
-						trailing, ld);
-		}
-	}
-#ifdef STARPU_USE_CUDA
-	else if (s == 1)
-	{
-#ifdef STARPU_HAVE_MAGMA
-		int ret;
-		int info;
-		ret = magma_spotrf_gpu('L', nx, sub11, ld, &info);
-		if (ret != MAGMA_SUCCESS)
-		{
-			fprintf(stderr, "Error in Magma: %d\n", ret);
-			STARPU_ABORT();
-		}
-		cudaError_t cures = cudaStreamSynchronize(starpu_cuda_get_local_stream());
-		STARPU_ASSERT(!cures);
-#else
-		for (z = 0; z < nx; z++)
-		{
-			unsigned remaining = nx - z - 1;
-			float *diag = &sub11[z+z*ld];
-			float *below = &sub11[(z+1)+z*ld];
-			float *trailing = &sub11[(z+1)+(z+1)*ld];
-			float lambda11;
-
-			/* fetch the diagonal entry from the device */
-			cudaMemcpyAsync(&lambda11, diag, sizeof(float), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
-
-			STARPU_ASSERT(lambda11 != 0.0f);
-
-			lambda11 = sqrt(lambda11);
-
-			/* and write its square root back */
-			cublasSetVector(1, sizeof(float), &lambda11, sizeof(float), diag, sizeof(float));
-
-			cublasSscal(remaining, 1.0f/lambda11, below, 1);
-
-			cublasSsyr('U', remaining, -1.0f,
-						below, 1,
-						trailing, ld);
-		}
-
-		cudaStreamSynchronize(starpu_cuda_get_local_stream());
-#endif
-	}
-#endif
-	else
-	{
-		STARPU_ABORT();
-	}
-}
-
-
-/* CPU entry point of the U11 codelet */
-void chol_cpu_codelet_update_u11(void *descr[], void *_args)
-{
-	chol_common_codelet_update_u11(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-/* CUDA entry point of the U11 codelet */
-void chol_cublas_codelet_update_u11(void *descr[], void *_args)
-{
-	chol_common_codelet_update_u11(descr, 1, _args);
-}
-#endif// STARPU_USE_CUDA

+ 0 - 40
mpi/examples/cholesky/mpi_cholesky_models.c

@@ -1,40 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_cholesky_models.h"
-
-/*
- *	History-based performance models, each identified by its symbol
- *	string. No flop count is computed here.
- *	NOTE(review): the 11/21/22 suffixes presumably match the Cholesky
- *	kernels of the same name -- confirm against the codelet definitions.
- */
-
-struct starpu_perfmodel chol_model_11 =
-{
-	.type = STARPU_HISTORY_BASED,
-	.symbol = "chol_model_11"
-};
-
-struct starpu_perfmodel chol_model_21 =
-{
-	.type = STARPU_HISTORY_BASED,
-	.symbol = "chol_model_21"
-};
-
-struct starpu_perfmodel chol_model_22 =
-{
-	.type = STARPU_HISTORY_BASED,
-	.symbol = "chol_model_22"
-};

+ 0 - 27
mpi/examples/cholesky/mpi_cholesky_models.h

@@ -1,27 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __DW_CHOLESKY_MODELS_H__
-#define __DW_CHOLESKY_MODELS_H__
-
-#include <starpu.h>
-
-/* Performance models defined in mpi_cholesky_models.c */
-extern struct starpu_perfmodel chol_model_11;
-extern struct starpu_perfmodel chol_model_21;
-extern struct starpu_perfmodel chol_model_22;
-
-#endif // __DW_CHOLESKY_MODELS_H__

+ 0 - 75
mpi/examples/complex/mpi_complex.c

@@ -1,75 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <interface/complex_interface.h>
-#include <interface/complex_codelet.h>
-
-/*
- * Exchange a 2-element complex vector between rank 0 and rank 1.
- *
- * Rank 0 displays its data, sends it to rank 1 (tag 10), receives the reply
- * (tag 11) into a second handle, then displays and compares both. Rank 1
- * receives, displays and sends the data back. Other ranks do nothing.
- *
- * Returns 77 when fewer than 2 nodes are available, otherwise the value left
- * in ret by starpu_init().
- */
-int main(int argc, char **argv)
-{
-	int rank, nodes;
-	int ret;
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	starpu_mpi_initialize_extended(&rank, &nodes);
-
-	if (nodes < 2)
-	{
-		fprintf(stderr, "This program needs at least 2 nodes\n");
-		ret = 77;
-	}
-	else
-	{
-		if (rank == 0)
-		{
-			double real[2] = {4.0, 2.0};
-			double imaginary[2] = {7.0, 9.0};
-			starpu_data_handle_t handle;
-
-			double real2[2] = {14.0, 12.0};
-			double imaginary2[2] = {17.0, 19.0};
-			starpu_data_handle_t handle2;
-			MPI_Status status;
-
-			starpu_complex_data_register(&handle, 0, real, imaginary, 2);
-			starpu_insert_task(&cl_display, STARPU_R, handle, 0);
-			starpu_mpi_send(handle, 1, 10, MPI_COMM_WORLD);
-
-			starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2);
-			starpu_mpi_recv(handle2, 1, 11, MPI_COMM_WORLD, &status);
-			starpu_insert_task(&cl_display, STARPU_R, handle2, 0);
-			starpu_insert_task(&cl_compare, STARPU_R, handle, STARPU_R, handle2, 0);
-
-			/* The submitted tasks work on the stack arrays declared
-			 * in this scope: make sure they are finished and release
-			 * the handles before the arrays go out of scope. */
-			starpu_task_wait_for_all();
-			starpu_data_unregister(handle);
-			starpu_data_unregister(handle2);
-		}
-		else if (rank == 1)
-		{
-			double real[2] = {0.0, 0.0};
-			double imaginary[2] = {0.0, 0.0};
-			starpu_data_handle_t handle;
-			MPI_Status status;
-
-			starpu_complex_data_register(&handle, 0, real, imaginary, 2);
-			starpu_mpi_recv(handle, 0, 10, MPI_COMM_WORLD, &status);
-			starpu_insert_task(&cl_display, STARPU_R, handle, 0);
-			starpu_mpi_send(handle, 0, 11, MPI_COMM_WORLD);
-
-			/* Same as on rank 0: do not leave the scope while the
-			 * handle still refers to the local arrays. */
-			starpu_task_wait_for_all();
-			starpu_data_unregister(handle);
-		}
-	}
-	starpu_task_wait_for_all();
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	return ret;
-}

+ 0 - 42
mpi/examples/mpi_lu/mpi_lu-double.h

@@ -1,42 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/* Element type used by the LU sources including this header, and the
- * matching MPI datatype. */
-#define TYPE double
-#define MPI_TYPE	MPI_DOUBLE
-
-/* Builds the precision-specific symbol name: STARPU_PLU(foo) expands to
- * starpu_pdlu_foo. */
-#define STARPU_PLU(name)       starpu_pdlu_##name
-
-/* Double-precision CUBLAS entry points */
-#define CUBLAS_GEMM	cublasDgemm
-#define CUBLAS_TRSM	cublasDtrsm
-#define CUBLAS_SCAL	cublasDscal
-#define CUBLAS_GER	cublasDger
-#define CUBLAS_SWAP	cublasDswap
-#define CUBLAS_IAMAX	cublasIdamax
-
-/* Double-precision host routines (the D* names are declared elsewhere) */
-#define CPU_GEMM	DGEMM
-#define CPU_GEMV	DGEMV
-#define CPU_TRSM	DTRSM
-#define CPU_SCAL	DSCAL
-#define CPU_GER		DGER
-#define CPU_SWAP	DSWAP
-
-#define CPU_TRMM	DTRMM
-#define CPU_AXPY	DAXPY
-#define CPU_ASUM	DASUM
-#define CPU_IAMAX	IDAMAX
-
-/* Note: 10e-10 is 1e-9 */
-#define PIVOT_THRESHHOLD	10e-10

+ 0 - 42
mpi/examples/mpi_lu/mpi_lu-float.h

@@ -1,42 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/* Element type used by the LU sources including this header, and the
- * matching MPI datatype. */
-#define TYPE float
-#define MPI_TYPE	MPI_FLOAT
-
-/* Builds the precision-specific symbol name: STARPU_PLU(foo) expands to
- * starpu_pslu_foo. */
-#define STARPU_PLU(name)       starpu_pslu_##name
-
-/* Single-precision CUBLAS entry points */
-#define CUBLAS_GEMM	cublasSgemm
-#define CUBLAS_TRSM	cublasStrsm
-#define CUBLAS_SCAL	cublasSscal
-#define CUBLAS_GER	cublasSger
-#define CUBLAS_SWAP	cublasSswap
-#define CUBLAS_IAMAX	cublasIsamax
-
-/* Single-precision host routines (the S* names are declared elsewhere) */
-#define CPU_GEMM	SGEMM
-#define CPU_GEMV	SGEMV
-#define CPU_TRSM	STRSM
-#define CPU_SCAL	SSCAL
-#define CPU_GER		SGER
-#define CPU_SWAP	SSWAP
-
-#define CPU_TRMM	STRMM
-#define CPU_AXPY	SAXPY
-#define CPU_ASUM	SASUM
-#define CPU_IAMAX	ISAMAX
-
-/* Note: 10e-5 is 1e-4 */
-#define PIVOT_THRESHHOLD	10e-5

+ 0 - 19
mpi/examples/mpi_lu/pdlu.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "pxlu.c"

+ 0 - 19
mpi/examples/mpi_lu/pdlu_kernels.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "pxlu_kernels.c"

+ 0 - 577
mpi/examples/mpi_lu/plu_example.c

@@ -1,577 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include <math.h>
-#include <starpu.h>
-
-#include "pxlu.h"
-//#include "pxlu_kernels.h"
-
-#ifdef STARPU_HAVE_LIBNUMA
-#include <numaif.h>
-#endif
-
-/* Run-time parameters, overridable from the command line by parse_args():
- *   size     (-size)     value divided by nblocks to get the edge of a block
- *   nblocks  (-nblocks)  number of blocks per dimension of the grid
- *   check    (-check)    enables the correctness check in main()
- *   p, q     (-p, -q)    shape of the process grid used by get_block_rank();
- *                        main() asserts that p*q equals the MPI world size
- *   display  (-display)  enables the debug output of display_grid() */
-static unsigned long size = 16384;
-static unsigned nblocks = 16;
-static unsigned check = 0;
-static unsigned p = 1;
-static unsigned q = 1;
-static unsigned display = 0;
-
-#ifdef STARPU_HAVE_LIBNUMA
-/* Set by -numa: init_matrix() then requests an interleaved memory policy */
-static unsigned numa = 0;
-#endif
-
-/* Byte counters updated by init_matrix(): matrix blocks, and everything else
- * (pointer/handle tables and temporary buffers) */
-static size_t allocated_memory = 0;
-static size_t allocated_memory_extra = 0;
-
-/* nblocks*nblocks tables holding, for each block, its StarPU handle and its
- * data pointer */
-static starpu_data_handle_t *dataA_handles;
-static TYPE **dataA;
-
-/* In order to implement the distributed LU decomposition, we allocate
- * temporary buffers */
-#ifdef SINGLE_TMP11
-static starpu_data_handle_t tmp_11_block_handle;
-static TYPE *tmp_11_block;
-#else
-static starpu_data_handle_t *tmp_11_block_handles;
-static TYPE **tmp_11_block;
-#endif
-#ifdef SINGLE_TMP1221
-static starpu_data_handle_t *tmp_12_block_handles;
-static TYPE **tmp_12_block;
-static starpu_data_handle_t *tmp_21_block_handles;
-static TYPE **tmp_21_block;
-#else
-/* Two sets of buffers, selected with k%2 by the accessors below */
-static starpu_data_handle_t *(tmp_12_block_handles[2]);
-static TYPE **(tmp_12_block[2]);
-static starpu_data_handle_t *(tmp_21_block_handles[2]);
-static TYPE **(tmp_21_block[2]);
-#endif
-
-int get_block_rank(unsigned i, unsigned j);
-
-/*
- * Parse the command line and update the file-level run-time parameters.
- *
- *   -size N, -nblocks N, -p N, -q N   set the variable of the same name
- *   -check, -display                  set the corresponding flag to 1
- *   -numa                             sets numa when STARPU_HAVE_LIBNUMA is
- *                                     defined, otherwise warns on rank 0
- *
- * Unrecognized arguments are ignored. An option that expects a value but is
- * the last argument is reported and ignored, instead of reading past the end
- * of argv.
- */
-static void parse_args(int rank, int argc, char **argv)
-{
-	int i;
-	for (i = 1; i < argc; i++) {
-		const char *opt = argv[i];
-		int wants_value = strcmp(opt, "-size") == 0
-				|| strcmp(opt, "-nblocks") == 0
-				|| strcmp(opt, "-p") == 0
-				|| strcmp(opt, "-q") == 0;
-
-		if (wants_value) {
-			if (i + 1 >= argc) {
-				fprintf(stderr, "Warning: option %s expects a value, ignoring it\n", opt);
-				break;
-			}
-
-			/* Consume the value: it must not be examined as an
-			 * option of its own */
-			long value = strtol(argv[++i], NULL, 10);
-
-			if (strcmp(opt, "-size") == 0)
-				size = value;
-			else if (strcmp(opt, "-nblocks") == 0)
-				nblocks = value;
-			else if (strcmp(opt, "-p") == 0)
-				p = value;
-			else
-				q = value;
-		}
-		else if (strcmp(opt, "-check") == 0) {
-			check = 1;
-		}
-		else if (strcmp(opt, "-display") == 0) {
-			display = 1;
-		}
-		else if (strcmp(opt, "-numa") == 0) {
-#ifdef STARPU_HAVE_LIBNUMA
-			numa = 1;
-#else
-			if (rank == 0)
-				fprintf(stderr, "Warning: libnuma is not available\n");
-#endif
-		}
-	}
-}
-
-/* Accessor for the file-local display flag (set by the -display option). */
-unsigned STARPU_PLU(display_flag)(void)
-{
-	return display;
-}
-
-/*
- * Fill a square block of edge size/nblocks with values drawn from
- * starpu_drand48(), in increasing index order.
- */
-static void fill_block_with_random(TYPE *blockptr, unsigned size, unsigned nblocks)
-{
-	const unsigned edge = size/nblocks;
-	unsigned line;
-
-	for (line = 0; line < edge; line++)
-	{
-		unsigned pos;
-		for (pos = 0; pos < edge; pos++)
-		{
-			const unsigned idx = pos+line*edge;
-			blockptr[idx] = (TYPE)starpu_drand48();
-		}
-	}
-}
-
-/* Accessor for the temporary "11" buffer handle. With SINGLE_TMP11 there is
- * a single shared handle; otherwise there is one handle per index k. */
-#ifdef SINGLE_TMP11
-starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(void)
-{
-	return tmp_11_block_handle;
-}
-#else
-starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(unsigned k)
-{
-	return tmp_11_block_handles[k];
-}
-#endif
-
-/* Accessors for the temporary "12" and "21" buffer handles. With
- * SINGLE_TMP1221 there is one handle per index; otherwise two sets exist and
- * the set is selected by the parity of k. */
-#ifdef SINGLE_TMP1221
-starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j)
-{
-	return tmp_12_block_handles[j];
-}
-
-starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i)
-{
-	return tmp_21_block_handles[i];
-}
-#else
-starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k)
-{
-	return tmp_12_block_handles[k%2][j];
-}
-
-starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k)
-{
-	return tmp_21_block_handles[k%2][i];
-}
-#endif
-
-/* Always returns 1: every rank allocates every "11" temporary buffer. The
- * parameters are currently unused. */
-static unsigned tmp_11_block_is_needed(int rank, unsigned nblocks, unsigned k)
-{
-	return 1;
-}
-
-/* Returns 1 when at least one block (i, j) with 1 <= i < nblocks is owned by
- * rank according to get_block_rank(), 0 otherwise. */
-static unsigned tmp_12_block_is_needed(int rank, unsigned nblocks, unsigned j)
-{
-	unsigned line = 1;
-
-	while (line < nblocks)
-	{
-		if (rank == get_block_rank(line, j))
-			return 1;
-
-		line++;
-	}
-
-	return 0;
-}
-
-/* Returns 1 when at least one block (i, j) with 1 <= j < nblocks is owned by
- * rank according to get_block_rank(), 0 otherwise. */
-static unsigned tmp_21_block_is_needed(int rank, unsigned nblocks, unsigned i)
-{
-	unsigned column = 1;
-
-	while (column < nblocks)
-	{
-		if (rank == get_block_rank(i, column))
-			return 1;
-
-		column++;
-	}
-
-	return 0;
-}
-
-/*
- * Allocate, fill and register the matrix blocks owned by this MPI process,
- * then allocate and register the temporary 11, 12 and 21 buffers.
- *
- * Blocks owned by another rank get STARPU_POISON_PTR as data pointer and as
- * handle. Blocks on the diagonal of the grid get 10*nblocks added to their
- * own diagonal entries. The byte counters allocated_memory and
- * allocated_memory_extra are updated along the way.
- *
- * Every allocation is checked with STARPU_ASSERT before being used.
- */
-static void init_matrix(int rank)
-{
-#ifdef STARPU_HAVE_LIBNUMA
-	if (numa)
-	{
-		fprintf(stderr, "Using INTERLEAVE policy\n");
-		unsigned long nodemask = ((1<<0)|(1<<1));
-		int ret = set_mempolicy(MPOL_INTERLEAVE, &nodemask, 3);
-		if (ret)
-			perror("set_mempolicy failed");
-	}
-#endif
-
-	/* Allocate a grid of data handles, not all of them have to be allocated later on */
-	dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t));
-	STARPU_ASSERT(dataA_handles);
-	dataA = calloc(nblocks*nblocks, sizeof(TYPE *));
-	STARPU_ASSERT(dataA);
-	allocated_memory_extra += nblocks*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-
-	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
-
-	/* Allocate all the blocks that belong to this mpi node */
-	unsigned long i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < nblocks; i++)
-		{
-			TYPE **blockptr = &dataA[j+i*nblocks];
-			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
-
-			if (get_block_rank(i, j) == rank)
-			{
-				/* This blocks should be treated by the current MPI process */
-				/* Allocate and fill it */
-				starpu_malloc((void **)blockptr, blocksize);
-				STARPU_ASSERT(*blockptr);
-				allocated_memory += blocksize;
-
-				//fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j);
-				fill_block_with_random(*blockptr, size, nblocks);
-				//fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j);
-				if (i == j)
-				{
-					/* Add 10*nblocks to the diagonal entries of
-					 * the blocks on the diagonal of the grid */
-					unsigned tmp;
-					for (tmp = 0; tmp < size/nblocks; tmp++)
-					{
-						(*blockptr)[tmp*((size/nblocks)+1)] += (TYPE)10*nblocks;
-					}
-				}
-
-				/* Register it to StarPU */
-				starpu_matrix_data_register(handleptr, 0,
-					(uintptr_t)*blockptr, size/nblocks,
-					size/nblocks, size/nblocks, sizeof(TYPE));
-			}
-			else {
-				*blockptr = STARPU_POISON_PTR;
-				*handleptr = STARPU_POISON_PTR;
-			}
-		}
-	}
-
-	/* Allocate the temporary buffers required for the distributed algorithm */
-
-	unsigned k;
-
-	/* tmp buffer 11 */
-#ifdef SINGLE_TMP11
-	starpu_malloc((void **)&tmp_11_block, blocksize);
-	STARPU_ASSERT(tmp_11_block);
-	allocated_memory_extra += blocksize;
-	starpu_matrix_data_register(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
-			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-#else
-	tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
-	STARPU_ASSERT(tmp_11_block_handles);
-	tmp_11_block = calloc(nblocks, sizeof(TYPE *));
-	STARPU_ASSERT(tmp_11_block);
-	allocated_memory_extra += nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-
-	for (k = 0; k < nblocks; k++)
-	{
-		if (tmp_11_block_is_needed(rank, nblocks, k))
-		{
-			starpu_malloc((void **)&tmp_11_block[k], blocksize);
-			allocated_memory_extra += blocksize;
-			STARPU_ASSERT(tmp_11_block[k]);
-
-			starpu_matrix_data_register(&tmp_11_block_handles[k], 0,
-				(uintptr_t)tmp_11_block[k],
-				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-		}
-	}
-#endif
-
-	/* tmp buffers 12 and 21 */
-#ifdef SINGLE_TMP1221
-	tmp_12_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
-	STARPU_ASSERT(tmp_12_block_handles);
-	tmp_21_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t));
-	STARPU_ASSERT(tmp_21_block_handles);
-	tmp_12_block = calloc(nblocks, sizeof(TYPE *));
-	STARPU_ASSERT(tmp_12_block);
-	tmp_21_block = calloc(nblocks, sizeof(TYPE *));
-	STARPU_ASSERT(tmp_21_block);
-
-	allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-#else
-	for (i = 0; i < 2; i++) {
-		tmp_12_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle_t));
-		STARPU_ASSERT(tmp_12_block_handles[i]);
-		tmp_21_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle_t));
-		STARPU_ASSERT(tmp_21_block_handles[i]);
-		tmp_12_block[i] = calloc(nblocks, sizeof(TYPE *));
-		STARPU_ASSERT(tmp_12_block[i]);
-		tmp_21_block[i] = calloc(nblocks, sizeof(TYPE *));
-		STARPU_ASSERT(tmp_21_block[i]);
-
-		allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
-	}
-#endif
-
-	for (k = 0; k < nblocks; k++)
-	{
-#ifdef SINGLE_TMP1221
-		if (tmp_12_block_is_needed(rank, nblocks, k))
-		{
-			starpu_malloc((void **)&tmp_12_block[k], blocksize);
-			allocated_memory_extra += blocksize;
-			STARPU_ASSERT(tmp_12_block[k]);
-
-			starpu_matrix_data_register(&tmp_12_block_handles[k], 0,
-				(uintptr_t)tmp_12_block[k],
-				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-		}
-
-		if (tmp_21_block_is_needed(rank, nblocks, k))
-		{
-			starpu_malloc((void **)&tmp_21_block[k], blocksize);
-			allocated_memory_extra += blocksize;
-			STARPU_ASSERT(tmp_21_block[k]);
-
-			starpu_matrix_data_register(&tmp_21_block_handles[k], 0,
-				(uintptr_t)tmp_21_block[k],
-				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-		}
-#else
-		/* One buffer per set (see the k%2 selection in the accessors) */
-		for (i = 0; i < 2; i++) {
-			if (tmp_12_block_is_needed(rank, nblocks, k))
-			{
-				starpu_malloc((void **)&tmp_12_block[i][k], blocksize);
-				allocated_memory_extra += blocksize;
-				STARPU_ASSERT(tmp_12_block[i][k]);
-
-				starpu_matrix_data_register(&tmp_12_block_handles[i][k], 0,
-					(uintptr_t)tmp_12_block[i][k],
-					size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-			}
-
-			if (tmp_21_block_is_needed(rank, nblocks, k))
-			{
-				starpu_malloc((void **)&tmp_21_block[i][k], blocksize);
-				allocated_memory_extra += blocksize;
-				STARPU_ASSERT(tmp_21_block[i][k]);
-
-				starpu_matrix_data_register(&tmp_21_block_handles[i][k], 0,
-					(uintptr_t)tmp_21_block[i][k],
-					size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
-			}
-		}
-#endif
-	}
-
-	//display_all_blocks(nblocks, size/nblocks);
-}
-
-/* Returns the data pointer stored for block (i, j), i.e. dataA[j+i*nblocks].
- * init_matrix() stores STARPU_POISON_PTR there for blocks owned by another
- * rank. */
-TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j)
-{
-	return dataA[j+i*nblocks];
-}
-
-/* Returns the rank owning block (i, j) on the p x q process grid.
- * NOTE(review): p and q come from the command line; a value of 0 would make
- * the modulo below undefined. */
-int get_block_rank(unsigned i, unsigned j)
-{
-	/* Take a 2D block cyclic distribution */
-	/* NB: p (resp. q) is for "direction" i (resp. j) */
-	return (j % q) * p + (i % p);
-}
-
-/* Returns the StarPU handle stored for block (i, j). init_matrix() stores
- * STARPU_POISON_PTR there for blocks owned by another rank. */
-starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j)
-{
-	return dataA_handles[j+i*nblocks];
-}
-
-/*
- * When the display flag is set, print on stderr the owner rank, the data
- * pointer and the handle of every block of the grid, one output line per
- * value of j. Executed on every rank (the rank == 0 filter is commented out).
- */
-static void display_grid(int rank, unsigned nblocks)
-{
-	if (!display)
-		return;
-
-	//if (rank == 0)
-	{
-		fprintf(stderr, "2D grid layout (Rank %d): \n", rank);
-
-		unsigned i, j;
-		for (j = 0; j < nblocks; j++)
-		{
-			for (i = 0; i < nblocks; i++)
-			{
-				TYPE *blockptr = STARPU_PLU(get_block)(i, j);
-				starpu_data_handle_t handle = STARPU_PLU(get_block_handle)(i, j);
-
-				/* %p expects a void * argument */
-				fprintf(stderr, "%d (data %p handle %p)", get_block_rank(i, j), (void *)blockptr, (void *)handle);
-			}
-			fprintf(stderr, "\n");
-		}
-	}
-}
-
-/*
- * Driver of the distributed LU example.
- *
- * Initializes StarPU and its MPI layer, parses the command line, builds the
- * local part of the matrix, runs STARPU_PLU(plu_main) between two barriers,
- * and reports on rank 0 the min/max/average time returned by the ranks. With
- * -check, the matrix is gathered before the factorization and handed to
- * STARPU_PLU(compute_lu_matrix) afterwards.
- */
-int main(int argc, char **argv)
-{
-	int rank;
-	int world_size;
-
-#if 0
-	/*
-	 *	Initialization
-	 */
-	int thread_support;
-	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) {
-		fprintf(stderr,"MPI_Init_thread failed\n");
-		exit(1);
-	}
-	if (thread_support == MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
-	if (thread_support < MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI does not have thread support!\n");
-
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
-#endif
-
-	starpu_srand48((long int)time(NULL));
-
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	/* We disable sequential consistency in this example */
-	starpu_data_set_default_sequential_consistency_flag(0);
-
-	starpu_mpi_initialize_extended(&rank, &world_size);
-
-	/* Parse the options only now: parse_args() reads rank, which has no
-	 * value before the MPI layer is initialized. */
-	parse_args(rank, argc, argv);
-
-	STARPU_ASSERT(p*q == world_size);
-
-	starpu_helper_cublas_init();
-
-	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	/*
-	 * 	Problem Init
-	 */
-
-	init_matrix(rank);
-
-	/* Divide before narrowing to int, so that counters above INT_MAX
-	 * bytes are still reported correctly */
-	fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank,
-			(int)(allocated_memory/(1024*1024)),
-			(int)(allocated_memory_extra/(1024*1024)),
-			(int)((allocated_memory+allocated_memory_extra)/(1024*1024)));
-
-	display_grid(rank, nblocks);
-
-	TYPE *a_r = NULL;
-//	STARPU_PLU(display_data_content)(a_r, size);
-
-	TYPE *x = NULL, *y = NULL;
-
-	if (check)
-	{
-		x = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(x);
-
-		y = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(y);
-
-		if (rank == 0)
-		{
-			unsigned ind;
-			for (ind = 0; ind < size; ind++)
-				x[ind] = (TYPE)starpu_drand48();
-		}
-
-		a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
-
-		if (rank == 0)
-			STARPU_PLU(display_data_content)(a_r, size);
-
-//		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
-	}
-
-	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
-
-	/*
-	 * 	Report performance
-	 */
-
-	int reduce_ret;
-	double min_timing = timing;
-	double max_timing = timing;
-	double sum_timing = timing;
-
-	reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-
-	reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-
-	reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-
-	if (rank == 0)
-	{
-		fprintf(stderr, "Computation took: %f ms\n", max_timing/1000);
-		fprintf(stderr, "\tMIN : %f ms\n", min_timing/1000);
-		fprintf(stderr, "\tMAX : %f ms\n", max_timing/1000);
-		fprintf(stderr, "\tAVG : %f ms\n", sum_timing/(world_size*1000));
-
-		/* 2n^3/3, computed in double precision */
-		double n = (double)size;
-		double flop = (2.0*n*n*n)/3.0;
-		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/max_timing/1000.0));
-	}
-
-	/*
-	 *	Test Result Correctness
-	 */
-
-	if (check)
-	{
-		/*
-		 *	Compute || A - LU ||
-		 */
-
-		STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r);
-
-#if 0
-		/*
-		 *	Compute || Ax - LUx ||
-		 */
-
-		unsigned ind;
-
-		TYPE *y2 = calloc(size, sizeof(TYPE));
-		STARPU_ASSERT(y2);
-
-		if (rank == 0)
-		{
-			for (ind = 0; ind < size; ind++)
-			{
-				y2[ind] = (TYPE)0.0;
-			}
-		}
-
-		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);
-
-		/* Compute y2 = y2 - y */
-	        CPU_AXPY(size, -1.0, y, 1, y2, 1);
-
-	        TYPE err = CPU_ASUM(size, y2, 1);
-	        int max = CPU_IAMAX(size, y2, 1);
-
-	        fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
-	        fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
-#endif
-	}
-
-	/*
-	 * 	Termination
-	 */
-
-	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	/* x and y are only referenced from this function; free(NULL) is a
-	 * no-op when -check was not given.
-	 * NOTE(review): a_r is left alone because the ownership rules of
-	 * compute_lu_matrix() are not visible here. */
-	free(x);
-	free(y);
-
-	starpu_helper_cublas_shutdown();
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-#if 0
-	MPI_Finalize();
-#endif
-
-	return 0;
-}

+ 0 - 19
mpi/examples/mpi_lu/plu_example_double.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "plu_example.c"

+ 0 - 19
mpi/examples/mpi_lu/plu_example_float.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "plu_example.c"

+ 0 - 394
mpi/examples/mpi_lu/plu_solve.c

@@ -1,394 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <math.h>
-#include "pxlu.h"
-
-/*
- *	Various useful functions
- */
-
-/*
- * Returns the Frobenius norm of the n x n matrix stored contiguously in v,
- * i.e. the square root of the sum of the squares of its entries.
- */
-static double frobenius_norm(TYPE *v, unsigned n)
-{
-        double sum2 = 0.0;
-
-        /* compute sqrt(Sum(|x|^2)) */
-
-        unsigned i,j;
-        for (j = 0; j < n; j++)
-        for (i = 0; i < n; i++)
-        {
-                /* fabs(), not fabsl(): the operand is a double. The index
-                 * is widened so that it cannot wrap for large n. */
-                double a = fabs((double)v[i+(size_t)n*j]);
-                sum2 += a*a;
-        }
-
-        return sqrt(sum2);
-}
-
-/*
- * Debug helper: when the display flag is set, dump the blocksize x blocksize
- * matrix "data" on stderr, one output line per value of the fast index,
- * between a "DISPLAY BLOCK" header and a "****" footer.
- */
-void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize)
-{
-	if (STARPU_PLU(display_flag)())
-	{
-		unsigned line, pos;
-
-		fprintf(stderr, "DISPLAY BLOCK\n");
-
-		for (line = 0; line < blocksize; line++)
-		{
-			for (pos = 0; pos < blocksize; pos++)
-				fprintf(stderr, "%f ", data[line+pos*blocksize]);
-
-			fprintf(stderr, "\n");
-		}
-
-		fprintf(stderr, "****\n");
-	}
-}
-
-/*
- * Copy into outblock the entries of inblock at index lj + li*block_size with
- * li > lj, and set the diagonal of outblock to 1. The other entries of
- * outblock are left untouched.
- */
-void STARPU_PLU(extract_upper)(unsigned block_size, TYPE *inblock, TYPE *outblock)
-{
-	unsigned d;
-	for (d = 0; d < block_size; d++)
-	{
-		unsigned other = d + 1;
-
-		/* Upper block diag is 1 */
-		outblock[d*(block_size + 1)] = (TYPE)1.0;
-
-		while (other < block_size)
-		{
-			const unsigned idx = d + other*block_size;
-			outblock[idx] = inblock[idx];
-			other++;
-		}
-	}
-}
-
-/*
- * Copy into outblock the entries of inblock at index lj + li*block_size with
- * li <= lj (diagonal included). The other entries of outblock are left
- * untouched.
- */
-void STARPU_PLU(extract_lower)(unsigned block_size, TYPE *inblock, TYPE *outblock)
-{
-	unsigned d;
-	for (d = 0; d < block_size; d++)
-	{
-		unsigned other = 0;
-
-		while (other <= d)
-		{
-			const unsigned idx = d + other*block_size;
-			outblock[idx] = inblock[idx];
-			other++;
-		}
-	}
-}
-
-/*
- *	Compute Ax = y
- */
-
-/* Logs the three pointers on stderr, then calls CPU_GEMV on a square block
- * of edge block_size with both scalar arguments equal to 1.0.
- * NOTE(review): assuming CPU_GEMV follows the BLAS xGEMV argument order, this
- * accumulates sub_y += block_data * sub_x rather than overwriting sub_y --
- * confirm against the wrapper's declaration. */
-static void STARPU_PLU(compute_ax_block)(unsigned block_size, TYPE *block_data, TYPE *sub_x, TYPE *sub_y)
-{
-	fprintf(stderr, "block data %p sub x %p sub y %p\n", block_data, sub_x, sub_y);
-	CPU_GEMV("N", block_size, block_size, 1.0, block_data, block_size, sub_x, 1, 1.0, sub_y, 1);
-}
-
-/*
- * Apply compute_ax_block() to a zero-initialized copy of a diagonal block in
- * which only the part selected by extract_upper() (unit diagonal included)
- * has been filled in. The block edge is size/nblocks.
- */
-static void STARPU_PLU(compute_ax_block_upper)(unsigned size, unsigned nblocks,
-				 TYPE *block_data, TYPE *sub_x, TYPE *sub_y)
-{
-	unsigned block_size = size/nblocks;
-
-	/* Take a copy of the upper part of the diagonal block */
-	TYPE *upper_block_copy = calloc((block_size)*(block_size), sizeof(TYPE));
-	STARPU_ASSERT(upper_block_copy);
-	STARPU_PLU(extract_upper)(block_size, block_data, upper_block_copy);
-
-	STARPU_PLU(compute_ax_block)(block_size, upper_block_copy, sub_x, sub_y);
-
-	free(upper_block_copy);
-}
-
-/*
- * Apply compute_ax_block() to a zero-initialized copy of a diagonal block in
- * which only the part selected by extract_lower() has been filled in. The
- * block edge is size/nblocks.
- */
-static void STARPU_PLU(compute_ax_block_lower)(unsigned size, unsigned nblocks,
-				 TYPE *block_data, TYPE *sub_x, TYPE *sub_y)
-{
-	unsigned block_size = size/nblocks;
-
-	/* Take a copy of the lower part of the diagonal block */
-	TYPE *lower_block_copy = calloc((block_size)*(block_size), sizeof(TYPE));
-	STARPU_ASSERT(lower_block_copy);
-	STARPU_PLU(extract_lower)(block_size, block_data, lower_block_copy);
-
-	STARPU_PLU(compute_ax_block)(block_size, lower_block_copy, sub_x, sub_y);
-
-	free(lower_block_copy);
-}
-
-/*
- * Two-pass distributed product on the factorized blocks.
- *
- * Pass 1: every rank accumulates, for the blocks it owns, the contribution
- * of the "upper" blocks to a local vector yi; the contributions are summed
- * into x on rank 0 and x is then broadcast to every rank.
- * Pass 2: same with the "lower" blocks, summed into y on rank 0.
- *
- * x is overwritten on every rank; y is only meaningful on rank 0. Both must
- * hold size elements.
- */
-void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank)
-{
-	/* Create temporary buffers where all MPI processes are going to
-	 * compute Ui x = yi where Ai is the matrix containing the blocks of U
-	 * affected to process i, and 0 everywhere else. We then have y as the
-	 * sum of all yi. */
-	TYPE *yi = calloc(size, sizeof(TYPE));
-	STARPU_ASSERT(yi);
-
-	fprintf(stderr, "Compute LU\n");
-
-	unsigned block_size = size/nblocks;
-
-	/* Compute UiX = Yi */
-	unsigned long i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		if (get_block_rank(j, j) == rank)
-		{
-			TYPE *block_data = STARPU_PLU(get_block)(j, j);
-			TYPE *sub_x = &x[j*(block_size)];
-			TYPE *sub_yi = &yi[j*(block_size)];
-
-			STARPU_PLU(compute_ax_block_upper)(size, nblocks, block_data, sub_x, sub_yi);
-		}
-
-		for (i = j + 1; i < nblocks; i++)
-		{
-			if (get_block_rank(i, j) == rank)
-			{
-				/* That block belongs to the current MPI process */
-				TYPE *block_data = STARPU_PLU(get_block)(i, j);
-				TYPE *sub_x = &x[i*(block_size)];
-				TYPE *sub_yi = &yi[j*(block_size)];
-
-				STARPU_PLU(compute_ax_block)(size/nblocks, block_data, sub_x, sub_yi);
-			}
-		}
-	}
-
-	/* Grab Sum Yi in X */
-	int reduce_ret;
-	reduce_ret = MPI_Reduce(yi, x, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-	memset(yi, 0, size*sizeof(TYPE));
-
-//	unsigned ind;
-//	if (rank == 0)
-//	{
-//		fprintf(stderr, "INTERMEDIATE\n");
-//		for (ind = 0; ind < STARPU_MIN(10, size); ind++)
-//		{
-//			fprintf(stderr, "x[%d] = %f\n", ind, (float)x[ind]);
-//		}
-//		fprintf(stderr, "****\n");
-//	}
-
-	/* Everyone needs x: broadcast the buffer itself (x), not the address
-	 * of the local pointer variable (&x) */
-	int bcst_ret;
-	bcst_ret = MPI_Bcast(x, size, MPI_TYPE, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(bcst_ret == MPI_SUCCESS);
-
-	/* Compute LiX = Yi (with X = UX) */
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < j; i++)
-		{
-			if (get_block_rank(i, j) == rank)
-			{
-				/* That block belongs to the current MPI process */
-				TYPE *block_data = STARPU_PLU(get_block)(i, j);
-				TYPE *sub_x = &x[i*(block_size)];
-				TYPE *sub_yi = &yi[j*(block_size)];
-
-				STARPU_PLU(compute_ax_block)(size/nblocks, block_data, sub_x, sub_yi);
-			}
-		}
-
-		if (get_block_rank(j, j) == rank)
-		{
-			TYPE *block_data = STARPU_PLU(get_block)(j, j);
-			TYPE *sub_x = &x[j*(block_size)];
-			TYPE *sub_yi = &yi[j*(block_size)];
-
-			STARPU_PLU(compute_ax_block_lower)(size, nblocks, block_data, sub_x, sub_yi);
-		}
-	}
-
-	/* Grab Sum Yi in Y */
-	reduce_ret = MPI_Reduce(yi, y, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-
-	free(yi);
-}
-
-
-
-/*
- *	Allocate a contiguous matrix on node 0 and fill it with the whole
- *	content of the matrix distributed across all nodes.
- *
- *	This is a collective operation: every rank must call it so that the
- *	owners of remote blocks can send them to rank 0. Each rank gets back a
- *	freshly allocated, zero-initialised size x size buffer that the caller
- *	must free(); only the buffer returned on rank 0 is filled in.
- */
-
-TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks)
-{
-//	fprintf(stderr, "RECONSTRUCT MATRIX size %d nblocks %d\n", size, nblocks);
-
-	/* Element counts and offsets are computed in size_t: size*size wraps
-	 * around in unsigned arithmetic as soon as size reaches 65536. */
-	size_t ld = size;
-	size_t block_size = size/nblocks;
-	size_t block_elems = block_size*block_size;
-
-	TYPE *bigmatrix = calloc(ld*ld, sizeof(TYPE));
-	STARPU_ASSERT(bigmatrix || ld == 0);
-
-	int rank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-	unsigned bi, bj;
-	for (bj = 0; bj < nblocks; bj++)
-	for (bi = 0; bi < nblocks; bi++)
-	{
-		/* Stays NULL on ranks that neither own nor receive this block */
-		TYPE *block = NULL;
-
-		int block_rank = get_block_rank(bi, bj);
-
-		if (block_rank == 0)
-		{
-			/* Rank 0 owns the block: no transfer is needed */
-			block = STARPU_PLU(get_block)(bi, bj);
-		}
-		else if (rank == 0)
-		{
-			/* Receive the remote block into a temporary buffer */
-			MPI_Status status;
-
-			block = calloc(block_elems, sizeof(TYPE));
-			STARPU_ASSERT(block || block_elems == 0);
-
-			int ret = MPI_Recv(block, (int)block_elems, MPI_TYPE, block_rank, 0, MPI_COMM_WORLD, &status);
-			STARPU_ASSERT(ret == MPI_SUCCESS);
-		}
-		else if (rank == block_rank)
-		{
-			/* We own the block: ship it to rank 0 */
-			block = STARPU_PLU(get_block)(bi, bj);
-			int ret = MPI_Send(block, (int)block_elems, MPI_TYPE, 0, 0, MPI_COMM_WORLD);
-			STARPU_ASSERT(ret == MPI_SUCCESS);
-		}
-
-		if (rank == 0)
-		{
-			/* Copy the block at its place in the big matrix */
-			size_t j, i;
-			for (j = 0; j < block_size; j++)
-			for (i = 0; i < block_size; i++)
-			{
-				bigmatrix[(j + bj*block_size)+(i+bi*block_size)*ld] =
-									block[j+i*block_size];
-			}
-
-			/* Only the temporary receive buffers belong to us */
-			if (block_rank != 0)
-				free(block);
-		}
-	}
-
-	return bigmatrix;
-}
-
-/*
- * Compute y = A x where A is the block-distributed matrix.
- *
- * Collective over MPI_COMM_WORLD. The content of x and the result y only
- * matter on rank 0, but x must point to a buffer of `size` elements on every
- * rank because it is the target of the broadcast below.
- *
- *	size	: dimension of the matrix (and length of x and y)
- *	x	: input vector, broadcast from rank 0 to all ranks
- *	y	: output vector, only written on rank 0
- *	nblocks	: number of blocks per dimension
- *	rank	: rank of the calling process, used to select the local blocks
- */
-void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank)
-{
-	unsigned block_size = size/nblocks;
-
-	/* Send x to everyone. The vector itself is passed here: passing &x
-	 * would broadcast the bytes of the local pointer variable and
-	 * overwrite the stack of the receivers. */
-	int bcst_ret;
-	bcst_ret = MPI_Bcast(x, size, MPI_TYPE, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(bcst_ret == MPI_SUCCESS);
-
-	/* Create temporary buffers where all MPI processes are going to
-	 * compute Ai x = yi where Ai is the matrix containing the blocks of A
-	 * affected to process i, and 0 everywhere else. We then have y as the
-	 * sum of all yi. */
-	TYPE *yi = calloc(size, sizeof(TYPE));
-	STARPU_ASSERT(yi || size == 0);
-
-	/* Compute Aix = yi */
-	unsigned long i,j;
-	for (j = 0; j < nblocks; j++)
-	{
-		for (i = 0; i < nblocks; i++)
-		{
-			if (get_block_rank(i, j) == rank)
-			{
-				/* That block belongs to the current MPI process */
-				TYPE *block_data = STARPU_PLU(get_block)(i, j);
-				TYPE *sub_x = &x[i*block_size];
-				TYPE *sub_yi = &yi[j*block_size];
-
-				STARPU_PLU(compute_ax_block)(block_size, block_data, sub_x, sub_yi);
-			}
-		}
-	}
-
-	/* Compute the Sum of all yi = y */
-	int reduce_ret = MPI_Reduce(yi, y, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
-
-	/* y is only guaranteed to be a valid buffer on rank 0 */
-	if (rank == 0)
-		fprintf(stderr, "RANK %d - FOO 1 y[0] %f\n", rank, y[0]);
-
-	free(yi);
-}
-
-/*
- * Check the factorization against the saved input matrix.
- *
- * The factored matrix is gathered with reconstruct_matrix() (so every rank
- * must call this function), then rank 0 splits it into L and a U with a unit
- * diagonal, computes L*U and reports how far it is from Asaved on stderr.
- * Asaved is only read on rank 0.
- */
-void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved)
-{
-	TYPE *all_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
-
-	unsigned display = STARPU_PLU(display_flag)();
-
-	int rank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-	if (rank == 0)
-	{
-		/* Count elements in size_t so that allocation, zero fill and
-		 * indexing all agree even when size*size does not fit in an
-		 * unsigned. */
-		size_t n = size;
-		size_t nelems = n*n;
-
-		TYPE *L = calloc(nelems, sizeof(TYPE));
-		TYPE *U = calloc(nelems, sizeof(TYPE));
-		STARPU_ASSERT((L && U) || nelems == 0);
-
-		/* Entries [j+i*size] with i < j and the diagonal go to L,
-		 * entries with i > j go to U, whose diagonal is set to 1 */
-		size_t i, j;
-		for (j = 0; j < n; j++)
-		{
-			for (i = 0; i < j; i++)
-			{
-				L[j+i*n] = all_r[j+i*n];
-			}
-
-			/* diag i = j */
-			L[j+j*n] = all_r[j+j*n];
-			U[j+j*n] = 1.0;
-
-			for (i = j+1; i < n; i++)
-			{
-				U[j+i*n] = all_r[j+i*n];
-			}
-		}
-
-		STARPU_PLU(display_data_content)(L, size);
-		STARPU_PLU(display_data_content)(U, size);
-
-		/* now A_err = L, compute L*U */
-		CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
-
-		if (display)
-			fprintf(stderr, "\nLU\n");
-
-		STARPU_PLU(display_data_content)(L, size);
-
-		/* compute "LU - A" in L*/
-		CPU_AXPY(size*size, -1.0, Asaved, 1, L, 1);
-
-		TYPE err = CPU_ASUM(size*size, L, 1);
-		int max = CPU_IAMAX(size*size, L, 1);
-
-		if (display)
-			fprintf(stderr, "DISPLAY ERROR\n");
-
-		STARPU_PLU(display_data_content)(L, size);
-
-		fprintf(stderr, "(A - LU) Avg error : %e\n", err/(size*size));
-		fprintf(stderr, "(A - LU) Max error : %e\n", L[max]);
-
-		double residual = frobenius_norm(L, size);
-		double matnorm = frobenius_norm(Asaved, size);
-
-		fprintf(stderr, "||A-LU|| / (||A||*N) : %e\n", residual/(matnorm*size));
-
-		/* The work matrices are not needed once the report is printed */
-		free(L);
-		free(U);
-	}
-
-	/* reconstruct_matrix() hands a fresh buffer to every rank */
-	free(all_r);
-}
-

+ 0 - 19
mpi/examples/mpi_lu/plu_solve_double.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-double.h"
-#include "plu_solve.c"

+ 0 - 19
mpi/examples/mpi_lu/plu_solve_float.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "plu_solve.c"

+ 0 - 19
mpi/examples/mpi_lu/pslu.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "pxlu.c"

+ 0 - 19
mpi/examples/mpi_lu/pslu_kernels.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "pxlu_kernels.c"

+ 0 - 870
mpi/examples/mpi_lu/pxlu.c

@@ -1,870 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "pxlu.h"
-#include "pxlu_kernels.h"
-#include <sys/time.h>
-
-/*
- * MPI message tags. The kind of block (11, 12 or 21) is stored from bit 16
- * upwards. MPI_TAG12 shifts k by 8 bits and keeps j in the low bits, MPI_TAG21
- * shifts i by 8 bits and keeps k in the low bits, so the value kept in the low
- * bits has to stay below 256 for two tags not to collide.
- */
-#define MPI_TAG11(k)	((1U << 16) | (k))
-#define MPI_TAG12(k, j)	((2U << 16) | (k)<<8 | (j))
-#define MPI_TAG21(k, i)	((3U << 16) | (i)<<8 | (k))
-
-// 11 21
-// 12 22
-
-/*
- * StarPU tags. The kind of tag is stored from bit 50 upwards, the step k is
- * shifted by 32 bits (except for the *11* tags where it sits in the low bits),
- * and the remaining block index occupies the low bits. TAG22 additionally
- * shifts i by 16 bits, which leaves 16 bits for j.
- */
-#define TAG11(k)	((starpu_tag_t)( (1ULL<<50) | (unsigned long long)(k)))
-#define TAG12(k,j)	((starpu_tag_t)(((2ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG21(k,i)	((starpu_tag_t)(((3ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-#define TAG22(k,i,j)	((starpu_tag_t)(((4ULL<<50) | ((unsigned long long)(k)<<32) 	\
-					| ((unsigned long long)(i)<<16)	\
-					| (unsigned long long)(j))))
-/* Tags unlocked once a block has been sent to (or received from) the other
- * nodes, see send_data_to_mask and receive_when_deps_are_done */
-#define TAG11_SAVE(k)	((starpu_tag_t)( (5ULL<<50) | (unsigned long long)(k)))
-#define TAG12_SAVE(k,j)	((starpu_tag_t)(((6ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG21_SAVE(k,i)	((starpu_tag_t)(((7ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-
-/* Tags given to the synchronization tasks that postpone a receive until its
- * dependencies are done (the partial_tag of receive_when_deps_are_done) */
-#define TAG11_SAVE_PARTIAL(k)	((starpu_tag_t)( (8ULL<<50) | (unsigned long long)(k)))
-#define TAG12_SAVE_PARTIAL(k,j)	((starpu_tag_t)(((9ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG21_SAVE_PARTIAL(k,i)	((starpu_tag_t)(((10ULL<<50) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-
-/* Tag the first diagonal task depends on; plu_main notifies it to start the
- * factorization */
-#define STARPU_TAG_INIT	((starpu_tag_t)(11ULL<<50))
-
-//#define VERBOSE_INIT	1
-
-//#define DEBUG	1
-
-/* When non-zero, no task is given STARPU_MAX_PRIO */
-static unsigned no_prio = 0;
-
-/* Set by plu_main before any task is created */
-static unsigned nblocks = 0;
-static int rank = -1;
-static int world_size = -1;
-
-/* Block indices handed to the task completion callbacks; each creator only
- * fills in the fields its callback reads */
-struct callback_arg {
-	unsigned i, j, k;
-};
-
-/*
- *	Various
- */
-
-/* Build a heap-allocated record of the (i, j, k) indices a task works on */
-static struct debug_info *create_debug_info(unsigned i, unsigned j, unsigned k)
-{
-	struct debug_info *dbg = malloc(sizeof(*dbg));
-
-	dbg->k = k;
-	dbg->j = j;
-	dbg->i = i;
-
-	return dbg;
-}
-
-/* Create a StarPU task identified by tag 'id', with no codelet argument yet */
-static struct starpu_task *create_task(starpu_tag_t id)
-{
-	struct starpu_task *new_task = starpu_task_create();
-
-	new_task->tag_id = id;
-	new_task->use_tag = 1;
-	new_task->cl_arg = NULL;
-
-	return new_task;
-}
-
-/* Post a detached send of 'handle' to each rank flagged in 'rank_mask'; 'tag'
- * is unlocked when all the transfers are over, or right away when the mask
- * selects nobody. */
-static void send_data_to_mask(starpu_data_handle_t handle, int *rank_mask, int mpi_tag, starpu_tag_t tag)
-{
-	STARPU_ASSERT(handle != STARPU_POISON_PTR);
-
-	/* One slot per possible destination */
-	int dst_ranks[world_size];
-	MPI_Comm dst_comms[world_size];
-	int dst_mpi_tags[world_size];
-	starpu_data_handle_t dst_handles[world_size];
-
-	unsigned ndst = 0;
-	unsigned node;
-	for (node = 0; node < world_size; node++)
-	{
-		if (!rank_mask[node])
-			continue;
-
-		dst_ranks[ndst] = node;
-		dst_comms[ndst] = MPI_COMM_WORLD;
-		dst_mpi_tags[ndst] = mpi_tag;
-		dst_handles[ndst] = handle;
-		ndst++;
-	}
-
-	if (ndst != 0)
-	{
-		starpu_mpi_isend_array_detached_unlock_tag(ndst, dst_handles,
-				dst_ranks, dst_mpi_tags, dst_comms, tag);
-		return;
-	}
-
-	/* Nothing to send: release the tag at once */
-	starpu_tag_notify_from_apps(tag);
-}
-
-/* Initiate a receive request once all dependencies are fulfilled and unlock
- * tag 'unlocked_tag' once it's done. */
-
-/* Parameters of a postponed receive, filled by receive_when_deps_are_done and
- * consumed (then freed) by callback_receive_when_done */
-struct recv_when_done_callback_arg {
-	/* rank the data is received from */
-	int source;
-	/* MPI tag of the message */
-	int mpi_tag;
-	/* handle the data is received into */
-	starpu_data_handle_t handle;
-	/* StarPU tag unlocked by the receive request */
-	starpu_tag_t unlocked_tag;
-};
-
-/* Post the detached receive described by '_arg' (a heap-allocated
- * struct recv_when_done_callback_arg) and release the description. */
-static void callback_receive_when_done(void *_arg)
-{
-	struct recv_when_done_callback_arg *req = _arg;
-
-	starpu_data_handle_t dst_handle = req->handle;
-	int src = req->source;
-	int msg_tag = req->mpi_tag;
-	starpu_tag_t done_tag = req->unlocked_tag;
-
-	starpu_mpi_irecv_detached_unlock_tag(dst_handle, src, msg_tag,
-			MPI_COMM_WORLD, done_tag);
-
-	free(req);
-}
-
-/* Receive 'handle' from 'source' with MPI tag 'mpi_tag' and have the request
- * unlock 'unlocked_tag'. With no dependency the receive is posted at once;
- * otherwise it is posted by a synchronization task tagged 'partial_tag' that
- * depends on the 'ndeps' tags of 'deps_tags'. */
-static void receive_when_deps_are_done(unsigned ndeps, starpu_tag_t *deps_tags,
-				int source, int mpi_tag,
-				starpu_data_handle_t handle,
-				starpu_tag_t partial_tag,
-				starpu_tag_t unlocked_tag)
-{
-	STARPU_ASSERT(handle != STARPU_POISON_PTR);
-
-	/* Freed by callback_receive_when_done */
-	struct recv_when_done_callback_arg *req = malloc(sizeof(*req));
-
-	req->unlocked_tag = unlocked_tag;
-	req->handle = handle;
-	req->mpi_tag = mpi_tag;
-	req->source = source;
-
-	if (ndeps > 0)
-	{
-		starpu_create_sync_task(partial_tag, ndeps, deps_tags,
-						callback_receive_when_done, req);
-	}
-	else
-	{
-		callback_receive_when_done(req);
-	}
-}
-
-/*
- *	Task 11 (diagonal factorization)
- */
-
-/* Schedule the reception of diagonal block k from the rank that owns it into
- * the temporary 11 buffer; TAG11_SAVE(k) is unlocked by the receive request.
- * Called on ranks that use block 11_k without owning it. */
-static void create_task_11_recv(unsigned k)
-{
-	/* The current node is not computing that task, so we receive the block
-	 * with MPI */
-
-	/* We don't issue a MPI receive request until everyone using the
-	 * temporary buffer is done : 11_(k-1) can be used by 12_(k-1)j and
-	 * 21(k-1)i with i,j >= k */
-	unsigned ndeps = 0;
-	starpu_tag_t tag_array[2*nblocks];
-	
-#ifdef SINGLE_TMP11
-	/* A single buffer is shared by all steps: collect the local tasks of
-	 * step k-1 that still read it. Without SINGLE_TMP11 there is one
-	 * buffer per step and ndeps stays 0. */
-	unsigned i, j;
-	if (k > 0)
-	for (i = (k-1)+1; i < nblocks; i++)
-	{
-		if (rank == get_block_rank(i, k-1))
-			tag_array[ndeps++] = TAG21(k-1, i);
-	}
-
-	if (k > 0)
-	for (j = (k-1)+1; j < nblocks; j++)
-	{
-		if (rank == get_block_rank(k-1, j))
-			tag_array[ndeps++] = TAG12(k-1, j);
-	}
-#endif
-	
-	/* The owner of the diagonal block is the sender */
-	int source = get_block_rank(k, k);
-#ifdef SINGLE_TMP11
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_11_block_handle)();
-#else
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_11_block_handle)(k);
-#endif
-	int mpi_tag = MPI_TAG11(k);
-	starpu_tag_t partial_tag = TAG11_SAVE_PARTIAL(k);
-	starpu_tag_t unlocked_tag = TAG11_SAVE(k);
-
-//	fprintf(stderr, "NODE %d - 11 (%d) - recv when done ndeps %d - tag array %lx\n", rank, k, ndeps, tag_array[0]);
-	receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag);
-}
-
-/* Fill 'rank_mask' (world_size entries) with 1 for every rank owning a block
- * that needs diagonal block k, and 0 for the others. */
-static void find_nodes_using_11(unsigned k, int *rank_mask)
-{
-	unsigned idx;
-
-	memset(rank_mask, 0, world_size*sizeof(int));
-
-	/* Owners of the blocks (idx, k) with idx > k */
-	for (idx = k+1; idx < nblocks; idx++)
-		rank_mask[get_block_rank(idx, k)] = 1;
-
-	/* Owners of the blocks (k, idx) with idx > k */
-	for (idx = k+1; idx < nblocks; idx++)
-		rank_mask[get_block_rank(k, idx)] = 1;
-}
-
-/* Completion callback of task 11_k: send the diagonal block to every other
- * rank that needs it. TAG11_SAVE(k) is unlocked once the sends are done. */
-static void callback_task_11_real(void *_arg)
-{
-	struct callback_arg *cb = _arg;
-	unsigned step = cb->k;
-
-	/* Destinations: all the users of the block but ourselves */
-	int dest_mask[world_size];
-	find_nodes_using_11(step, dest_mask);
-	dest_mask[rank] = 0;
-
-	send_data_to_mask(STARPU_PLU(get_block_handle)(step, step), dest_mask,
-				MPI_TAG11(step), TAG11_SAVE(step));
-
-	free(cb);
-}
-
-/* Submit the task factorizing diagonal block k (codelet cl11) on the rank that
- * owns it. Its callback sends the result to the other ranks. */
-static void create_task_11_real(unsigned k)
-{
-	struct starpu_task *task = create_task(TAG11(k));
-
-	task->cl = &STARPU_PLU(cl11);
-
-	task->cl_arg = create_debug_info(k, k, k);
-
-	/* which sub-data is manipulated ? */
-	task->handles[0] = STARPU_PLU(get_block_handle)(k, k);
-
-	/* Only k is read by callback_task_11_real, which also frees arg */
-	struct callback_arg *arg = malloc(sizeof(struct callback_arg));
-		arg->k = k;
-
-	task->callback_func = callback_task_11_real;
-	task->callback_arg = arg;
-
-	/* this is an important task */
-	if (!no_prio)
-		task->priority = STARPU_MAX_PRIO;
-
-	/* enforce dependencies ... */
-	if (k > 0) {
-		/* wait for the last update of block (k, k) at step k-1 */
-		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
-	}
-	else {
-		/* the very first task waits for plu_main to give the start */
-		starpu_tag_declare_deps(TAG11(k), 1, STARPU_TAG_INIT);
-	}
-
-	int ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-/* Step k, diagonal block: submit the real task on the owner, schedule a
- * receive on the ranks that use the block, do nothing elsewhere. */
-static void create_task_11(unsigned k)
-{
-	if (get_block_rank(k, k) != rank)
-	{
-		/* We don't handle the task, but perhaps we have to generate MPI transfers. */
-		int users[world_size];
-		find_nodes_using_11(k, users);
-
-		if (!users[rank])
-		{
-#ifdef VERBOSE_INIT
-			fprintf(stderr, "Node %d needs not 11(%d)\n", rank, k);
-#endif
-			return;
-		}
-
-#ifdef VERBOSE_INIT
-		fprintf(stderr, "create RECV task 11(%d) on node %d\n", k, rank);
-#endif
-		create_task_11_recv(k);
-		return;
-	}
-
-#ifdef VERBOSE_INIT
-	fprintf(stderr, "CREATE real task 11(%d) (TAG11_SAVE(%d) = %lx) on node %d\n", k, k, TAG11_SAVE(k), rank);
-#endif
-	create_task_11_real(k);
-}
-
-
-
-/*
- *	Task 12 (Update lower left (TRSM))
- */
-
-/* Schedule the reception of block 12_kj from the rank owning block (k, j)
- * into the temporary 12 buffer; TAG12_SAVE(k, j) is unlocked by the receive
- * request. Called on ranks that use the block without owning it. */
-static void create_task_12_recv(unsigned k, unsigned j)
-{
-	unsigned i;
-
-	/* The current node is not computing that task, so we receive the block
-	 * with MPI */
-
-	/* We don't issue a MPI receive request until everyone using the
-	 * temporary buffer is done : 12_(k-1)j can be used by 22_(k-1)ij with
-	 * i >= k */
-	unsigned ndeps = 0;
-	starpu_tag_t tag_array[nblocks];
-	
-	/* The preprocessor selects which earlier step guards the buffer:
-	 * step k-1 with SINGLE_TMP1221, step k-2 otherwise. Either way the
-	 * selected if/for header drives the braced block below. */
-#ifdef SINGLE_TMP1221
-	if (k > 0)
-	for (i = (k-1)+1; i < nblocks; i++)
-#else
-	if (k > 1)
-	for (i = (k-2)+1; i < nblocks; i++)
-#endif
-	{
-		if (rank == get_block_rank(i, j))
-#ifdef SINGLE_TMP1221
-			tag_array[ndeps++] = TAG22(k-1, i, j);
-#else
-			tag_array[ndeps++] = TAG22(k-2, i, j);
-#endif
-	}
-	
-	/* The owner of block (k, j) is the sender */
-	int source = get_block_rank(k, j);
-#ifdef SINGLE_TMP1221
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_12_block_handle)(j);
-#else
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_12_block_handle)(j,k);
-#endif
-	int mpi_tag = MPI_TAG12(k, j);
-	starpu_tag_t partial_tag = TAG12_SAVE_PARTIAL(k, j);
-	starpu_tag_t unlocked_tag = TAG12_SAVE(k, j);
-
-	receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag);
-}
-
-/* Fill 'rank_mask' (world_size entries) with 1 for every rank owning a block
- * (row, j) with row > k, i.e. a block updated with 12_kj, and 0 elsewhere. */
-static void find_nodes_using_12(unsigned k, unsigned j, int *rank_mask)
-{
-	unsigned row;
-
-	memset(rank_mask, 0, world_size*sizeof(int));
-
-	for (row = k+1; row < nblocks; row++)
-		rank_mask[get_block_rank(row, j)] = 1;
-}
-
-/* Completion callback of task 12_kj: send block (k, j) to every other rank
- * that needs it. TAG12_SAVE(k, j) is unlocked once the sends are done. */
-static void callback_task_12_real(void *_arg)
-{
-	struct callback_arg *cb = _arg;
-	unsigned step = cb->k;
-	unsigned col = cb->j;
-
-	/* Destinations: all the users of the block but ourselves */
-	int dest_mask[world_size];
-	find_nodes_using_12(step, col, dest_mask);
-	dest_mask[rank] = 0;
-
-	send_data_to_mask(STARPU_PLU(get_block_handle)(step, col), dest_mask,
-				MPI_TAG12(step, col), TAG12_SAVE(step, col));
-
-	free(cb);
-}
-
-/* Submit task 12_kj on the rank owning block (k, j). The task works on the
- * diagonal block of step k (local copy or received temporary) and on block
- * (k, j); its callback sends the result to the other ranks. */
-static void create_task_12_real(unsigned k, unsigned j)
-{
-	struct starpu_task *task = create_task(TAG12(k, j));
-	
-	/* NOTE: the codelet is deliberately cl21 and not cl12, see the
-	 * #warning; create_task_21_real does the opposite swap. */
-#warning temporary fix :/
-//	task->cl = &STARPU_PLU(cl12);
-	task->cl = &STARPU_PLU(cl21);
-
-	task->cl_arg = create_debug_info(j, j, k);
-
-	unsigned diag_block_is_local = (get_block_rank(k, k) == rank);
-
-	/* Tag telling that the diagonal block is available on this rank */
-	starpu_tag_t tag_11_dep; 
-
-	/* which sub-data is manipulated ? */
-	starpu_data_handle_t diag_block;
-	if (diag_block_is_local)
-	{
-		/* computed here: wait for the factorization itself */
-		diag_block = STARPU_PLU(get_block_handle)(k, k);
-		tag_11_dep = TAG11(k);
-	}
-	else 
-	{
-		/* computed elsewhere: wait for its reception in the temporary
-		 * buffer */
-#ifdef SINGLE_TMP11
-		diag_block = STARPU_PLU(get_tmp_11_block_handle)();
-#else
-		diag_block = STARPU_PLU(get_tmp_11_block_handle)(k);
-#endif
-		tag_11_dep = TAG11_SAVE(k);
-	}
-
-	task->handles[0] = diag_block; 
-	task->handles[1] = STARPU_PLU(get_block_handle)(k, j); 
-
-	STARPU_ASSERT(get_block_rank(k, j) == rank);
-
-	STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR);
-	STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR);
-
-	/* Only j and k are read by callback_task_12_real, which also frees arg */
-	struct callback_arg *arg = malloc(sizeof(struct callback_arg));
-		arg->j = j;
-		arg->k = k;
-
-	task->callback_func = callback_task_12_real;
-	task->callback_arg = arg;
-
-	/* The block next to the diagonal is given the highest priority */
-	if (!no_prio && (j == k+1)) {
-		task->priority = STARPU_MAX_PRIO;
-	}
-
-	/* enforce dependencies ... */
-	if (k > 0) {
-		/* also wait for the update of block (k, j) at step k-1 */
-		starpu_tag_declare_deps(TAG12(k, j), 2, tag_11_dep, TAG22(k-1, k, j));
-	}
-	else {
-		starpu_tag_declare_deps(TAG12(k, j), 1, tag_11_dep);
-	}
-
-	int ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-/* Step k, block (k, j): submit the real task on the owner of the block,
- * schedule a receive on the ranks that use the block, do nothing elsewhere. */
-static void create_task_12(unsigned k, unsigned j)
-{
-	if (get_block_rank(k, j) == rank)
-	{
-#ifdef VERBOSE_INIT
-		fprintf(stderr, "CREATE real task 12(k = %d, j = %d) on node %d\n", k, j, rank);
-#endif
-		create_task_12_real(k, j);
-	}
-	else {
-		/* We don't handle the task, but perhaps we have to generate MPI transfers. */
-		int rank_mask[world_size];
-		find_nodes_using_12(k, j, rank_mask);
-		
-		if (rank_mask[rank])
-		{
-#ifdef VERBOSE_INIT
-			fprintf(stderr, "create RECV task 12(k = %d, j = %d) on node %d\n", k, j, rank);
-#endif
-			create_task_12_recv(k, j);
-		}
-		else {
-#ifdef VERBOSE_INIT
-			/* The second index of a 12 block is j, not i */
-			fprintf(stderr, "Node %d needs not 12(k=%d, j=%d)\n", rank, k, j);
-#endif
-		}
-	}
-}
-
-/*
- *	Task 21 (Update upper right (TRSM))
- */
-
-/* Schedule the reception of block 21_ki from the rank owning block (i, k)
- * into the temporary 21 buffer; TAG21_SAVE(k, i) is unlocked by the receive
- * request. Called on ranks that use the block without owning it. */
-static void create_task_21_recv(unsigned k, unsigned i)
-{
-	unsigned j;
-
-	/* The current node is not computing that task, so we receive the block
-	 * with MPI */
-
-	/* We don't issue a MPI receive request until everyone using the
-	 * temporary buffer is done : 21_(k-1)i can be used by 22_(k-1)ij with
-	 * j >= k */
-	unsigned ndeps = 0;
-	starpu_tag_t tag_array[nblocks];
-	
-	/* The preprocessor selects which earlier step guards the buffer:
-	 * step k-1 with SINGLE_TMP1221, step k-2 otherwise. Either way the
-	 * selected if/for header drives the braced block below. */
-#ifdef SINGLE_TMP1221
-	if (k > 0)
-	for (j = (k-1)+1; j < nblocks; j++)
-#else
-	if (k > 1)
-	for (j = (k-2)+1; j < nblocks; j++)
-#endif
-	{
-		if (rank == get_block_rank(i, j))
-#ifdef SINGLE_TMP1221
-			tag_array[ndeps++] = TAG22(k-1, i, j);
-#else
-			tag_array[ndeps++] = TAG22(k-2, i, j);
-#endif
-	}
-
-	/* The owner of block (i, k) is the sender */
-	int source = get_block_rank(i, k);
-#ifdef SINGLE_TMP1221
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_21_block_handle)(i);
-#else
-	starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_21_block_handle)(i, k);
-#endif
-	int mpi_tag = MPI_TAG21(k, i);
-	starpu_tag_t partial_tag = TAG21_SAVE_PARTIAL(k, i);
-	starpu_tag_t unlocked_tag = TAG21_SAVE(k, i);
-
-//	fprintf(stderr, "NODE %d - 21 (%d, %d) - recv when done ndeps %d - tag array %lx\n", rank, k, i, ndeps, tag_array[0]);
-	receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag);
-}
-
-/* Fill 'rank_mask' (world_size entries) with 1 for every rank owning a block
- * (i, col) with col > k, i.e. a block updated with 21_ki, and 0 elsewhere. */
-static void find_nodes_using_21(unsigned k, unsigned i, int *rank_mask)
-{
-	unsigned col;
-
-	memset(rank_mask, 0, world_size*sizeof(int));
-
-	for (col = k+1; col < nblocks; col++)
-		rank_mask[get_block_rank(i, col)] = 1;
-}
-
-/* Completion callback of task 21_ki: send block (i, k) to every other rank
- * that needs it. TAG21_SAVE(k, i) is unlocked once the sends are done. */
-static void callback_task_21_real(void *_arg)
-{
-	struct callback_arg *cb = _arg;
-	unsigned step = cb->k;
-	unsigned row = cb->i;
-
-	/* Destinations: all the users of the block but ourselves */
-	int dest_mask[world_size];
-	find_nodes_using_21(step, row, dest_mask);
-	dest_mask[rank] = 0;
-
-	send_data_to_mask(STARPU_PLU(get_block_handle)(row, step), dest_mask,
-				MPI_TAG21(step, row), TAG21_SAVE(step, row));
-
-	free(cb);
-}
-
-/* Submit task 21_ki on the calling rank (create_task_21 calls it on the owner
- * of block (i, k)). The task works on the diagonal block of step k (local
- * copy or received temporary) and on block (i, k); its callback sends the
- * result to the other ranks. */
-static void create_task_21_real(unsigned k, unsigned i)
-{
-	struct starpu_task *task = create_task(TAG21(k, i));
-
-	/* NOTE: the codelet is deliberately cl12 and not cl21, see the
-	 * #warning; create_task_12_real does the opposite swap. */
-#warning temporary fix 
-//	task->cl = &STARPU_PLU(cl21);
-	task->cl = &STARPU_PLU(cl12);
-
-	task->cl_arg = create_debug_info(i, i, k);
-
-	unsigned diag_block_is_local = (get_block_rank(k, k) == rank);
-
-	/* Tag telling that the diagonal block is available on this rank */
-	starpu_tag_t tag_11_dep; 
-	
-	/* which sub-data is manipulated ? */
-	starpu_data_handle_t diag_block;
-	if (diag_block_is_local)
-	{
-		/* computed here: wait for the factorization itself */
-		diag_block = STARPU_PLU(get_block_handle)(k, k);
-		tag_11_dep = TAG11(k);
-	}
-	else 
-	{
-		/* computed elsewhere: wait for its reception in the temporary
-		 * buffer */
-#ifdef SINGLE_TMP11
-		diag_block = STARPU_PLU(get_tmp_11_block_handle)();
-#else
-		diag_block = STARPU_PLU(get_tmp_11_block_handle)(k);
-#endif
-		tag_11_dep = TAG11_SAVE(k);
-	}
-
-	task->handles[0] = diag_block; 
-	task->handles[1] = STARPU_PLU(get_block_handle)(i, k);
-
-	STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR);
-	STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR);
-
-	/* Only i and k are read by callback_task_21_real, which also frees arg */
-	struct callback_arg *arg = malloc(sizeof(struct callback_arg));
-		arg->i = i;
-		arg->k = k;
-
-	task->callback_func = callback_task_21_real;
-	task->callback_arg = arg;
-
-	/* The block next to the diagonal is given the highest priority */
-	if (!no_prio && (i == k+1)) {
-		task->priority = STARPU_MAX_PRIO;
-	}
-
-	/* enforce dependencies ... */
-	if (k > 0) {
-		/* also wait for the update of block (i, k) at step k-1 */
-		starpu_tag_declare_deps(TAG21(k, i), 2, tag_11_dep, TAG22(k-1, i, k));
-	}
-	else {
-		starpu_tag_declare_deps(TAG21(k, i), 1, tag_11_dep);
-	}
-
-	int ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-/* Step k, block (i, k): submit the real task on the owner of the block,
- * schedule a receive on the ranks that use the block, do nothing elsewhere. */
-static void create_task_21(unsigned k, unsigned i)
-{
-	if (get_block_rank(i, k) != rank)
-	{
-		/* We don't handle the task, but perhaps we have to generate MPI transfers. */
-		int users[world_size];
-		find_nodes_using_21(k, i, users);
-
-		if (!users[rank])
-		{
-#ifdef VERBOSE_INIT
-			fprintf(stderr, "Node %d needs not 21(k=%d, i=%d)\n", rank, k,i);
-#endif
-			return;
-		}
-
-#ifdef VERBOSE_INIT
-		fprintf(stderr, "create RECV task 21(k = %d, i = %d) on node %d\n", k, i, rank);
-#endif
-		create_task_21_recv(k, i);
-		return;
-	}
-
-#ifdef VERBOSE_INIT
-	fprintf(stderr, "CREATE real task 21(k = %d, i = %d) on node %d\n", k, i, rank);
-#endif
-	create_task_21_real(k, i);
-}
-
-/*
- *	Task 22 (GEMM)
- */
-
-/* Submit task 22_kij (codelet cl22) on the calling rank, which create_task_22
- * guarantees to be the owner of block (i, j). The task reads block 21_ki and
- * block 12_kj, each either local or received in a temporary buffer, and works
- * on block (i, j). */
-static void create_task_22_real(unsigned k, unsigned i, unsigned j)
-{
-//	printf("task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG22(k,i,j));
-
-	struct starpu_task *task = create_task(TAG22(k, i, j));
-
-	task->cl = &STARPU_PLU(cl22);
-
-	task->cl_arg = create_debug_info(i, j, k);
-
-	/* which sub-data is manipulated ? */
-
-	/* produced by TAG21_SAVE(k, i) */ 
-	unsigned block21_is_local = (get_block_rank(i, k) == rank);
-	starpu_tag_t tag_21_dep;
-
-	starpu_data_handle_t block21;
-	if (block21_is_local)
-	{
-		/* computed here: depend on the task itself */
-		block21 = STARPU_PLU(get_block_handle)(i, k);
-		tag_21_dep = TAG21(k, i);
-	}
-	else 
-	{
-		/* computed elsewhere: depend on its reception */
-#ifdef SINGLE_TMP1221
-		block21 = STARPU_PLU(get_tmp_21_block_handle)(i);
-#else
-		block21 = STARPU_PLU(get_tmp_21_block_handle)(i, k);
-#endif
-		tag_21_dep = TAG21_SAVE(k, i);
-	}
-
-	/* produced by TAG12_SAVE(k, j) */
-	unsigned block12_is_local = (get_block_rank(k, j) == rank);
-	starpu_tag_t tag_12_dep;
-
-	starpu_data_handle_t block12;
-	if (block12_is_local)
-	{
-		/* computed here: depend on the task itself */
-	//	block12 = STARPU_PLU(get_block_handle)(j, k);
-		block12 = STARPU_PLU(get_block_handle)(k, j);
-		tag_12_dep = TAG12(k, j);
-	}
-	else 
-	{
-		/* computed elsewhere: depend on its reception */
-#ifdef SINGLE_TMP1221
-		block12 = STARPU_PLU(get_tmp_12_block_handle)(j);
-#else
-		block12 = STARPU_PLU(get_tmp_12_block_handle)(j, k);
-#endif
-		tag_12_dep = TAG12_SAVE(k, j);
-	}
-
-
-
-	/* NOTE: the first two handles are deliberately swapped with respect
-	 * to the commented-out assignments, see the #warning */
-#warning temporary fix :/
-	//task->handles[0] = block21;
-	task->handles[0] = block12;
-
-	//task->handles[1] = block12;
-	task->handles[1] = block21;
-
-	/* produced by TAG22(k-1, i, j) */
-	task->handles[2] = STARPU_PLU(get_block_handle)(i, j);
-
-	STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR);
-	STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR);
-	STARPU_ASSERT(task->handles[2] != STARPU_POISON_PTR);
-
-	/* The block the next diagonal task waits for gets the highest priority */
-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
-		task->priority = STARPU_MAX_PRIO;
-	}
-
-	/* enforce dependencies ... */
-	if (k > 0) {
-		/* also wait for the update of the same block at step k-1 */
-		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), tag_12_dep, tag_21_dep);
-	}
-	else {
-		starpu_tag_declare_deps(TAG22(k, i, j), 2, tag_12_dep, tag_21_dep);
-	}
-
-	int ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-/* Step k, block (i, j): only the owner of the block submits the update task;
- * the other ranks have nothing to do. */
-static void create_task_22(unsigned k, unsigned i, unsigned j)
-{
-	if (get_block_rank(i, j) != rank)
-		return;
-
-	create_task_22_real(k, i, j);
-}
-
-/* Block until 'tag' is done, then acquire 'handle' in read mode.
- * NOTE(review): no matching release is performed in this function. */
-static void wait_tag_and_fetch_handle(starpu_tag_t tag, starpu_data_handle_t handle)
-{
-	STARPU_ASSERT(handle != STARPU_POISON_PTR);
-
-	starpu_tag_wait(tag);
-//	fprintf(stderr, "Rank %d : tag %lx is done\n", rank, tag);
-
-	starpu_data_acquire(handle, STARPU_R);
-
-//	starpu_data_unregister(handle);
-}
-
-/* Wait for the end of the factorization on this rank: for every step k, wait
- * for the *_SAVE tag of each locally owned 11, 21 and 12 block (the tag is
- * unlocked once the block has been sent to the ranks that need it) and acquire
- * the block. */
-static void wait_termination(void)
-{
-	unsigned k, i, j;
-	for (k = 0; k < nblocks; k++)
-	{
-		/* Wait task 11k if needed */
-		if (get_block_rank(k, k) == rank)
-		{
-			starpu_data_handle_t diag_block = STARPU_PLU(get_block_handle)(k, k);
-			wait_tag_and_fetch_handle(TAG11_SAVE(k), diag_block);
-		}
-		
-
-		for (i = k + 1; i < nblocks; i++)
-		{
-			/* Wait task 21ki if needed */
-			if (get_block_rank(i, k) == rank)
-			{
-				starpu_data_handle_t block21 = STARPU_PLU(get_block_handle)(i, k);
-				//starpu_data_handle_t block21 = STARPU_PLU(get_block_handle)(k, i);
-				//fprintf(stderr, "BLOCK21 i %d k %d -> handle %p\n", i, k, block21);
-				wait_tag_and_fetch_handle(TAG21_SAVE(k, i), block21);
-			}
-		}
-
-		for (j = k + 1; j < nblocks; j++)
-		{
-			/* Wait task 12kj if needed */
-			if (get_block_rank(k, j) == rank)
-			{
-				//starpu_data_handle_t block12 = STARPU_PLU(get_block_handle)(j, k);
-				starpu_data_handle_t block12 = STARPU_PLU(get_block_handle)(k, j);
-				//fprintf(stderr, "BLOCK12 j %d k %d -> handle %p\n", j, k, block12);
-				wait_tag_and_fetch_handle(TAG12_SAVE(k, j), block12);
-			}
-		}
-	}	
-}
-
-/*
- *	code to bootstrap the factorization 
- */
-
-/*
- * Run the distributed LU factorization.
- *
- *	_nblocks	: number of blocks per dimension
- *	_rank		: rank of the calling process
- *	_world_size	: number of processes
- *
- * All the tasks and MPI transfers are declared first, then the ranks
- * synchronize on a barrier and the factorization is started by notifying
- * STARPU_TAG_INIT. Returns the time elapsed between that point and the end of
- * wait_termination(), in microseconds.
- */
-double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
-{
-	struct timeval start;
-	struct timeval end;
-
-	/* The task creation helpers read these file-scope variables */
-	nblocks = _nblocks;
-	rank = _rank;
-	world_size = _world_size;
-
-	/* create all the DAG nodes */
-	unsigned i,j,k;
-
-	for (k = 0; k < nblocks; k++)
-	{
-		create_task_11(k);
-
-		/* i is the column index of the 12 block and the row index of
-		 * the 21 block */
-		for (i = k+1; i<nblocks; i++)
-		{
-			create_task_12(k, i);
-			create_task_21(k, i);
-		}
-
-		for (i = k+1; i<nblocks; i++)
-		{
-			for (j = k+1; j<nblocks; j++)
-			{
-				create_task_22(k, i, j);
-			}
-		}
-	}
-
-	int barrier_ret = starpu_mpi_barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	/* schedule the codelet */
-	gettimeofday(&start, NULL);
-
-	/* Release the dependency of the first diagonal task */
-	starpu_tag_notify_from_apps(STARPU_TAG_INIT);
-
-	wait_termination();
-	
-	gettimeofday(&end, NULL);
-
-	/* elapsed time in microseconds */
-	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-	
-//	fprintf(stderr, "RANK %d -> took %f ms\n", rank, timing/1000);
-	
-	return timing;
-}

+ 0 - 65
mpi/examples/mpi_lu/pxlu.h

@@ -1,65 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __PXLU_H__
-#define __PXLU_H__
-
-#include <starpu.h>
-#include <common/blas.h>
-#include <starpu_mpi.h>
-
-/* 2*n1*n2*n3, computed on 64 bits */
-#define BLAS3_FLOP(n1,n2,n3)    \
-        (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
-
-/* When defined, a single temporary buffer is used for the received 11 blocks
- * (resp. one per row/column for the 12 and 21 blocks) instead of one per step
- * k; this changes the prototypes of the get_tmp_*_block_handle accessors
- * below. */
-//#define SINGLE_TMP11	1
-//#define SINGLE_TMP1221	1
-
-/* Block indices attached to a task as its codelet argument */
-struct debug_info {
-	unsigned i;
-	unsigned j;
-	unsigned k;
-};
-
-/* Run the factorization; returns the elapsed time in microseconds */
-double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size);
-
-/* Gather the distributed matrix in a contiguous buffer filled on rank 0 */
-TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks);
-/* Compare L*U with Asaved on rank 0 and print the error on stderr */
-void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved);
-
-unsigned STARPU_PLU(display_flag)(void);
-
-void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank);
-void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank);
-/* Accessors to block (i, j): StarPU handle and raw data */
-starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j);
-TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j);
-/* Handles of the temporary buffers where the blocks computed by other ranks
- * are received */
-#ifdef SINGLE_TMP11
-starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(void);
-#else
-starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(unsigned k);
-#endif
-#ifdef SINGLE_TMP1221
-starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j);
-starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i);
-#else
-starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k);
-starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k);
-#endif
-
-void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize);
-
-/* Rank of the MPI process that owns block (i, j) */
-int get_block_rank(unsigned i, unsigned j);
-
-#endif // __PXLU_H__

+ 0 - 444
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -1,444 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "pxlu.h"
-#include "pxlu_kernels.h"
-#include <math.h>
-
-///#define VERBOSE_KERNELS	1
-
-/*
- *   U22 
- */
-
-static inline void STARPU_PLU(common_u22)(void *descr[],
-				int s, __attribute__((unused)) void *_args)
-{
-	TYPE *right 	= (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
-	TYPE *left 	= (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
-	TYPE *center 	= (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-	unsigned dx = STARPU_MATRIX_GET_NX(descr[2]);
-	unsigned dy = STARPU_MATRIX_GET_NY(descr[2]);
-	unsigned dz = STARPU_MATRIX_GET_NY(descr[0]);
-
-	unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]);
-	unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]);
-
-#ifdef VERBOSE_KERNELS
-	struct debug_info *info = _args;
-
-	int rank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	fprintf(stderr, "KERNEL 22 %d - k = %d i = %d j = %d\n", rank, info->k, info->i, info->j);
-#endif
-
-#ifdef STARPU_USE_CUDA
-	cublasStatus status;
-	cudaError_t cures;
-#endif
-
-	switch (s) {
-		case 0:
-			CPU_GEMM("N", "N", dy, dx, dz, 
-				(TYPE)-1.0, right, ld21, left, ld12,
-				(TYPE)1.0, center, ld22);
-			break;
-
-#ifdef STARPU_USE_CUDA
-		case 1:
-			CUBLAS_GEMM('n', 'n', dx, dy, dz,
-				(TYPE)-1.0, right, ld21, left, ld12,
-				(TYPE)1.0f, center, ld22);
-
-			status = cublasGetError();
-			if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS))
-				STARPU_CUBLAS_REPORT_ERROR(status);
-
-			if (STARPU_UNLIKELY((cures = cudaStreamSynchronize(starpu_cuda_get_local_stream())) != cudaSuccess))
-				STARPU_CUDA_REPORT_ERROR(cures);
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-#ifdef VERBOSE_KERNELS
-	fprintf(stderr, "KERNEL 22 %d - k = %d i = %d j = %d done\n", rank, info->k, info->i, info->j);
-#endif
-}
-
-static void STARPU_PLU(cpu_u22)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u22)(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-static void STARPU_PLU(cublas_u22)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u22)(descr, 1, _args);
-}
-#endif// STARPU_USE_CUDA
-
-static struct starpu_perfmodel STARPU_PLU(model_22) = {
-	.type = STARPU_HISTORY_BASED,
-#ifdef STARPU_ATLAS
-	.symbol = STARPU_PLU_STR(lu_model_22_atlas)
-#elif defined(STARPU_GOTO)
-	.symbol = STARPU_PLU_STR(lu_model_22_goto)
-#else
-	.symbol = STARPU_PLU_STR(lu_model_22)
-#endif
-};
-
-struct starpu_codelet STARPU_PLU(cl22) = {
-	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_funcs = {STARPU_PLU(cpu_u22), NULL},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {STARPU_PLU(cublas_u22), NULL},
-#endif
-	.nbuffers = 3,
-	.modes = {STARPU_R, STARPU_R, STARPU_RW},
-	.model = &STARPU_PLU(model_22)
-};
-
-
-/*
- * U12
- */
-
-static inline void STARPU_PLU(common_u12)(void *descr[],
-				int s, __attribute__((unused)) void *_args)
-{
-	TYPE *sub11;
-	TYPE *sub12;
-
-	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);	
-	sub12 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-	unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]);
-
-	unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]);
-	unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]);
-
-#ifdef VERBOSE_KERNELS
-	struct debug_info *info = _args;
-
-	int rank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-#warning fixed debugging according to other tweak
-	//fprintf(stderr, "KERNEL 12 %d - k = %d i %d\n", rank, info->k, info->i);
-	fprintf(stderr, "KERNEL 21 %d - k = %d i %d\n", rank, info->k, info->j);
-
-	//fprintf(stderr, "INPUT 12 U11\n");
-	fprintf(stderr, "INPUT 21 U11\n");
-	STARPU_PLU(display_data_content)(sub11, nx12);
-	//fprintf(stderr, "INPUT 12 U12\n");
-	fprintf(stderr, "INPUT 21 U21\n");
-	STARPU_PLU(display_data_content)(sub12, nx12);
-#endif
-
-#ifdef STARPU_USE_CUDA
-	cublasStatus status;
-	cudaError_t cures;
-#endif
-
-	/* solve L11 U12 = A12 (find U12) */
-	switch (s) {
-		case 0:
-			CPU_TRSM("L", "L", "N", "N", nx12, ny12,
-					(TYPE)1.0, sub11, ld11, sub12, ld12);
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-			CUBLAS_TRSM('L', 'L', 'N', 'N', ny12, nx12,
-					(TYPE)1.0, sub11, ld11, sub12, ld12);
-
-			status = cublasGetError();
-			if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS))
-				STARPU_CUBLAS_REPORT_ERROR(status);
-
-			if (STARPU_UNLIKELY((cures = cudaStreamSynchronize(starpu_cuda_get_local_stream())) != cudaSuccess))
-				STARPU_CUDA_REPORT_ERROR(cures);
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-
-#ifdef VERBOSE_KERNELS
-	//fprintf(stderr, "OUTPUT 12 U12\n");
-	fprintf(stderr, "OUTPUT 21 U21\n");
-	STARPU_PLU(display_data_content)(sub12, nx12);
-#endif
-}
-
-static void STARPU_PLU(cpu_u12)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u12)(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-static void STARPU_PLU(cublas_u12)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u12)(descr, 1, _args);
-}
-#endif // STARPU_USE_CUDA
-
-static struct starpu_perfmodel STARPU_PLU(model_12) = {
-	.type = STARPU_HISTORY_BASED,
-#ifdef STARPU_ATLAS
-	.symbol = STARPU_PLU_STR(lu_model_12_atlas)
-#elif defined(STARPU_GOTO)
-	.symbol = STARPU_PLU_STR(lu_model_12_goto)
-#else
-	.symbol = STARPU_PLU_STR(lu_model_12)
-#endif
-};
-
-struct starpu_codelet STARPU_PLU(cl12) = {
-	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_funcs = {STARPU_PLU(cpu_u12), NULL},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {STARPU_PLU(cublas_u12), NULL},
-#endif
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_RW},
-	.model = &STARPU_PLU(model_12)
-};
-
-
-/* 
- * U21
- */
-
-static inline void STARPU_PLU(common_u21)(void *descr[],
-				int s, __attribute__((unused)) void *_args)
-{
-	TYPE *sub11;
-	TYPE *sub21;
-
-	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
-	sub21 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-	unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]);
-
-	unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]);
-	unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]);
-	
-#ifdef VERBOSE_KERNELS
-	struct debug_info *info = _args;
-
-	int rank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-#warning fixed debugging according to other tweak
-	//fprintf(stderr, "KERNEL 21 %d (k = %d, i = %d)\n", rank, info->k, info->i);
-	fprintf(stderr, "KERNEL 12 %d (k = %d, j = %d)\n", rank, info->k, info->j);
-
-	//fprintf(stderr, "INPUT 21 U11\n");
-	fprintf(stderr, "INPUT 12 U11\n");
-	STARPU_PLU(display_data_content)(sub11, nx21);
-	//fprintf(stderr, "INPUT 21 U21\n");
-	fprintf(stderr, "INPUT 12 U12\n");
-	STARPU_PLU(display_data_content)(sub21, nx21);
-#endif
-
-#ifdef STARPU_USE_CUDA
-	cublasStatus status;
-#endif
-
-
-	switch (s) {
-		case 0:
-			CPU_TRSM("R", "U", "N", "U", nx21, ny21,
-					(TYPE)1.0, sub11, ld11, sub21, ld21);
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-			CUBLAS_TRSM('R', 'U', 'N', 'U', ny21, nx21,
-					(TYPE)1.0, sub11, ld11, sub21, ld21);
-
-			status = cublasGetError();
-			if (status != CUBLAS_STATUS_SUCCESS)
-				STARPU_CUBLAS_REPORT_ERROR(status);
-
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-
-#ifdef VERBOSE_KERNELS
-	//fprintf(stderr, "OUTPUT 21 U11\n");
-	fprintf(stderr, "OUTPUT 12 U11\n");
-	STARPU_PLU(display_data_content)(sub11, nx21);
-	//fprintf(stderr, "OUTPUT 21 U21\n");
-	fprintf(stderr, "OUTPUT 12 U12\n");
-	STARPU_PLU(display_data_content)(sub21, nx21);
-#endif
-}
-
-static void STARPU_PLU(cpu_u21)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u21)(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-static void STARPU_PLU(cublas_u21)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u21)(descr, 1, _args);
-}
-#endif 
-
-static struct starpu_perfmodel STARPU_PLU(model_21) = {
-	.type = STARPU_HISTORY_BASED,
-#ifdef STARPU_ATLAS
-	.symbol = STARPU_PLU_STR(lu_model_21_atlas)
-#elif defined(STARPU_GOTO)
-	.symbol = STARPU_PLU_STR(lu_model_21_goto)
-#else
-	.symbol = STARPU_PLU_STR(lu_model_21)
-#endif
-};
-
-struct starpu_codelet STARPU_PLU(cl21) = {
-	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_funcs = {STARPU_PLU(cpu_u21), NULL},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {STARPU_PLU(cublas_u21), NULL},
-#endif
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_RW},
-	.model = &STARPU_PLU(model_21)
-};
-
-
-/*
- *	U11
- */
-
-static inline void STARPU_PLU(common_u11)(void *descr[],
-				int s, __attribute__((unused)) void *_args)
-{
-	TYPE *sub11;
-
-	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); 
-
-	unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]);
-	unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]);
-
-	unsigned long z;
-
-#ifdef VERBOSE_KERNELS
-	struct debug_info *info = _args;
-
-	int rank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	fprintf(stderr, "KERNEL 11 %d - k = %d\n", rank, info->k);
-#endif
-
-	switch (s) {
-		case 0:
-			for (z = 0; z < nx; z++)
-			{
-				TYPE pivot;
-				pivot = sub11[z+z*ld];
-				STARPU_ASSERT(pivot != 0.0);
-		
-				CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld);
-		
-				CPU_GER(nx - z - 1, nx - z - 1, -1.0,
-						&sub11[(z+1)+z*ld], 1,
-						&sub11[z+(z+1)*ld], ld,
-						&sub11[(z+1) + (z+1)*ld],ld);
-			}
-			break;
-#ifdef STARPU_USE_CUDA
-		case 1:
-			for (z = 0; z < nx; z++)
-			{
-				TYPE pivot;
-				cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
-				cudaStreamSynchronize(starpu_cuda_get_local_stream());
-
-				STARPU_ASSERT(pivot != 0.0);
-				
-				CUBLAS_SCAL(nx - z - 1, 1.0/pivot, &sub11[z+(z+1)*ld], ld);
-				
-				CUBLAS_GER(nx - z - 1, nx - z - 1, -1.0,
-						&sub11[(z+1)+z*ld], 1,
-						&sub11[z+(z+1)*ld], ld,
-						&sub11[(z+1) + (z+1)*ld],ld);
-			}
-			
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
-
-			break;
-#endif
-		default:
-			STARPU_ABORT();
-			break;
-	}
-#ifdef VERBOSE_KERNELS
-	fprintf(stderr, "KERNEL 11 %d - k = %d\n", rank, info->k);
-#endif
-}
-
-static void STARPU_PLU(cpu_u11)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u11)(descr, 0, _args);
-}
-
-#ifdef STARPU_USE_CUDA
-static void STARPU_PLU(cublas_u11)(void *descr[], void *_args)
-{
-	STARPU_PLU(common_u11)(descr, 1, _args);
-}
-#endif// STARPU_USE_CUDA
-
-static struct starpu_perfmodel STARPU_PLU(model_11) = {
-	.type = STARPU_HISTORY_BASED,
-#ifdef STARPU_ATLAS
-	.symbol = STARPU_PLU_STR(lu_model_11_atlas)
-#elif defined(STARPU_GOTO)
-	.symbol = STARPU_PLU_STR(lu_model_11_goto)
-#else
-	.symbol = STARPU_PLU_STR(lu_model_11)
-#endif
-};
-
-struct starpu_codelet STARPU_PLU(cl11) = {
-	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_funcs = {STARPU_PLU(cpu_u11), NULL},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {STARPU_PLU(cublas_u11), NULL},
-#endif
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-	.model = &STARPU_PLU(model_11)
-};
-
-

+ 0 - 32
mpi/examples/mpi_lu/pxlu_kernels.h

@@ -1,32 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __PXLU_KERNELS_H__
-#define __PXLU_KERNELS_H__
-
-#include <starpu.h>
-
-#define str(s) #s
-#define xstr(s)        str(s)
-#define STARPU_PLU_STR(name)  xstr(STARPU_PLU(name))
-
-struct starpu_codelet STARPU_PLU(cl11);
-struct starpu_codelet STARPU_PLU(cl12);
-struct starpu_codelet STARPU_PLU(cl21);
-struct starpu_codelet STARPU_PLU(cl22);
-
-#endif // __PXLU_KERNELS_H__

+ 0 - 19
mpi/examples/mpi_lu/slu_kernels.c

@@ -1,19 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "mpi_lu-float.h"
-#include "xlu_kernels.c"

+ 0 - 106
mpi/examples/perf.sh

@@ -1,106 +0,0 @@
-#!/bin/bash
-
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-# 
-# Copyright (C) 2010  Université de Bordeaux 1
-# Copyright (C) 2010  Centre National de la Recherche Scientifique
-# 
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-# 
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# 
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-# 4G x np = 4 * (k*1K) ^ 2
-# A G * np = 4 * k^2 * 1M
-# A * 250 * np = k^2
-# A = 6
-# k = sqrt(1500*np)
-# np = 1 => k = 32
-# np = 2 => k = 48
-# np = 3 => k = 64 
-# np = 4 => k = 64
-
-# Problem size
-NBLOCKS=16
-BLOCKSIZE=1024
-SIZE=$(($NBLOCKS*$BLOCKSIZE))
-
-echo "JOB ID ${PBS_JOBID}"
-
-nnodes=$(cat machinefile.${PBS_JOBID}|wc -l)
-echo "got $nnodes mpi nodes"
-
-# Calibrate
-ncalibrate=0
-for i in `seq 1 $ncalibrate`
-do
-echo "STARPU_CALIBRATE $i/$ncalibrate"
-STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $nnodes ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 -size $((32*$BLOCKSIZE)) -numa
-done
-
-func()
-{
-ngpus=$1
-np=$2
-p=$3
-q=$4
-nblocks=$5
-
-echo "*******************************************"> log
-echo "*************** NGPUS $ngpus - np $np - nblocks $nblocks **************">> log
-echo "*******************************************">> log
-cat log
-cat log >> log.all
-
-STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err
-cat log.out > log
-cat log.err >> log
-cat log
-cat log >> log.all
-}
-
-rm -f log.all
-
-#how many time do we repeat each experiment ?
-nloops=3
-
-per_node_max_memory=7000
-
-for np in 1 2 4
-do
-	for nblocks in 16 32 48 64 80
-	do
-		for ngpus_per_node in 1 2 3 4
-		do
-			for loop in `seq 1 $nloops`
-			do
-				# Compute p and q from np
-				case $np in
-				  1) p=1; q=1;;
-				  2) p=2; q=1;;
-				  4) p=2; q=2;;
-				  *) echo -n "does not support $np nodes yet";;
-				esac
-
-				# Does the problem fit into memory ?
-				matrix_size=$(($nblocks * $BLOCKSIZE))
-				per_node_memory=$(($((4*$matrix_size*$matrix_size/(1024*1024))) / $np))
-
-				echo "NP $np P $p Q $q SIZE $per_node_memory NBLOCKS $nblocks"
-
-				if test $per_node_memory -ge $per_node_max_memory; then
-						echo "Problem is too large !"
-				else
-					func $ngpus_per_node $np $p $q $nblocks
-					echo "go !"
-				fi
-			done
-		done
-	done
-done

+ 0 - 156
mpi/examples/reduction/mpi_reduction.c

@@ -1,156 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-
-extern void init_cpu_func(void *descr[], void *cl_arg);
-extern void redux_cpu_func(void *descr[], void *cl_arg);
-extern void dot_cpu_func(void *descr[], void *cl_arg);
-
-static struct starpu_codelet init_codelet =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {init_cpu_func, NULL},
-	.nbuffers = 1,
-	.name = "init_codelet"
-};
-
-static struct starpu_codelet redux_codelet =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {redux_cpu_func, NULL},
-	.nbuffers = 2,
-	.name = "redux_codelet"
-};
-
-static struct starpu_codelet dot_codelet =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {dot_cpu_func, NULL},
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_REDUX},
-	.name = "dot_codelet"
-};
-
-/* Returns the MPI node number where data indexes index is */
-int my_distrib(int x, int nb_nodes)
-{
-	return x % nb_nodes;
-}
-
-int main(int argc, char **argv)
-{
-        int my_rank, size, x, y;
-        long int *vector;
-	long int dot, sum=0;
-        starpu_data_handle_t *handles;
-	starpu_data_handle_t dot_handle;
-
-	int nb_elements, step;
-
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	starpu_mpi_initialize_extended(&my_rank, &size);
-
-	nb_elements = size*8000;
-	step = 4;
-
-	vector = (long int *) malloc(nb_elements*sizeof(vector[0]));
-        for(x = 0; x < nb_elements; x+=step)
-	{
-		int mpi_rank = my_distrib(x/step, size);
-		if (mpi_rank == my_rank)
-		{
-			for(y=0 ; y<step ; y++)
-			{
-				vector[x+y] = x+y+1;
-			}
-		}
-        }
-	if (my_rank == 0) {
-		dot = 14;
-		sum = (nb_elements * (nb_elements + 1)) / 2;
-		sum+= dot;
-		starpu_variable_data_register(&dot_handle, 0, (uintptr_t)&dot, sizeof(dot));
-	}
-	else
-	{
-		starpu_variable_data_register(&dot_handle, -1, (uintptr_t)NULL, sizeof(dot));
-	}
-
-
-	handles = (starpu_data_handle_t *) malloc(nb_elements*sizeof(handles[0]));
-        for(x = 0; x < nb_elements; x+=step)
-	{
-		int mpi_rank = my_distrib(x/step, size);
-		if (mpi_rank == my_rank)
-		{
-			/* Owning data */
-			starpu_vector_data_register(&handles[x], 0, (uintptr_t)&(vector[x]), step, sizeof(vector[0]));
-		}
-		else
-		{
-			starpu_vector_data_register(&handles[x], -1, (uintptr_t)NULL, step, sizeof(vector[0]));
-		}
-		if (handles[x])
-		{
-			starpu_data_set_rank(handles[x], mpi_rank);
-			starpu_data_set_tag(handles[x], x);
-		}
-	}
-
-	starpu_data_set_rank(dot_handle, 0);
-	starpu_data_set_tag(dot_handle, nb_elements+1);
-	starpu_data_set_reduction_methods(dot_handle, &redux_codelet, &init_codelet);
-
-	for (x = 0; x < nb_elements; x+=step)
-	{
-		starpu_mpi_insert_task(MPI_COMM_WORLD,
-				       &dot_codelet,
-				       STARPU_R, handles[x],
-				       STARPU_REDUX, dot_handle,
-				       0);
-	}
-	starpu_mpi_redux_data(MPI_COMM_WORLD, dot_handle);
-
-        fprintf(stderr, "Waiting ...\n");
-        starpu_task_wait_for_all();
-
-        for(x = 0; x < nb_elements; x+=step)
-	{
-		if (handles[x]) starpu_data_unregister(handles[x]);
-	}
-	if (dot_handle)
-	{
-		starpu_data_unregister(dot_handle);
-	}
-	free(vector);
-	free(handles);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	if (my_rank == 0)
-	{
-                fprintf(stderr, "[%d] sum=%ld\n", my_rank, sum);
-                fprintf(stderr, "[%d] dot=%ld\n", my_rank, dot);
-		fprintf(stderr, "%s when computing reduction\n", (sum == dot) ? "Success" : "Error");
-        }
-
-	return 0;
-}
-

+ 0 - 66
mpi/examples/reduction/mpi_reduction_kernels.c

@@ -1,66 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <mpi.h>
-
-#define _DISPLAY(fmt, args ...) do { \
-		int _display_rank; MPI_Comm_rank(MPI_COMM_WORLD, &_display_rank);	\
-		fprintf(stderr, "[%d][%s] " fmt , _display_rank, __func__ ,##args); 	\
-		fflush(stderr); } while(0)
-
-/*
- *	Codelet to create a neutral element
- */
-void init_cpu_func(void *descr[], void *cl_arg)
-{
-	long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	*dot = 0;
-	_DISPLAY("Init dot\n");
-}
-
-/*
- *	Codelet to perform the reduction of two elements
- */
-void redux_cpu_func(void *descr[], void *cl_arg)
-{
-	long int *dota = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	long int *dotb = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]);
-
-	*dota = *dota + *dotb;
-	_DISPLAY("Calling redux %ld=%ld+%ld\n", *dota, *dota-*dotb, *dotb);
-}
-
-/*
- *	Dot product codelet
- */
-void dot_cpu_func(void *descr[], void *cl_arg)
-{
-	long int *local_x = (long int *)STARPU_VECTOR_GET_PTR(descr[0]);
-	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
-
-	long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]);
-
-//	_DISPLAY("Before dot=%ld (adding %d elements...)\n", *dot, n);
-	unsigned i;
-	for (i = 0; i < n; i++)
-	{
-//		_DISPLAY("Adding %ld\n", local_x[i]);
-		*dot += local_x[i];
-	}
-//	_DISPLAY("After dot=%ld\n", *dot);
-}
-

+ 0 - 228
mpi/examples/scatter_gather/mpi_scatter_gather.c

@@ -1,228 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-
-/* Returns the MPI node number where data indexes index is */
-int my_distrib(int x, int y, int nb_nodes)
-{
-        return (x+y) % nb_nodes;
-}
-
-void cpu_codelet(void *descr[], void *_args)
-{
-	float *block;
-	unsigned nx = STARPU_MATRIX_GET_NY(descr[0]);
-	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
-	unsigned i,j;
-	int rank;
-	float factor;
-
-	block = (float *)STARPU_MATRIX_GET_PTR(descr[0]);
-        starpu_codelet_unpack_args(_args, &rank);
-	factor = block[0];
-
-	//fprintf(stderr,"rank %d factor %f\n", rank, factor);
-	for (j = 0; j < nx; j++)
-	{
-		for (i = 0; i < nx; i++)
-		{
-			//fprintf(stderr,"rank %d factor %f --> %f %f\n", rank, factor, block[j+i*ld], block[j+i*ld]*factor);
-			block[j+i*ld] *= factor;
-		}
-	}
-}
-
-static struct starpu_codelet cl =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {cpu_codelet, NULL},
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-};
-
-int main(int argc, char **argv)
-{
-        int rank, nodes;
-	float ***bmat = NULL;
-        starpu_data_handle_t *data_handles;
-
-	unsigned i,j,x,y;
-
-	unsigned nblocks=4;
-	unsigned block_size=2;
-	unsigned size = nblocks*block_size;
-	unsigned ld = size / nblocks;
-
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	starpu_mpi_initialize_extended(&rank, &nodes);
-
-	if (rank == 0)
-	{
-		/* Allocate the matrix */
-		int block_number=10;
-		bmat = malloc(nblocks * sizeof(float *));
-		for(x=0 ; x<nblocks ; x++)
-		{
-			bmat[x] = malloc(nblocks * sizeof(float *));
-			for(y=0 ; y<nblocks ; y++)
-			{
-				float value=0.0;
-				starpu_malloc((void **)&bmat[x][y], block_size*block_size*sizeof(float));
-				for (i = 0; i < block_size; i++)
-				{
-					for (j = 0; j < block_size; j++)
-					{
-						bmat[x][y][j +i*block_size] = block_number + value;
-						value++;
-					}
-				}
-				block_number += 10;
-			}
-		}
-	}
-
-#if 0
-	// Print matrix
-	if (rank == 0)
-	{
-		fprintf(stderr, "Input matrix\n");
-		for(x=0 ; x<nblocks ; x++)
-		{
-			for(y=0 ; y<nblocks ; y++)
-			{
-				for (j = 0; j < block_size; j++)
-				{
-					for (i = 0; i < block_size; i++)
-					{
-						fprintf(stderr, "%2.2f\t", bmat[x][y][j+i*block_size]);
-					}
-					fprintf(stderr,"\n");
-				}
-				fprintf(stderr,"\n");
-			}
-		}
-	}
-#endif
-
-	/* Allocate data handles and register data to StarPU */
-        data_handles = malloc(nblocks*nblocks*sizeof(starpu_data_handle_t *));
-        for(x = 0; x < nblocks ;  x++)
-	{
-                for (y = 0; y < nblocks; y++)
-		{
-			int mpi_rank = my_distrib(x, y, nodes);
-			if (rank == 0)
-			{
-				starpu_matrix_data_register(&data_handles[x+y*nblocks], 0, (uintptr_t)bmat[x][y],
-							    ld, size/nblocks, size/nblocks, sizeof(float));
-			}
-			else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1)))
-			{
-				/* I own that index, or i will need it for my computations */
-				//fprintf(stderr, "[%d] Owning or neighbor of data[%d][%d]\n", rank, x, y);
-				starpu_matrix_data_register(&data_handles[x+y*nblocks], -1, (uintptr_t)NULL,
-							    ld, size/nblocks, size/nblocks, sizeof(float));
-			}
-			else
-			{
-				/* I know it's useless to allocate anything for this */
-				data_handles[x+y*nblocks] = NULL;
-			}
-                        if (data_handles[x+y*nblocks])
-			{
-                                starpu_data_set_rank(data_handles[x+y*nblocks], mpi_rank);
-                                starpu_data_set_tag(data_handles[x+y*nblocks], (y*nblocks)+x);
-			}
-                }
-        }
-
-	/* Scatter the matrix among the nodes */
-	starpu_mpi_scatter_detached(data_handles, nblocks*nblocks, 0, MPI_COMM_WORLD);
-
-	/* Calculation */
-	for(x = 0; x < nblocks*nblocks ;  x++)
-	{
-		if (data_handles[x])
-		{
-			int owner = starpu_data_get_rank(data_handles[x]);
-			if (owner == rank)
-			{
-				//fprintf(stderr,"[%d] Computing on data[%d]\n", rank, x);
-				starpu_insert_task(&cl,
-						   STARPU_VALUE, &rank, sizeof(rank),
-						   STARPU_RW, data_handles[x],
-						   0);
-			}
-		}
-	}
-
-	/* Gather the matrix on main node */
-	starpu_mpi_gather_detached(data_handles, nblocks*nblocks, 0, MPI_COMM_WORLD);
-
-	/* Unregister matrix from StarPU */
-	for(x=0 ; x<nblocks*nblocks ; x++)
-	{
-		if (data_handles[x])
-		{
-			starpu_data_unregister(data_handles[x]);
-		}
-	}
-
-#if 0
-	// Print matrix
-	if (rank == 0)
-	{
-		fprintf(stderr, "Output matrix\n");
-		for(x=0 ; x<nblocks ; x++)
-		{
-			for(y=0 ; y<nblocks ; y++)
-			{
-				for (j = 0; j < block_size; j++)
-				{
-					for (i = 0; i < block_size; i++)
-					{
-						fprintf(stderr, "%2.2f\t", bmat[x][y][j+i*block_size]);
-					}
-					fprintf(stderr,"\n");
-				}
-				fprintf(stderr,"\n");
-			}
-		}
-	}
-#endif
-
-	// Free memory
-        free(data_handles);
-	if (rank == 0)
-	{
-		for(x=0 ; x<nblocks ; x++)
-		{
-			for(y=0 ; y<nblocks ; y++)
-			{
-				starpu_free((void *)bmat[x][y]);
-			}
-			free(bmat[x]);
-		}
-		free(bmat);
-	}
-
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-	return 0;
-}

+ 0 - 159
mpi/examples/stencil/stencil5.c

@@ -1,159 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-
-void stencil5_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	unsigned *xy = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	unsigned *xm1y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
-	unsigned *xp1y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[2]);
-	unsigned *xym1 = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[3]);
-	unsigned *xyp1 = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[4]);
-
-        //        fprintf(stdout, "VALUES: %d %d %d %d %d\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
-        *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5;
-}
-
-struct starpu_codelet stencil5_cl =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {stencil5_cpu, NULL},
-        .nbuffers = 5,
-	.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}
-};
-
-#define NITER_DEF 500
-#define X         20
-#define Y         20
-
-int display = 0;
-int niter = NITER_DEF;
-
-/* Returns the MPI node number where data indexes index is */
-int my_distrib(int x, int y, int nb_nodes)
-{
-	/* Block distrib */
-	return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes;
-}
-
-
-static void parse_args(int argc, char **argv)
-{
-	int i;
-	for (i = 1; i < argc; i++)
-	{
-		if (strcmp(argv[i], "-iter") == 0)
-		{
-			char *argptr;
-			niter = strtol(argv[++i], &argptr, 10);
-		}
-		if (strcmp(argv[i], "-display") == 0)
-		{
-			display = 1;
-		}
-	}
-}
-
-int main(int argc, char **argv)
-{
-        int my_rank, size, x, y, loop;
-        int value=0, mean=0;
-        unsigned matrix[X][Y];
-        starpu_data_handle_t data_handles[X][Y];
-
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	starpu_mpi_initialize_extended(&my_rank, &size);
-        parse_args(argc, argv);
-
-        for(x = 0; x < X; x++)
-	{
-                for (y = 0; y < Y; y++)
-		{
-                        matrix[x][y] = (my_rank+1)*10 + value;
-                        value++;
-                        mean += matrix[x][y];
-                }
-        }
-        mean /= value;
-
-        for(x = 0; x < X; x++)
-	{
-                for (y = 0; y < Y; y++)
-		{
-                        int mpi_rank = my_distrib(x, y, size);
-                        if (mpi_rank == my_rank)
-			{
-                                //fprintf(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
-                                starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
-                        }
-			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
-			      || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
-			{
-                                /* I don't own that index, but will need it for my computations */
-                                //fprintf(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y);
-                                starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned));
-                        }
-                        else
-			{
-                                /* I know it's useless to allocate anything for this */
-                                data_handles[x][y] = NULL;
-                        }
-                        if (data_handles[x][y])
-			{
-                                starpu_data_set_rank(data_handles[x][y], mpi_rank);
-                                starpu_data_set_tag(data_handles[x][y], (y*X)+x);
-			}
-                }
-        }
-
-        for(loop=0 ; loop<niter; loop++)
-	{
-                for (x = 1; x < X-1; x++)
-		{
-                        for (y = 1; y < Y-1; y++)
-			{
-                                starpu_mpi_insert_task(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
-                                                       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
-                                                       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
-                                                       0);
-                        }
-                }
-        }
-        fprintf(stderr, "Waiting ...\n");
-        starpu_task_wait_for_all();
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-        if (display)
-	{
-                fprintf(stdout, "[%d] mean=%d\n", my_rank, mean);
-                for(x = 0; x < X; x++)
-		{
-                        fprintf(stdout, "[%d] ", my_rank);
-                        for (y = 0; y < Y; y++)
-			{
-                                fprintf(stdout, "%3u ", matrix[x][y]);
-                        }
-                        fprintf(stdout, "\n");
-                }
-        }
-
-	return 0;
-}

+ 0 - 70
mpi/include/starpu_mpi.h

@@ -1,70 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_H__
-#define __STARPU_MPI_H__
-
-#include <starpu.h>
-
-#if defined(STARPU_USE_MPI)
-
-#include <mpi.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void *starpu_mpi_req;
-
-int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status);
-int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
-int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
-int starpu_mpi_barrier(MPI_Comm comm);
-int starpu_mpi_initialize(void);
-int starpu_mpi_initialize_extended(int *rank, int *world_size);
-int starpu_mpi_shutdown(void);
-
-int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node);
-void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg);
-void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle);
-
-int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm);
-int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm);
-
-/* Some helper functions */
-
-/* When the transfer is completed, the tag is unlocked */
-int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-
-/* Asynchronously send an array of buffers, and unlocks the tag once all of
- * them are transmitted. */
-int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // STARPU_USE_MPI
-#endif // __STARPU_MPI_H__

+ 0 - 29
mpi/libstarpumpi.pc.in

@@ -1,29 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2011  Université de Bordeaux 1
-# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: starpumpi
-Description: offers MPI support for heterogeneous multicore architecture
-Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ -DSTARPU_USE_DEPRECATED_API
-Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@
-Libs.private: @LDFLAGS@ @LIBS@
-Requires: libstarpu
-Requires.private:

+ 0 - 51
mpi/src/Makefile.am

@@ -1,51 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2012  Université de Bordeaux 1
-# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-CC=$(MPICC)
-CCLD=$(MPICC)
-
-BUILT_SOURCES =
-
-CLEANFILES = *.gcno *.gcda *.linkinfo
-
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS)
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src
-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS)
-
-lib_LTLIBRARIES = libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-
-libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la
-libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined					\
-  -version-info $(LIBSTARPUMPI_INTERFACE_CURRENT):$(LIBSTARPUMPI_INTERFACE_REVISION):$(LIBSTARPUMPI_INTERFACE_AGE) \
-  $(MPICC_LDFLAGS) $(FXT_LDFLAGS)
-noinst_HEADERS =					\
-	starpu_mpi_private.h				\
-	starpu_mpi_fxt.h				\
-	starpu_mpi_stats.h				\
-	starpu_mpi_datatype.h
-
-libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
-	starpu_mpi.c					\
-	starpu_mpi_helper.c				\
-	starpu_mpi_datatype.c				\
-	starpu_mpi_insert_task.c			\
-	starpu_mpi_collective.c				\
-	starpu_mpi_stats.c
-
-
-showcheck:
-	-cat /dev/null

+ 0 - 867
mpi/src/starpu_mpi.c

@@ -1,867 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdlib.h>
-#include <starpu_mpi.h>
-#include <starpu_mpi_datatype.h>
-//#define STARPU_MPI_VERBOSE	1
-#include <starpu_mpi_private.h>
-#include <starpu_profiling.h>
-#include <starpu_mpi_stats.h>
-
-/* TODO find a better way to select the polling method (perhaps during the
- * configuration) */
-//#define USE_STARPU_ACTIVITY	1
-
-static void submit_mpi_req(void *arg);
-static void handle_request_termination(struct _starpu_mpi_req *req);
-
-/* The list of requests that have been newly submitted by the application */
-static struct _starpu_mpi_req_list *new_requests;
-
-/* The list of detached requests that have already been submitted to MPI */
-static struct _starpu_mpi_req_list *detached_requests;
-static pthread_mutex_t detached_requests_mutex;
-
-/* Condition to wake up progression thread */
-static pthread_cond_t cond_progression;
-/* Condition to wake up waiting for all current MPI requests to finish */
-static pthread_cond_t cond_finished;
-static pthread_mutex_t mutex;
-static pthread_t progress_thread;
-static int running = 0;
-
-/* Count requests posted by the application and not yet submitted to MPI, i.e pushed into the new_requests list */
-static pthread_mutex_t mutex_posted_requests;
-static int posted_requests = 0, newer_requests, barrier_running = 0;
-
-#define INC_POSTED_REQUESTS(value) { _STARPU_PTHREAD_MUTEX_LOCK(&mutex_posted_requests); posted_requests += value; _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex_posted_requests); }
-
-/*
- *	Isend
- */
-
-static void starpu_mpi_isend_func(struct _starpu_mpi_req *req)
-{
-	/* Element count reported by starpu_mpi_handle_to_datatype() and
-	 * handed over to MPI_Isend() */
-	int nb_elems;
-
-	_STARPU_MPI_LOG_IN();
-
-	/* Look up the MPI datatype of the handle.  When the handle needs
-	 * (un)packing, starpu_handle_pack_data() provides req->ptr; otherwise
-	 * the handle's local pointer is used directly. */
-	req->needs_unpacking = starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype, &nb_elems);
-	if (!req->needs_unpacking)
-		req->ptr = starpu_handle_get_local_ptr(req->data_handle);
-	else
-		starpu_handle_pack_data(req->data_handle, &req->ptr);
-	STARPU_ASSERT(req->ptr);
-
-	_STARPU_MPI_DEBUG("post MPI isend tag %d dst %d ptr %p datatype %p count %d req %p\n", req->mpi_tag, req->srcdst, req->ptr, req->datatype, nb_elems, &req->request);
-
-	/* Presumably feeds the per-destination communication statistics */
-	_starpu_mpi_comm_amounts_inc(req->comm, req->srcdst, req->datatype, nb_elems);
-
-	/* Post the non-blocking send; a failure is treated as fatal */
-	req->ret = MPI_Isend(req->ptr, nb_elems, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
-	STARPU_ASSERT(req->ret == MPI_SUCCESS);
-
-	TRACE_MPI_ISEND(req->srcdst, req->mpi_tag, 0);
-
-	/* Flag the request as posted and wake every thread that was blocked
-	 * until the MPI request exists (see starpu_mpi_wait()) */
-	_STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
-	req->submitted = 1;
-	_STARPU_PTHREAD_COND_BROADCAST(&req->req_cond);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
-							int dest, int mpi_tag, MPI_Comm comm,
-							unsigned detached, void (*callback)(void *), void *arg)
-{
-	/* Build a SEND_REQ request for data_handle and hand it over to the
-	 * progression thread once the data has been acquired for reading.
-	 * Returns the newly allocated request. */
-	struct _starpu_mpi_req *send_req;
-
-	send_req = calloc(1, sizeof(struct _starpu_mpi_req));
-	STARPU_ASSERT(send_req);
-
-	_STARPU_MPI_LOG_IN();
-
-	/* One more request posted by the application but not yet pushed to
-	 * the new_requests list */
-	INC_POSTED_REQUESTS(1);
-
-	/* Synchronisation state of the request */
-	send_req->submitted = 0;
-	send_req->completed = 0;
-	_STARPU_PTHREAD_MUTEX_INIT(&send_req->req_mutex, NULL);
-	_STARPU_PTHREAD_COND_INIT(&send_req->req_cond, NULL);
-
-	/* What to do, and how */
-	send_req->request_type = SEND_REQ;
-	send_req->func = starpu_mpi_isend_func;
-
-	/* MPI envelope */
-	send_req->data_handle = data_handle;
-	send_req->srcdst = dest;
-	send_req->mpi_tag = mpi_tag;
-	send_req->comm = comm;
-
-	/* Completion behaviour */
-	send_req->detached = detached;
-	send_req->callback = callback;
-	send_req->callback_arg = arg;
-
-	/* Ask StarPU for read access to the data; submit_mpi_req(send_req) is
-	 * the callback given to starpu_data_acquire_cb() and is what actually
-	 * queues the request */
-	starpu_data_acquire_cb(data_handle, STARPU_R, submit_mpi_req, (void *)send_req);
-
-	_STARPU_MPI_LOG_OUT();
-	return send_req;
-}
-
-int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
-{
-	/* Non-detached asynchronous send: the internal request is returned
-	 * through *public_req.  Always returns 0. */
-	struct _starpu_mpi_req *internal_req;
-
-	_STARPU_MPI_LOG_IN();
-	STARPU_ASSERT(public_req);
-
-	/* detached = 0, no completion callback */
-	internal_req = _starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 0, NULL, NULL);
-	STARPU_ASSERT(internal_req);
-
-	*public_req = internal_req;
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/*
- *	Isend (detached)
- */
-
-int starpu_mpi_isend_detached(starpu_data_handle_t data_handle,
-				int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
-{
-	/* Detached asynchronous send: no request is returned to the caller,
-	 * callback(arg) is run on completion if callback is non-NULL.
-	 * Always returns 0. */
-	_STARPU_MPI_LOG_IN();
-
-	(void) _starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 1, callback, arg);
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/*
- *	Irecv
- */
-
-static void starpu_mpi_irecv_func(struct _starpu_mpi_req *req)
-{
-	/* Post the MPI_Irecv of a RECV_REQ request and mark it as submitted. */
-	int count;
-
-	_STARPU_MPI_LOG_IN();
-
-	/* Any non-zero value means "needs unpacking": this is how the send
-	 * path and handle_request_termination() interpret the flag, so the
-	 * receive path must agree or the data would be received in the local
-	 * pointer and then unpacked from it. */
-	req->needs_unpacking = starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype, &count);
-	if (req->needs_unpacking)
-		req->ptr = malloc(count);	/* temporary receive buffer of count bytes */
-	else
-		req->ptr = starpu_handle_get_local_ptr(req->data_handle);
-	STARPU_ASSERT(req->ptr);
-
-	_STARPU_MPI_DEBUG("post MPI irecv tag %d src %d data %p ptr %p req %p datatype %p\n", req->mpi_tag, req->srcdst, req->data_handle, req->ptr, &req->request, req->datatype);
-
-	/* Post the non-blocking receive; a failure is treated as fatal */
-	req->ret = MPI_Irecv(req->ptr, count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
-	STARPU_ASSERT(req->ret == MPI_SUCCESS);
-
-	/* somebody is perhaps waiting for the MPI request to be posted */
-	_STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
-	req->submitted = 1;
-	_STARPU_PTHREAD_COND_BROADCAST(&req->req_cond);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg)
-{
-	/* Build a RECV_REQ request for data_handle and hand it over to the
-	 * progression thread once the data has been acquired for writing.
-	 * Returns the newly allocated request. */
-	struct _starpu_mpi_req *recv_req;
-
-	_STARPU_MPI_LOG_IN();
-
-	recv_req = calloc(1, sizeof(struct _starpu_mpi_req));
-	STARPU_ASSERT(recv_req);
-
-	/* One more request posted by the application but not yet pushed to
-	 * the new_requests list */
-	INC_POSTED_REQUESTS(1);
-
-	/* Synchronisation state of the request */
-	recv_req->submitted = 0;
-	_STARPU_PTHREAD_MUTEX_INIT(&recv_req->req_mutex, NULL);
-	_STARPU_PTHREAD_COND_INIT(&recv_req->req_cond, NULL);
-
-	/* What to do, and how */
-	recv_req->request_type = RECV_REQ;
-	recv_req->func = starpu_mpi_irecv_func;
-
-	/* MPI envelope */
-	recv_req->data_handle = data_handle;
-	recv_req->srcdst = source;
-	recv_req->mpi_tag = mpi_tag;
-	recv_req->comm = comm;
-
-	/* Completion behaviour */
-	recv_req->detached = detached;
-	recv_req->callback = callback;
-	recv_req->callback_arg = arg;
-
-	/* Ask StarPU for write access to the data; submit_mpi_req(recv_req)
-	 * is the callback given to starpu_data_acquire_cb() and is what
-	 * actually queues the request */
-	starpu_data_acquire_cb(data_handle, STARPU_W, submit_mpi_req, (void *)recv_req);
-
-	_STARPU_MPI_LOG_OUT();
-	return recv_req;
-}
-
-int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
-{
-	/* Non-detached asynchronous receive: the internal request is returned
-	 * through *public_req.  Always returns 0. */
-	struct _starpu_mpi_req *internal_req;
-
-	_STARPU_MPI_LOG_IN();
-	STARPU_ASSERT(public_req);
-
-	/* detached = 0, no completion callback */
-	internal_req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL);
-	STARPU_ASSERT(internal_req);
-
-	*public_req = internal_req;
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/*
- *	Irecv (detached)
- */
-
-int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
-{
-	/* Detached asynchronous receive: no request is returned to the
-	 * caller, callback(arg) is run on completion if callback is non-NULL.
-	 * Always returns 0. */
-	_STARPU_MPI_LOG_IN();
-
-	(void) _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg);
-
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-
-/*
- *	Recv
- */
-
-int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status)
-{
-	/* Blocking receive, implemented as starpu_mpi_irecv() followed by
-	 * starpu_mpi_wait().  Returns the value reported by
-	 * starpu_mpi_wait() (the MPI return code stored in the request)
-	 * instead of unconditionally returning 0. */
-	starpu_mpi_req req;
-	int ret;
-
-	_STARPU_MPI_LOG_IN();
-
-	starpu_mpi_irecv(data_handle, &req, source, mpi_tag, comm);
-	ret = starpu_mpi_wait(&req, status);
-
-	_STARPU_MPI_LOG_OUT();
-	return ret;
-}
-
-/*
- *	Send
- */
-
-int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm)
-{
-	/* Blocking send, implemented as starpu_mpi_isend() followed by
-	 * starpu_mpi_wait().  Returns the value reported by
-	 * starpu_mpi_wait() (the MPI return code stored in the request)
-	 * instead of unconditionally returning 0. */
-	starpu_mpi_req req;
-	MPI_Status status;
-	int ret;
-
-	_STARPU_MPI_LOG_IN();
-
-	/* The status is only needed as an output slot for MPI_Wait() */
-	memset(&status, 0, sizeof(MPI_Status));
-
-	starpu_mpi_isend(data_handle, &req, dest, mpi_tag, comm);
-	ret = starpu_mpi_wait(&req, &status);
-
-	_STARPU_MPI_LOG_OUT();
-	return ret;
-}
-
-/*
- *	Wait
- */
-
-static void starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
-{
-	/* Executed for a WAIT_REQ request: block in MPI_Wait() on the request
-	 * referenced by waiting_req->other_request, then run its termination
-	 * handling. */
-	struct _starpu_mpi_req *target;
-
-	_STARPU_MPI_LOG_IN();
-
-	/* The request that is actually being waited for */
-	target = waiting_req->other_request;
-
-	target->ret = MPI_Wait(&target->request, waiting_req->status);
-	STARPU_ASSERT(target->ret == MPI_SUCCESS);
-
-	handle_request_termination(target);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-int starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
-{
-	/* Block until the request *public_req has completed.
-	 *
-	 * public_req: address of a request obtained from starpu_mpi_isend()
-	 *             or starpu_mpi_irecv(); set to NULL on return, the
-	 *             internal request being freed here.
-	 * status:     forwarded to MPI_Wait().
-	 * Returns the MPI return code stored in the request.
-	 */
-	_STARPU_MPI_LOG_IN();
-	int ret;
-
-	/* Same sanity checks as starpu_mpi_test(): both the pointer and the
-	 * request it designates are dereferenced below. */
-	STARPU_ASSERT(public_req);
-	struct _starpu_mpi_req *req = *public_req;
-	STARPU_ASSERT(req);
-
-	struct _starpu_mpi_req *waiting_req = calloc(1, sizeof(struct _starpu_mpi_req));
-	STARPU_ASSERT(waiting_req);
-
-	/* Balanced by the decrement done in submit_mpi_req() below */
-	INC_POSTED_REQUESTS(1);
-
-	/* We cannot try to complete a MPI request that was not actually posted
-	 * to MPI yet. */
-	_STARPU_PTHREAD_MUTEX_LOCK(&(req->req_mutex));
-	while (!(req->submitted))
-		_STARPU_PTHREAD_COND_WAIT(&(req->req_cond), &(req->req_mutex));
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&(req->req_mutex));
-
-	/* Initialize the auxiliary WAIT_REQ request which makes the
-	 * progression thread call MPI_Wait() on req */
-	_STARPU_PTHREAD_MUTEX_INIT(&(waiting_req->req_mutex), NULL);
-	_STARPU_PTHREAD_COND_INIT(&(waiting_req->req_cond), NULL);
-	waiting_req->status = status;
-	waiting_req->other_request = req;
-	waiting_req->func = starpu_mpi_wait_func;
-	waiting_req->request_type = WAIT_REQ;
-
-	submit_mpi_req(waiting_req);
-
-	/* We wait for the MPI request to finish */
-	_STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
-	while (!req->completed)
-		_STARPU_PTHREAD_COND_WAIT(&req->req_cond, &req->req_mutex);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
-
-	ret = req->ret;
-
-	/* The internal request structure was automatically allocated */
-	*public_req = NULL;
-	free(req);
-
-	/* NOTE(review): waiting_req is deliberately not freed here.  The
-	 * progression thread still reads waiting_req->detached in
-	 * handle_new_request() after starpu_mpi_wait_func() has signalled
-	 * completion, so freeing it from this thread could race with that
-	 * read.  The structure is currently leaked. */
-	_STARPU_MPI_LOG_OUT();
-	return ret;
-}
-
-/*
- * 	Test
- */
-
-static void starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
-{
-	/* Executed for a TEST_REQ request: call MPI_Test() on the request
-	 * referenced by testing_req->other_request, terminate it if it has
-	 * completed, then wake up the thread blocked in starpu_mpi_test(). */
-	struct _starpu_mpi_req *target;
-
-	_STARPU_MPI_LOG_IN();
-
-	/* The request that is actually being tested */
-	target = testing_req->other_request;
-
-	_STARPU_MPI_DEBUG("Test request %p - mpitag %d - TYPE %s %d\n", &target->request, target->mpi_tag, (target->request_type == RECV_REQ)?"recv : source":"send : dest", target->srcdst);
-	target->ret = MPI_Test(&target->request, testing_req->flag, testing_req->status);
-	STARPU_ASSERT(target->ret == MPI_SUCCESS);
-
-	/* *flag is set by MPI_Test() when the operation has completed */
-	if (*testing_req->flag != 0)
-	{
-		testing_req->ret = target->ret;
-		handle_request_termination(target);
-	}
-
-	/* The test itself is over, whatever its outcome */
-	_STARPU_PTHREAD_MUTEX_LOCK(&testing_req->req_mutex);
-	testing_req->completed = 1;
-	_STARPU_PTHREAD_COND_SIGNAL(&testing_req->req_cond);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&testing_req->req_mutex);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-int starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
-{	/* Check, without waiting for completion of the communication, whether
-	 * the non-detached request *public_req has finished.
-	 *
-	 * public_req: address of the request; set to NULL (and the internal
-	 *             request freed) when the request turned out to be
-	 *             completed.
-	 * flag:       set to 0 when the request has not been posted to MPI
-	 *             yet, otherwise filled in by MPI_Test().
-	 * status:     forwarded to MPI_Test().
-	 * Returns 0 when the request was not posted yet, otherwise the ret
-	 * field of the auxiliary test request.
-	 */
-        _STARPU_MPI_LOG_IN();
-	int ret = 0;
-
-	STARPU_ASSERT(public_req);
-
-	struct _starpu_mpi_req *req = *public_req;
-
-	STARPU_ASSERT(!req->detached);	/* detached requests cannot be tested */
-
-	_STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
-	unsigned submitted = req->submitted;	/* snapshot taken under the request lock */
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
-
-	if (submitted)
-	{
-		struct _starpu_mpi_req *testing_req = calloc(1, sizeof(struct _starpu_mpi_req));
-                STARPU_ASSERT(testing_req);
-                //		memset(testing_req, 0, sizeof(struct _starpu_mpi_req));
-
-		/* Initialize the auxiliary TEST_REQ request which makes the
-		 * progression thread call MPI_Test() on req */
-		_STARPU_PTHREAD_MUTEX_INIT(&(testing_req->req_mutex), NULL);
-		_STARPU_PTHREAD_COND_INIT(&(testing_req->req_cond), NULL);
-		testing_req->flag = flag;
-		testing_req->status = status;
-		testing_req->other_request = req;
-		testing_req->func = starpu_mpi_test_func;
-		testing_req->completed = 0;
-                testing_req->request_type = TEST_REQ;
-
-                INC_POSTED_REQUESTS(1);	/* balanced by the decrement in submit_mpi_req() */
-                submit_mpi_req(testing_req);
-
-		/* We wait for the test request to finish */
-		_STARPU_PTHREAD_MUTEX_LOCK(&(testing_req->req_mutex));
-		while (!(testing_req->completed))
-                        _STARPU_PTHREAD_COND_WAIT(&(testing_req->req_cond), &(testing_req->req_mutex));
-		_STARPU_PTHREAD_MUTEX_UNLOCK(&(testing_req->req_mutex));
-
-		ret = testing_req->ret;
-
-		if (*(testing_req->flag))
-		{
-			/* The request was completed so we free the internal
-			 * request structure which was automatically allocated
-			 * */
-			*public_req = NULL;
-			free(req);
-		}
-		/* NOTE(review): testing_req itself is never freed in this
-		 * function. */
-	}
-	else {
-		*flag = 0;	/* not posted to MPI yet, hence not completed */
-	}
-
-        _STARPU_MPI_LOG_OUT();
-	return ret;
-}
-
-/*
- *	Barrier
- */
-
-static void starpu_mpi_barrier_func(struct _starpu_mpi_req *barrier_req)
-{
-	/* Executed for a BARRIER_REQ request: run MPI_Barrier() on the
-	 * communicator stored in the request, then mark the request as
-	 * terminated. */
-	int mpi_ret;
-
-	_STARPU_MPI_LOG_IN();
-
-	mpi_ret = MPI_Barrier(barrier_req->comm);
-	barrier_req->ret = mpi_ret;
-	STARPU_ASSERT(barrier_req->ret == MPI_SUCCESS);
-
-	handle_request_termination(barrier_req);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-int starpu_mpi_barrier(MPI_Comm comm)
-{	/* Wait until no MPI request is pending and all tasks have finished,
-	 * then have the progression thread run MPI_Barrier() on comm.
-	 * Returns the return code of MPI_Barrier().  Only one barrier may be
-	 * in progress at a time (asserted below).
-	 */
-        _STARPU_MPI_LOG_IN();
-	int ret;
-	struct _starpu_mpi_req *barrier_req = calloc(1, sizeof(struct _starpu_mpi_req));
-	STARPU_ASSERT(barrier_req);
-
-	/* First wait for *both* all tasks and MPI requests to finish, in case
-	 * some tasks generate MPI requests, MPI requests generate tasks, etc.
-	 */
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	STARPU_ASSERT_MSG(!barrier_running, "Concurrent starpu_mpi_barrier is not implemented, even on different communicators");
-	barrier_running = 1;	/* makes the progression thread signal cond_finished when it goes idle */
-	do {
-		while (posted_requests)
-			/* Wait for all current MPI requests to finish */
-			_STARPU_PTHREAD_COND_WAIT(&cond_finished, &mutex);
-		/* No current request, clear flag; submit_mpi_req() sets it
-		 * again whenever a request is queued */
-		newer_requests = 0;
-		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-		/* Now wait for all tasks */
-		starpu_task_wait_for_all();
-		_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-		/* Check newer_requests again, in case some MPI requests
-		 * triggered by tasks completed and triggered tasks between
-		 * wait_for_all finished and we take the lock */
-	} while (posted_requests || newer_requests);
-	barrier_running = 0;
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-	/* Initialize the request structure */
-	_STARPU_PTHREAD_MUTEX_INIT(&(barrier_req->req_mutex), NULL);
-	_STARPU_PTHREAD_COND_INIT(&(barrier_req->req_cond), NULL);
-	barrier_req->func = starpu_mpi_barrier_func;
-	barrier_req->request_type = BARRIER_REQ;
-	barrier_req->comm = comm;
-
-        INC_POSTED_REQUESTS(1);	/* balanced by the decrement in submit_mpi_req() */
-	submit_mpi_req(barrier_req);
-
-	/* We wait for the MPI request to finish */
-	_STARPU_PTHREAD_MUTEX_LOCK(&barrier_req->req_mutex);
-	while (!barrier_req->completed)
-		_STARPU_PTHREAD_COND_WAIT(&barrier_req->req_cond, &barrier_req->req_mutex);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&barrier_req->req_mutex);
-
-	ret = barrier_req->ret;
-
-        // NOTE(review): barrier_req is never freed in this function.
-        _STARPU_MPI_LOG_OUT();
-	return ret;
-}
-
-/*
- *	Requests
- */
-
-#ifdef STARPU_MPI_VERBOSE
-/* Human-readable name of a request type, for debug messages only.  The
- * returned strings are literals and must not be modified, hence the const
- * return type. */
-static const char *starpu_mpi_request_type(unsigned request_type)
-{
-	switch (request_type)
-	{
-	case SEND_REQ: return "send";
-	case RECV_REQ: return "recv";
-	case WAIT_REQ: return "wait";
-	case TEST_REQ: return "test";
-	case BARRIER_REQ: return "barrier";
-	default: return "unknown request type";
-	}
-}
-#endif
-
-static void handle_request_termination(struct _starpu_mpi_req *req)
-{
-	/* Finish a request whose MPI operation has completed: release the
-	 * data (except for barriers), run the user callback and wake up the
-	 * threads waiting on the request. */
-	_STARPU_MPI_LOG_IN();
-
-	_STARPU_MPI_DEBUG("complete MPI (%s %d) data %p req %p - tag %d\n", starpu_mpi_request_type(req->request_type), req->srcdst, req->data_handle, &req->request, req->mpi_tag);
-
-	/* The data handle fields are only filled in for non-barrier requests */
-	if (req->request_type != BARRIER_REQ)
-	{
-		if (!req->needs_unpacking)
-			MPI_Type_free(&req->datatype);
-		else
-			starpu_handle_unpack_data(req->data_handle, req->ptr);
-
-		/* Matches the starpu_data_acquire_cb() done at submission */
-		starpu_data_release(req->data_handle);
-	}
-
-	if (req->request_type == RECV_REQ)
-	{
-		TRACE_MPI_IRECV_END(req->srcdst, req->mpi_tag);
-	}
-
-	/* Run the user-provided completion callback, when there is one */
-	if (req->callback != NULL)
-		req->callback(req->callback_arg);
-
-	/* Tell anyone potentially waiting on the request that it is
-	 * terminated now */
-	_STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
-	req->completed = 1;
-	_STARPU_PTHREAD_COND_BROADCAST(&req->req_cond);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-static void submit_mpi_req(void *arg)
-{
-	/* Queue a request (passed as an opaque pointer so that this function
-	 * can be used as a starpu_data_acquire_cb() callback) on the
-	 * new_requests list and wake up the progression thread. */
-	struct _starpu_mpi_req *new_req;
-
-	_STARPU_MPI_LOG_IN();
-	new_req = (struct _starpu_mpi_req *) arg;
-
-	/* The request is no longer merely "posted": it is about to be queued */
-	INC_POSTED_REQUESTS(-1);
-
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	_starpu_mpi_req_list_push_front(new_requests, new_req);
-	/* Lets starpu_mpi_barrier() notice that something was queued */
-	newer_requests = 1;
-	_STARPU_MPI_DEBUG("Pushing new request type %d\n", new_req->request_type);
-	_STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-/*
- *	Scheduler hook
- */
-
-#ifdef USE_STARPU_ACTIVITY
-static unsigned progression_hook_func(void *arg __attribute__((unused)))
-{
-	/* Progression hook registered with starpu_progression_hook_register():
-	 * when detached requests are pending, wake up the progression thread
-	 * and return 0; return 1 otherwise. */
-	unsigned has_detached;
-
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	has_detached = !_starpu_mpi_req_list_empty(detached_requests);
-	if (has_detached)
-	{
-		_STARPU_PTHREAD_COND_SIGNAL(&cond_progression);
-	}
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-	/* "may block" value: 1 only when nothing is pending */
-	return has_detached ? 0 : 1;
-}
-#endif
-
-/*
- *	Progression loop
- */
-
-static void test_detached_requests(void)
-{
-	/* Poll every pending detached request with MPI_Test(); completed ones
-	 * are terminated, unlinked from detached_requests and freed. */
-	_STARPU_MPI_LOG_IN();
-	int flag;
-	MPI_Status status;
-	struct _starpu_mpi_req *req, *next_req;
-
-	_STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
-
-	for (req = _starpu_mpi_req_list_begin(detached_requests);
-		req != _starpu_mpi_req_list_end(detached_requests);
-		req = next_req)
-	{
-		/* Fetched first: req may be erased and freed below */
-		next_req = _starpu_mpi_req_list_next(req);
-
-		/* The list lock is dropped while talking to MPI and while
-		 * running the termination handling (which calls the user
-		 * callback) */
-		_STARPU_PTHREAD_MUTEX_UNLOCK(&detached_requests_mutex);
-
-		//_STARPU_MPI_DEBUG("Test detached request %p - mpitag %d - TYPE %s %d\n", &req->request, req->mpi_tag, (req->request_type == RECV_REQ)?"recv : source":"send : dest", req->srcdst);
-		req->ret = MPI_Test(&req->request, &flag, &status);
-		STARPU_ASSERT(req->ret == MPI_SUCCESS);
-
-		if (flag)
-		{
-			handle_request_termination(req);
-		}
-
-		_STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
-
-		if (flag)
-		{
-			_starpu_mpi_req_list_erase(detached_requests, req);
-
-			/* Detached requests are allocated by the library and
-			 * never handed to the application (only requests with
-			 * req->detached set are pushed on this list), so this
-			 * is the only place where they can be released. */
-			free(req);
-		}
-	}
-
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&detached_requests_mutex);
-	_STARPU_MPI_LOG_OUT();
-}
-
-static void handle_new_request(struct _starpu_mpi_req *req)
-{
-	/* Run the action attached to a freshly queued request and, for a
-	 * detached request, register it for later polling. */
-	_STARPU_MPI_LOG_IN();
-	STARPU_ASSERT(req);
-
-	/* req->func is the routine that posts or performs the MPI operation */
-	_STARPU_MPI_DEBUG("Handling new request type %d\n", req->request_type);
-	req->func(req);
-
-	if (!req->detached)
-	{
-		_STARPU_MPI_LOG_OUT();
-		return;
-	}
-
-	/* Put the submitted request into the list of pending detached
-	 * requests so that it can be handled by the progression mechanisms */
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	_starpu_mpi_req_list_push_front(detached_requests, req);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-	starpu_wake_all_blocked_workers();
-
-	/* Make sure the progression loop takes the new entry into account */
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	_STARPU_PTHREAD_COND_SIGNAL(&cond_progression);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-	_STARPU_MPI_LOG_OUT();
-}
-
-static void *progress_thread_func(void *arg)
-{	/* Body of the progression thread created by _starpu_mpi_initialize().
-	 *
-	 * arg points to an int telling whether this thread must initialise
-	 * (and later finalise) MPI itself.  The thread then handles queued
-	 * requests and polls detached ones until starpu_mpi_shutdown() clears
-	 * `running` and nothing is pending any more.  Always returns NULL.
-	 */
-        int initialize_mpi = *((int *) arg);	/* copied right away: arg designates the creator's variable */
-
-        _STARPU_DEBUG("Initialize mpi: %d\n", initialize_mpi);
-
-        if (initialize_mpi) {
-#ifdef STARPU_DEVEL
-#warning get real argc and argv from the application
-#endif
-                int argc = 0;
-                char **argv = NULL;
-                int thread_support;	/* thread level actually granted by MPI */
-                _STARPU_DEBUG("Calling MPI_Init_thread\n");
-                if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) {
-                        fprintf(stderr,"MPI_Init_thread failed\n");
-                        exit(1);
-                }
-                if (thread_support == MPI_THREAD_FUNNELED)
-                        fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
-                if (thread_support < MPI_THREAD_FUNNELED)
-                        fprintf(stderr,"Warning: MPI does not have thread support!\n");
-        }
-
-	/* notify the main thread that the progression thread is ready;
-	 * _starpu_mpi_initialize() waits on cond_progression for this */
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	running = 1;
-	_STARPU_PTHREAD_COND_SIGNAL(&cond_progression);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	/* Keep going until shutdown was requested AND nothing is posted,
-	 * queued or detached any more.
-	 * NOTE(review): posted_requests is read here under `mutex`, whereas
-	 * INC_POSTED_REQUESTS updates it under mutex_posted_requests. */
-	while (running || posted_requests || !(_starpu_mpi_req_list_empty(new_requests)) || !(_starpu_mpi_req_list_empty(detached_requests))) {
-		/* shall we block ? */
-		unsigned block = _starpu_mpi_req_list_empty(new_requests);
-
-#ifndef USE_STARPU_ACTIVITY
-		block = block && _starpu_mpi_req_list_empty(detached_requests);
-#endif
-
-		if (block)
-		{
-                        _STARPU_MPI_DEBUG("NO MORE REQUESTS TO HANDLE\n");
-			if (barrier_running)
-				/* Tell mpi_barrier */
-				_STARPU_PTHREAD_COND_SIGNAL(&cond_finished);
-			_STARPU_PTHREAD_COND_WAIT(&cond_progression, &mutex);
-		}
-
-		/* test whether there are some terminated "detached request";
-		 * `mutex` is released meanwhile since test_detached_requests()
-		 * takes detached_requests_mutex itself */
-		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-		test_detached_requests();
-		_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-
-		/* drain the list of new requests, one request at a time */
-		struct _starpu_mpi_req *req;
-		while (!_starpu_mpi_req_list_empty(new_requests))
-		{
-			req = _starpu_mpi_req_list_pop_back(new_requests);
-
-			/* handling a request is likely to block for a while
-			 * (on a sync_data_with_mem call), we want to let the
-			 * application submit requests in the meantime, so we
-			 * release the lock.  */
-			_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-			handle_new_request(req);
-			_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-		}
-	}
-
-	/* The loop condition guarantees that nothing is left at this point */
-	STARPU_ASSERT(_starpu_mpi_req_list_empty(detached_requests));
-	STARPU_ASSERT(_starpu_mpi_req_list_empty(new_requests));
-        STARPU_ASSERT(posted_requests == 0);
-
-        if (initialize_mpi) {
-                _STARPU_MPI_DEBUG("Calling MPI_Finalize()\n");
-                MPI_Finalize();	/* only when this thread called MPI_Init_thread() */
-        }
-
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-	return NULL;
-}
-
-/*
- *	(De)Initialization methods
- */
-
-#ifdef USE_STARPU_ACTIVITY
-static int hookid = - 1;
-#endif
-
-static void _starpu_mpi_add_sync_point_in_fxt(void)
-{
-#ifdef STARPU_USE_FXT
-	/* Record a synchronisation point in the FxT trace: all ranks of
-	 * MPI_COMM_WORLD meet on a barrier, then trace the same key, which is
-	 * drawn by rank 0. */
-	int my_rank;
-	int nb_ranks;
-	int trace_key;
-	int mpi_ret;
-
-	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &nb_ranks);
-
-	mpi_ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(mpi_ret == MPI_SUCCESS);
-
-	/* We generate a "unique" key so that we can make sure that different
-	 * FxT traces come from the same MPI run.
-	 *
-	 * XXX perhaps we don't want to generate a new seed if the application
-	 * specified some reproducible behaviour ? */
-	if (my_rank == 0)
-	{
-		srand(time(NULL));
-		trace_key = rand();
-	}
-
-	/* Every other rank gets the key drawn by rank 0 */
-	MPI_Bcast(&trace_key, 1, MPI_INT, 0, MPI_COMM_WORLD);
-
-	TRACE_MPI_BARRIER(my_rank, nb_ranks, trace_key);
-
-	_STARPU_MPI_DEBUG("unique key %x\n", trace_key);
-#endif
-}
-
-static
-int _starpu_mpi_initialize(int initialize_mpi, int *rank, int *world_size)
-{	/* Common initialisation of the StarPU-MPI layer.
-	 *
-	 * initialize_mpi: non-zero when the progression thread must call
-	 *                 MPI_Init_thread()/MPI_Finalize() itself.
-	 * rank, world_size: when both are non-NULL, filled in with the rank
-	 *                 and size of MPI_COMM_WORLD.
-	 * Always returns 0.
-	 */
-#ifndef STARPU_MPI_CACHE
-	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --disable-mpi-cache\n");
-#endif
-
-	/* Synchronisation objects and request lists shared with the
-	 * progression thread */
-	_STARPU_PTHREAD_MUTEX_INIT(&mutex, NULL);
-	_STARPU_PTHREAD_COND_INIT(&cond_progression, NULL);
-	_STARPU_PTHREAD_COND_INIT(&cond_finished, NULL);
-	new_requests = _starpu_mpi_req_list_new();
-
-	_STARPU_PTHREAD_MUTEX_INIT(&detached_requests_mutex, NULL);
-	detached_requests = _starpu_mpi_req_list_new();
-
-        _STARPU_PTHREAD_MUTEX_INIT(&mutex_posted_requests, NULL);
-
-	/* The thread receives the address of the initialize_mpi parameter; it
-	 * reads it before setting `running`, and this function only goes on
-	 * once `running` is set, so the parameter is still alive then */
-	_STARPU_PTHREAD_CREATE(&progress_thread, NULL,
-			       progress_thread_func, (void *)&initialize_mpi);
-
-	/* Wait until the progression thread reports that it is ready */
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	while (!running)
-		_STARPU_PTHREAD_COND_WAIT(&cond_progression, &mutex);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-        if (rank && world_size) {
-                _STARPU_DEBUG("Calling MPI_Comm_rank\n");
-                MPI_Comm_rank(MPI_COMM_WORLD, rank);
-                MPI_Comm_size(MPI_COMM_WORLD, world_size);
-        }
-
-#ifdef STARPU_USE_FXT
-	int prank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &prank);
-	starpu_set_profiling_id(prank);	/* the MPI rank is used as profiling id */
-#endif //STARPU_USE_FXT
-
-#ifdef USE_STARPU_ACTIVITY
-	hookid = starpu_progression_hook_register(progression_hook_func, NULL);
-	STARPU_ASSERT(hookid >= 0);
-#endif
-
-	_starpu_mpi_add_sync_point_in_fxt();
-	_starpu_mpi_comm_amounts_init(MPI_COMM_WORLD);
-	return 0;
-}
-
-int starpu_mpi_initialize(void)
-{
-	/* The progression thread does not call MPI_Init_thread() in this
-	 * mode, and neither rank nor world size is reported. */
-	const int init_mpi = 0;
-
-	return _starpu_mpi_initialize(init_mpi, NULL, NULL);
-}
-
-int starpu_mpi_initialize_extended(int *rank, int *world_size)
-{
-	/* The progression thread calls MPI_Init_thread() itself in this
-	 * mode; *rank and *world_size are filled in when both pointers are
-	 * non-NULL. */
-	const int init_mpi = 1;
-
-	return _starpu_mpi_initialize(init_mpi, rank, world_size);
-}
-
-int starpu_mpi_shutdown(void)
-{
-	/* Stop the progression thread and release the resources set up by
-	 * _starpu_mpi_initialize().  Always returns 0. */
-	int rank;
-
-	/* We need to get the rank before calling MPI_Finalize to pass to _starpu_mpi_comm_amounts_display() */
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-	/* kill the progression thread: it leaves its loop once `running` is
-	 * cleared and no request is pending any more */
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	running = 0;
-	_STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-
-	/* The thread always returns NULL, its exit value is not needed */
-	pthread_join(progress_thread, NULL);
-
-#ifdef USE_STARPU_ACTIVITY
-	starpu_progression_hook_deregister(hookid);
-#endif
-
-	/* free the request queues */
-	_starpu_mpi_req_list_delete(detached_requests);
-	_starpu_mpi_req_list_delete(new_requests);
-
-	/* Destroy the synchronisation objects created at initialisation, so
-	 * that a later re-initialisation does not re-init live objects.  The
-	 * progression thread has been joined, so nothing uses them any more. */
-	pthread_mutex_destroy(&mutex_posted_requests);
-	pthread_mutex_destroy(&detached_requests_mutex);
-	pthread_cond_destroy(&cond_finished);
-	pthread_cond_destroy(&cond_progression);
-	pthread_mutex_destroy(&mutex);
-
-	_starpu_mpi_comm_amounts_display(rank);
-	_starpu_mpi_comm_amounts_free();
-
-	return 0;
-}
-

+ 0 - 78
mpi/src/starpu_mpi_collective.c

@@ -1,78 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <mpi.h>
-#include <starpu.h>
-#include <starpu_mpi.h>
-
-/* Scatter `count` handles from `root` to their owners with detached
- * (fire-and-forget) transfers.
- *
- * NULL entries of data_handles are skipped.  For every other handle the root
- * sends it to its owner (unless the root owns it), and the owner, when it is
- * not the root, posts the matching receive.  The MPI tag attached to each
- * handle must be non-negative.  Always returns 0. */
-int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm)
-{
-	int me;
-	int idx;
-
-	MPI_Comm_rank(comm, &me);
-
-	for (idx = 0; idx < count; idx++)
-	{
-		starpu_data_handle_t handle = data_handles[idx];
-		int owner;
-		int mpi_tag;
-
-		if (!handle)
-			continue;
-
-		owner = starpu_data_get_rank(handle);
-		mpi_tag = starpu_data_get_tag(handle);
-		STARPU_ASSERT(mpi_tag >= 0);
-
-		if (me == root)
-		{
-			/* Root side: ship the piece to whoever owns it. */
-			if (owner != root)
-				starpu_mpi_isend_detached(handle, owner, mpi_tag, comm, NULL, NULL);
-		}
-		else if (owner == me)
-		{
-			/* Owner side: fetch our piece from the root. */
-			starpu_mpi_irecv_detached(handle, root, mpi_tag, comm, NULL, NULL);
-		}
-	}
-
-	return 0;
-}
-
-/* Gather `count` handles from their owners onto `root` with detached
- * transfers; the mirror image of starpu_mpi_scatter_detached().
- *
- * NULL entries of data_handles are skipped.  For every other handle the root
- * posts a receive from the owner (unless the root owns it), and the owner,
- * when it is not the root, sends the handle to the root.  The MPI tag
- * attached to each handle must be non-negative.  Always returns 0. */
-int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm)
-{
-	int me;
-	int idx;
-
-	MPI_Comm_rank(comm, &me);
-
-	for (idx = 0; idx < count; idx++)
-	{
-		starpu_data_handle_t handle = data_handles[idx];
-		int owner;
-		int mpi_tag;
-
-		if (!handle)
-			continue;
-
-		owner = starpu_data_get_rank(handle);
-		mpi_tag = starpu_data_get_tag(handle);
-		STARPU_ASSERT(mpi_tag >= 0);
-
-		if (me == root)
-		{
-			/* Root side: collect the piece from its owner. */
-			if (owner != root)
-				starpu_mpi_irecv_detached(handle, owner, mpi_tag, comm, NULL, NULL);
-		}
-		else if (owner == me)
-		{
-			/* Owner side: push our piece to the root. */
-			starpu_mpi_isend_detached(handle, root, mpi_tag, comm, NULL, NULL);
-		}
-	}
-
-	return 0;
-}
-

+ 0 - 149
mpi/src/starpu_mpi_datatype.c

@@ -1,149 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi_datatype.h>
-
-typedef int (*handle_to_datatype_func)(starpu_data_handle_t, MPI_Datatype *);
-
-/*
- * 	Matrix
- */
-
-/* Build and commit an MPI datatype describing a matrix handle: ny blocks of
- * nx*elemsize bytes, consecutive blocks being ld*elemsize bytes apart.
- * The result is stored in *datatype.  Always returns 0; MPI failures trip
- * STARPU_ASSERT. */
-static int handle_to_datatype_matrix(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	unsigned nx = starpu_matrix_get_nx(data_handle);
-	unsigned ny = starpu_matrix_get_ny(data_handle);
-	unsigned ld = starpu_matrix_get_local_ld(data_handle);
-	size_t elemsize = starpu_matrix_get_elemsize(data_handle);
-
-	/* Everything is expressed in bytes so that any element type works. */
-	size_t row_bytes = nx*elemsize;
-	size_t stride_bytes = ld*elemsize;
-	int mpi_err;
-
-	mpi_err = MPI_Type_vector(ny, row_bytes, stride_bytes, MPI_BYTE, datatype);
-	STARPU_ASSERT(mpi_err == MPI_SUCCESS);
-
-	mpi_err = MPI_Type_commit(datatype);
-	STARPU_ASSERT(mpi_err == MPI_SUCCESS);
-
-	return 0;
-}
-
-/*
- * 	Block
- */
-
-/* Build and commit an MPI datatype describing a 3D block handle.
- *
- * A 2D layer (ny blocks of nx*elemsize bytes, ldy*elemsize bytes apart) is
- * described first, then nz such layers are laid out ldz*elemsize bytes apart.
- * The result is stored in *datatype.  Always returns 0; MPI failures trip
- * STARPU_ASSERT. */
-static int handle_to_datatype_block(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	int ret;
-
-	unsigned nx = starpu_block_get_nx(data_handle);
-	unsigned ny = starpu_block_get_ny(data_handle);
-	unsigned nz = starpu_block_get_nz(data_handle);
-	unsigned ldy = starpu_block_get_local_ldy(data_handle);
-	unsigned ldz = starpu_block_get_local_ldz(data_handle);
-	size_t elemsize = starpu_block_get_elemsize(data_handle);
-
-	/* The layer type is only an input to the next constructor, so it does
-	 * not need to be committed. */
-	MPI_Datatype datatype_2dlayer;
-	ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_2dlayer);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
-
-	/* MPI_Type_create_hvector supersedes MPI_Type_hvector, which was
-	 * removed from the MPI standard; the stride is a byte count. */
-	ret = MPI_Type_create_hvector(nz, 1, (MPI_Aint) (ldz*elemsize), datatype_2dlayer, datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
-
-	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
-
-	/* Release the intermediate type: types derived from it are unaffected,
-	 * and without this one datatype leaked on every call. */
-	ret = MPI_Type_free(&datatype_2dlayer);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
-
-	return 0;
-}
-
-/*
- * 	Vector
- */
-
-/* Build and commit an MPI datatype describing a vector handle as one
- * contiguous run of nx*elemsize bytes.  The result is stored in *datatype.
- * Always returns 0; MPI failures trip STARPU_ASSERT. */
-static int handle_to_datatype_vector(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	unsigned nx = starpu_vector_get_nx(data_handle);
-	size_t elemsize = starpu_vector_get_elemsize(data_handle);
-	size_t total_bytes = nx*elemsize;
-	int mpi_err;
-
-	mpi_err = MPI_Type_contiguous(total_bytes, MPI_BYTE, datatype);
-	STARPU_ASSERT(mpi_err == MPI_SUCCESS);
-
-	mpi_err = MPI_Type_commit(datatype);
-	STARPU_ASSERT(mpi_err == MPI_SUCCESS);
-
-	return 0;
-}
-
-/*
- * 	Variable
- */
-
-/* Build and commit an MPI datatype describing a variable handle as elemsize
- * contiguous bytes.  The result is stored in *datatype.  Always returns 0;
- * MPI failures trip STARPU_ASSERT. */
-static int handle_to_datatype_variable(starpu_data_handle_t data_handle, MPI_Datatype *datatype)
-{
-	size_t nbytes = starpu_variable_get_elemsize(data_handle);
-	int mpi_err;
-
-	mpi_err = MPI_Type_contiguous(nbytes, MPI_BYTE, datatype);
-	STARPU_ASSERT(mpi_err == MPI_SUCCESS);
-
-	mpi_err = MPI_Type_commit(datatype);
-	STARPU_ASSERT(mpi_err == MPI_SUCCESS);
-
-	return 0;
-}
-
-/*
- *	Generic
- */
-
-/* Dispatch table indexed by StarPU data interface id.  Each slot holds the
- * function that builds the MPI datatype for that interface, or NULL when no
- * conversion is provided (starpu_mpi_handle_to_datatype() asserts on a NULL
- * slot). */
-static handle_to_datatype_func handle_to_datatype_funcs[STARPU_MAX_INTERFACE_ID] =
-{
-	[STARPU_MATRIX_INTERFACE_ID]	= handle_to_datatype_matrix,
-	[STARPU_BLOCK_INTERFACE_ID]	= handle_to_datatype_block,
-	[STARPU_VECTOR_INTERFACE_ID]	= handle_to_datatype_vector,
-	[STARPU_CSR_INTERFACE_ID]	= NULL,
-	[STARPU_BCSR_INTERFACE_ID]	= NULL,
-	[STARPU_VARIABLE_INTERFACE_ID]	= handle_to_datatype_variable,
-	[STARPU_VOID_INTERFACE_ID]      = NULL,
-	[STARPU_MULTIFORMAT_INTERFACE_ID] = NULL,
-};
-
-/* Describe a StarPU handle as an (MPI datatype, count) pair.
- *
- * For interface ids up to STARPU_MULTIFORMAT_INTERFACE_ID the matching entry
- * of handle_to_datatype_funcs builds a dedicated datatype; *count is set to 1
- * and 0 is returned.  A NULL table entry trips STARPU_ASSERT.
- * For any other interface id the handle is described as
- * starpu_handle_get_size() elements of MPI_BYTE and 1 is returned. */
-int starpu_mpi_handle_to_datatype(starpu_data_handle_t data_handle, MPI_Datatype *datatype, int *count)
-{
-	enum starpu_data_interface_id id = starpu_handle_get_interface_id(data_handle);
-	handle_to_datatype_func build_datatype;
-
-	if (id > STARPU_MULTIFORMAT_INTERFACE_ID)
-	{
-		/* The datatype is not predefined by StarPU */
-		*count = starpu_handle_get_size(data_handle);
-		*datatype = MPI_BYTE;
-		return 1;
-	}
-
-	build_datatype = handle_to_datatype_funcs[id];
-	STARPU_ASSERT(build_datatype);
-	build_datatype(data_handle, datatype);
-	*count = 1;
-	return 0;
-}

+ 0 - 33
mpi/src/starpu_mpi_datatype.h

@@ -1,33 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_DATATYPE_H__
-#define __STARPU_MPI_DATATYPE_H__
-
-#include <starpu_mpi.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Fill *datatype and *count with an MPI description of data_handle.
- * Returns 0 when a dedicated datatype was built (*count is then 1) and 1
- * when the handle is described as *count elements of MPI_BYTE. */
-int starpu_mpi_handle_to_datatype(starpu_data_handle_t data_handle, MPI_Datatype *datatype, int *count);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __STARPU_MPI_DATATYPE_H__

+ 0 - 45
mpi/src/starpu_mpi_fxt.h

@@ -1,45 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_FXT_H__
-#define __STARPU_MPI_FXT_H__
-
-#include <starpu.h>
-#include <common/config.h>
-#include <common/fxt.h>
-
-/* Event codes recorded by the MPI trace probes below. */
-#define FUT_MPI_BARRIER		0x5201
-#define FUT_MPI_ISEND		0x5202
-#define FUT_MPI_IRECV_END	0x5203
-
-/* With STARPU_USE_FXT each TRACE_MPI_* macro emits one probe carrying its
- * arguments plus _starpu_gettid(); otherwise the macros expand to an empty
- * statement.
- * NOTE(review): every expansion already ends with ';', so a call site that
- * adds its own ';' yields two statements, which breaks an unbraced
- * if/else -- confirm before using these macros in such a position. */
-#ifdef STARPU_USE_FXT
-#define TRACE_MPI_BARRIER(rank, worldsize, key)	\
-	FUT_DO_PROBE4(FUT_MPI_BARRIER, (rank), (worldsize), (key), _starpu_gettid());
-#define TRACE_MPI_ISEND(dest, mpi_tag, size)	\
-	FUT_DO_PROBE4(FUT_MPI_ISEND, (dest), (mpi_tag), (size), _starpu_gettid());
-#define TRACE_MPI_IRECV_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(FUT_MPI_IRECV_END, (src), (mpi_tag), _starpu_gettid());
-/* NOTE(review): TRACE is defined empty and only in this branch; it looks
- * like a leftover -- confirm whether anything relies on it. */
-#define TRACE
-#else
-#define TRACE_MPI_BARRIER(a, b, c)	do {} while(0);
-#define TRACE_MPI_ISEND(a, b, c)	do {} while(0);
-#define TRACE_MPI_IRECV_END(a, b)	do {} while(0);
-#endif
-
-
-
-#endif // __STARPU_MPI_FXT_H__

+ 0 - 104
mpi/src/starpu_mpi_helper.c

@@ -1,104 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-
-/* Completion callback for the *_detached_unlock_tag helpers: notify the tag
- * stored in the heap-allocated box `arg`, then release the box. */
-static void starpu_mpi_unlock_tag_callback(void *arg)
-{
-	starpu_tag_t *boxed_tag = (starpu_tag_t *) arg;
-	starpu_tag_t tag = *boxed_tag;
-
-	starpu_tag_notify_from_apps(tag);
-	free(boxed_tag);
-}
-
-/* Post a detached send of data_handle to `dest` and notify `tag` once the
- * transfer has completed.
- *
- * The tag is copied into a heap-allocated box that
- * starpu_mpi_unlock_tag_callback() frees after notifying it.
- * Returns the value of starpu_mpi_isend_detached(). */
-int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle,
-				int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
-{
-	starpu_tag_t *tagptr = malloc(sizeof(starpu_tag_t));
-	/* Fail loudly instead of writing through a NULL pointer. */
-	STARPU_ASSERT(tagptr);
-	*tagptr = tag;
-
-	return starpu_mpi_isend_detached(data_handle, dest, mpi_tag, comm,
-						starpu_mpi_unlock_tag_callback, tagptr);
-}
-
-
-/* Post a detached receive of data_handle from `source` and notify `tag` once
- * the transfer has completed.
- *
- * The tag is copied into a heap-allocated box that
- * starpu_mpi_unlock_tag_callback() frees after notifying it.
- * Returns the value of starpu_mpi_irecv_detached(). */
-int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
-{
-	starpu_tag_t *tagptr = malloc(sizeof(starpu_tag_t));
-	/* Fail loudly instead of writing through a NULL pointer. */
-	STARPU_ASSERT(tagptr);
-	*tagptr = tag;
-
-	return starpu_mpi_irecv_detached(data_handle, source, mpi_tag, comm,
-						starpu_mpi_unlock_tag_callback, tagptr);
-}
-
-/* State shared by all transfers of one *_array_detached_unlock_tag call. */
-struct arg_array {
-	/* Number of transfers still pending; decremented atomically by
-	 * starpu_mpi_array_unlock_callback(). */
-	int array_size;
-	/* Tag notified once the counter reaches zero. */
-	starpu_tag_t tag;
-};
-
-/* Completion callback shared by every transfer of an array request:
- * atomically decrement the pending counter and, when it reaches zero, notify
- * the tag and release the shared state. */
-static void starpu_mpi_array_unlock_callback(void *_arg)
-{
-	struct arg_array *shared = _arg;
-
-	/* Other transfers are still in flight: nothing more to do. */
-	if (STARPU_ATOMIC_ADD(&shared->array_size, -1) != 0)
-		return;
-
-	starpu_tag_notify_from_apps(shared->tag);
-	free(shared);
-}
-
-/* Post array_size detached sends (element i uses data_handle[i], dest[i],
- * mpi_tag[i] and comm[i]) and notify `tag` once all of them have completed.
- *
- * With an empty array there is nothing to wait for, so the tag is notified
- * immediately; previously the shared state leaked and the tag was never
- * notified.  Always returns 0. */
-int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size,
-		starpu_data_handle_t *data_handle, int *dest, int *mpi_tag,
-		MPI_Comm *comm, starpu_tag_t tag)
-{
-	struct arg_array *arg;
-	unsigned elem;
-
-	if (array_size == 0)
-	{
-		starpu_tag_notify_from_apps(tag);
-		return 0;
-	}
-
-	arg = malloc(sizeof(struct arg_array));
-	/* Fail loudly instead of writing through a NULL pointer. */
-	STARPU_ASSERT(arg);
-
-	/* Freed by the callback of the last transfer to complete. */
-	arg->array_size = array_size;
-	arg->tag = tag;
-
-	for (elem = 0; elem < array_size; elem++)
-	{
-		starpu_mpi_isend_detached(data_handle[elem], dest[elem],
-				mpi_tag[elem], comm[elem],
-				starpu_mpi_array_unlock_callback, arg);
-	}
-
-	return 0;
-}
-
-
-/* Post array_size detached receives (element i uses data_handle[i],
- * source[i], mpi_tag[i] and comm[i]) and notify `tag` once all of them have
- * completed.
- *
- * With an empty array there is nothing to wait for, so the tag is notified
- * immediately; previously the shared state leaked and the tag was never
- * notified.  Always returns 0. */
-int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
-{
-	struct arg_array *arg;
-	unsigned elem;
-
-	if (array_size == 0)
-	{
-		starpu_tag_notify_from_apps(tag);
-		return 0;
-	}
-
-	arg = malloc(sizeof(struct arg_array));
-	/* Fail loudly instead of writing through a NULL pointer. */
-	STARPU_ASSERT(arg);
-
-	/* Freed by the callback of the last transfer to complete. */
-	arg->array_size = array_size;
-	arg->tag = tag;
-
-	for (elem = 0; elem < array_size; elem++)
-	{
-		starpu_mpi_irecv_detached(data_handle[elem], source[elem],
-				mpi_tag[elem], comm[elem],
-				starpu_mpi_array_unlock_callback, arg);
-	}
-
-	return 0;
-}

+ 0 - 632
mpi/src/starpu_mpi_insert_task.c

@@ -1,632 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- * Copyright (C) 2011-2012  Université de Bordeaux 1
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdarg.h>
-#include <mpi.h>
-
-#include <starpu.h>
-#include <starpu_data.h>
-#include <common/utils.h>
-#include <common/uthash.h>
-#include <util/starpu_insert_task_utils.h>
-#include <datawizard/coherency.h>
-
-//#define STARPU_MPI_VERBOSE 1
-#include <starpu_mpi_private.h>
-
-#ifdef STARPU_MPI_CACHE
-/* Communication cache, compiled in only when STARPU_MPI_CACHE is defined
- * (i.e. when we are allowed to keep copies of remote data). */
-
-/* One cached handle, stored in a uthash table keyed by the `data` pointer. */
-struct _starpu_data_entry
-{
-	UT_hash_handle hh;
-	void *data;
-};
-
-/* One hash table per MPI node: sent_data[n] records the handles already sent
- * to node n, received_data[n] those already received from node n.  Both
- * arrays are allocated by _starpu_mpi_tables_init(). */
-struct _starpu_data_entry **sent_data = NULL;
-struct _starpu_data_entry **received_data = NULL;
-#endif /* STARPU_MPI_CACHE */
-
-/* Allocate the per-node cache tables on first use; later calls are no-ops.
- * Does nothing at all unless STARPU_MPI_CACHE is defined. */
-static void _starpu_mpi_tables_init()
-{
-#ifdef STARPU_MPI_CACHE
-	int nb_nodes;
-	int node;
-
-	/* Already initialised by an earlier call. */
-	if (sent_data != NULL)
-		return;
-
-	MPI_Comm_size(MPI_COMM_WORLD, &nb_nodes);
-	_STARPU_MPI_DEBUG("Initialising htable for cache\n");
-
-	/* An empty uthash table is simply a NULL head pointer. */
-	sent_data = malloc(nb_nodes * sizeof(struct _starpu_data_entry *));
-	for (node = 0; node < nb_nodes; node++)
-		sent_data[node] = NULL;
-
-	received_data = malloc(nb_nodes * sizeof(struct _starpu_data_entry *));
-	for (node = 0; node < nb_nodes; node++)
-		received_data[node] = NULL;
-#endif /* STARPU_MPI_CACHE */
-}
-
-/* Take one task argument into account when deciding which node executes.
- *
- * data                 handle of the argument (may be NULL)
- * mode                 access mode of the argument
- * me                   rank of the calling node
- * do_execute           in/out: -1 undecided, 0 another node executes,
- *                      1 this node executes
- * inconsistent_execute out: set to 1 when written handles disagree on the
- *                      executing node
- * dest                 out: rank of the remote owner of a written handle
- * size_on_nodes        in/out: per-rank total size of the handles read
- *
- * Returns 0, or -EINVAL when a written argument has no handle. */
-static
-int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *dest, size_t *size_on_nodes)
-{
-	if (data && (mode & STARPU_R)) {
-		int rank = starpu_data_get_rank(data);
-
-		/* A handle whose rank was never set reports -1; indexing
-		 * size_on_nodes with it would write before the array. */
-		if (rank >= 0) {
-			struct starpu_data_interface_ops *ops = data->ops;
-
-			size_on_nodes[rank] += ops->get_size(data);
-		}
-	}
-
-	if (mode & STARPU_W) {
-		if (!data) {
-			/* We don't have anything allocated for this.
-			 * The application knows we won't do anything
-			 * about this task */
-			/* Yes, the app could actually not call
-			 * insert_task at all itself, this is just a
-			 * safeguard. */
-			_STARPU_MPI_DEBUG("oh oh\n");
-			_STARPU_MPI_LOG_OUT();
-			return -EINVAL;
-		}
-		int mpi_rank = starpu_data_get_rank(data);
-		if (mpi_rank == me) {
-			/* We own this written handle: conflict if an earlier
-			 * argument already said somebody else executes. */
-			if (*do_execute == 0) {
-				*inconsistent_execute = 1;
-			}
-			else {
-				*do_execute = 1;
-			}
-		}
-		else if (mpi_rank != -1) {
-			/* Somebody else owns it: conflict if an earlier
-			 * argument already said we execute. */
-			if (*do_execute == 1) {
-				*inconsistent_execute = 1;
-			}
-			else {
-				*do_execute = 0;
-				*dest = mpi_rank;
-				/* That's the rank which needs the data to be sent to */
-			}
-		}
-		else {
-			_STARPU_ERROR("rank invalid\n");
-		}
-	}
-	return 0;
-}
-
-/* Post the transfer needed so that the executing node can read `data`.
- *
- * Only arguments with a handle and the STARPU_R bit are considered.  The
- * handle must have an MPI rank and an MPI tag, otherwise the program aborts.
- * - If this node executes (do_execute) but another node owns the handle, a
- *   detached receive from the owner is posted.
- * - If another node executes and this node owns the handle, a detached send
- *   to `dest` is posted.
- * With STARPU_MPI_CACHE, a transfer already recorded in received_data /
- * sent_data is not repeated. */
-static
-void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum starpu_access_mode mode, int me, int dest, int do_execute, MPI_Comm comm)
-{
-	if (data && mode & STARPU_R) {
-		int mpi_rank = starpu_data_get_rank(data);
-		int mpi_tag = starpu_data_get_tag(data);
-		if(mpi_rank == -1) {
-			fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_data_set_rank\n");
-			STARPU_ABORT();
-		}
-		if(mpi_tag == -1) {
-			fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_data_set_tag\n");
-			STARPU_ABORT();
-		}
-		/* The task needs to read this data */
-		if (do_execute && mpi_rank != me && mpi_rank != -1) {
-			/* I will have to execute but I don't have the data, receive */
-#ifdef STARPU_MPI_CACHE
-			/* Look the handle up in the table of its owner; record it
-			 * on a miss so that the next task reuses the copy. */
-			struct _starpu_data_entry *already_received;
-			HASH_FIND_PTR(received_data[mpi_rank], &data, already_received);
-			if (already_received == NULL) {
-				struct _starpu_data_entry *entry = (struct _starpu_data_entry *)malloc(sizeof(*entry));
-				entry->data = data;
-				HASH_ADD_PTR(received_data[mpi_rank], data, entry);
-			}
-			else {
-				_STARPU_MPI_DEBUG("Do not receive data %p from node %d as it is already available\n", data, mpi_rank);
-			}
-			/* This `if` guards the block that follows the #endif. */
-			if (!already_received)
-#endif
-			{
-				_STARPU_MPI_DEBUG("Receive data %p from %d\n", data, mpi_rank);
-				starpu_mpi_irecv_detached(data, mpi_rank, mpi_tag, comm, NULL, NULL);
-			}
-		}
-		if (!do_execute && mpi_rank == me) {
-			/* Somebody else will execute it, and I have the data, send it. */
-#ifdef STARPU_MPI_CACHE
-			/* Same scheme as above, using the table of the destination. */
-			struct _starpu_data_entry *already_sent;
-			HASH_FIND_PTR(sent_data[dest], &data, already_sent);
-			if (already_sent == NULL) {
-				struct _starpu_data_entry *entry = (struct _starpu_data_entry *)malloc(sizeof(*entry));
-				entry->data = data;
-				HASH_ADD_PTR(sent_data[dest], data, entry);
-				_STARPU_MPI_DEBUG("Noting that data %p has already been sent to %d\n", data, dest);
-			}
-			else {
-				_STARPU_MPI_DEBUG("Do not send data %p to node %d as it has already been sent\n", data, dest);
-			}
-			/* This `if` guards the block that follows the #endif. */
-			if (!already_sent)
-#endif
-			{
-				_STARPU_MPI_DEBUG("Send data %p to %d\n", data, dest);
-				starpu_mpi_isend_detached(data, dest, mpi_tag, comm, NULL, NULL);
-			}
-		}
-	}
-}
-
-/* Bring a written handle back to its owner after the task has run on
- * another node.
- *
- * Only arguments with the STARPU_W bit are considered.  The handle must have
- * an MPI rank and an MPI tag, otherwise the program aborts.
- * - The owner posts a detached receive from `dest` when an explicit
- *   execution node (xrank) other than itself was chosen.
- * - A non-owner that executed the task sends the handle to its owner. */
-static
-void _starpu_mpi_exchange_data_after_execution(starpu_data_handle_t data, enum starpu_access_mode mode, int me, int xrank, int dest, int do_execute, MPI_Comm comm)
-{
-	int owner;
-	int mpi_tag;
-
-	if (!(mode & STARPU_W))
-		return;
-
-	owner = starpu_data_get_rank(data);
-	mpi_tag = starpu_data_get_tag(data);
-	if (owner == -1) {
-		fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_data_set_rank\n");
-		STARPU_ABORT();
-	}
-	if (mpi_tag == -1) {
-		fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_data_set_tag\n");
-		STARPU_ABORT();
-	}
-
-	if (owner != me) {
-		/* Not ours: only the node that ran the task sends it home. */
-		if (do_execute) {
-			_STARPU_MPI_DEBUG("Send data %p back to its owner %d...\n", data, owner);
-			starpu_mpi_isend_detached(data, owner, mpi_tag, comm, NULL, NULL);
-		}
-		return;
-	}
-
-	/* Ours: wait for the result if another node was told to execute. */
-	if (xrank != -1 && me != xrank) {
-		_STARPU_MPI_DEBUG("Receive data %p back from the task %d which executed the codelet ...\n", data, dest);
-		starpu_mpi_irecv_detached(data, dest, mpi_tag, comm, NULL, NULL);
-	}
-}
-
-/* Drop copies of `data` that the task just submitted makes stale.
- *
- * With STARPU_MPI_CACHE, for a written handle:
- * - the executing node forgets every "already sent" record for it;
- * - any other node forgets its "already received" record and invalidates
- *   its local copy.
- * Without the cache, the executing node invalidates the temporary copy of
- * every handle it read from another node. */
-void _starpu_mpi_clear_data_after_execution(starpu_data_handle_t data, enum starpu_access_mode mode, int me, int do_execute, MPI_Comm comm)
-{
-#ifdef STARPU_MPI_CACHE
-	if (mode & STARPU_W) {
-		if (do_execute) {
-			/* Note that all copies I've sent to neighbours are now invalid */
-			int n, size;
-			MPI_Comm_size(comm, &size);
-			for(n=0 ; n<size ; n++) {
-				struct _starpu_data_entry *already_sent;
-				HASH_FIND_PTR(sent_data[n], &data, already_sent);
-				if (already_sent) {
-					_STARPU_MPI_DEBUG("Clearing send cache for data %p\n", data);
-					HASH_DEL(sent_data[n], already_sent);
-					/* HASH_DEL only unlinks the entry; release it. */
-					free(already_sent);
-				}
-			}
-		}
-		else {
-			int mpi_rank = starpu_data_get_rank(data);
-			struct _starpu_data_entry *already_received;
-			HASH_FIND_PTR(received_data[mpi_rank], &data, already_received);
-			if (already_received) {
-				/* Somebody else will write to the data, so discard our cached copy if any */
-				/* TODO: starpu_mpi could just remember itself. */
-				_STARPU_MPI_DEBUG("Clearing receive cache for data %p\n", data);
-				HASH_DEL(received_data[mpi_rank], already_received);
-				/* HASH_DEL only unlinks the entry; release it. */
-				free(already_received);
-				starpu_data_invalidate_submit(data);
-			}
-		}
-	}
-#else
-	/* We allocated a temporary buffer for the received data, now drop it */
-	if ((mode & STARPU_R) && do_execute) {
-		int mpi_rank = starpu_data_get_rank(data);
-		if (mpi_rank != me && mpi_rank != -1) {
-			starpu_data_invalidate_submit(data);
-		}
-	}
-#endif
-}
-
-/* Submit a task in a distributed setting.  Every node of `comm` is expected
- * to make the same call; each one works out which node executes the codelet
- * and posts the transfers that concern it.
- *
- * The variable arguments form a 0-terminated list of (type, value...) groups:
- * access modes followed by a handle, STARPU_DATA_ARRAY followed by a handle
- * array and its length, STARPU_VALUE, the STARPU_CALLBACK* variants,
- * STARPU_PRIORITY, STARPU_EXECUTE_ON_NODE and STARPU_EXECUTE_ON_DATA.
- *
- * Steps:
- *  1. measure and pack the codelet arguments;
- *  2. choose the executing node: an explicit STARPU_EXECUTE_ON_* request,
- *     else the owner of the written handles, else the node holding the
- *     largest amount of read data;
- *  3. send/receive the handles the executing node must read;
- *  4. create and submit the task on the executing node;
- *  5. when written handles belong to several nodes, send results back to
- *     their owners;
- *  6. drop cached or temporary copies made stale by the task.
- *
- * Returns 0 on success, -EINVAL when a written argument has no handle or
- * when written handles belong to several nodes and no execution node was
- * requested explicitly.
- *
- * NOTE(review): arg_buffer is handed to
- * _starpu_insert_task_create_and_submit() on the executing node only; who
- * releases it on the other nodes and on the error paths is not visible
- * here -- confirm against _starpu_codelet_pack_args(). */
-int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
-{
-	int arg_type;
-	va_list varg_list;
-	int me, do_execute, xrank, nb_nodes;
-	size_t *size_on_nodes;
-	size_t arg_buffer_size = 0;
-	char *arg_buffer;
-	int dest=0, inconsistent_execute;
-	int current_data = 0;
-
-	_STARPU_MPI_LOG_IN();
-
-	MPI_Comm_rank(comm, &me);
-	MPI_Comm_size(comm, &nb_nodes);
-
-	/* Per-rank amount of data read by the task, zero-initialised. */
-	size_on_nodes = (size_t *)calloc(nb_nodes, sizeof(size_t));
-	STARPU_ASSERT(size_on_nodes);
-
-	_starpu_mpi_tables_init();
-
-	/* Get the number of buffers and the size of the arguments.  Every
-	 * traversal of the argument list gets its own va_start/va_end pair:
-	 * calling va_start again without va_end is undefined behaviour. */
-	va_start(varg_list, codelet);
-	arg_buffer_size = _starpu_insert_task_get_arg_size(varg_list);
-	va_end(varg_list);
-
-	va_start(varg_list, codelet);
-	_starpu_codelet_pack_args(arg_buffer_size, &arg_buffer, varg_list);
-	va_end(varg_list);
-
-	/* Find out whether we are to execute the data because we own the data to be written to. */
-	inconsistent_execute = 0;
-	do_execute = -1;
-	xrank = -1;
-	va_start(varg_list, codelet);
-	while ((arg_type = va_arg(varg_list, int)) != 0) {
-		if (arg_type==STARPU_EXECUTE_ON_NODE) {
-			xrank = va_arg(varg_list, int);
-			_STARPU_MPI_DEBUG("Executing on node %d\n", xrank);
-			do_execute = 1;
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_DATA) {
-			starpu_data_handle_t data = va_arg(varg_list, starpu_data_handle_t);
-			xrank = starpu_data_get_rank(data);
-			_STARPU_MPI_DEBUG("Executing on data node %d\n", xrank);
-			/* Valid ranks are 0 .. nb_nodes-1. */
-			STARPU_ASSERT(xrank < nb_nodes);
-			do_execute = 1;
-		}
-		else if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type==STARPU_SCRATCH || arg_type==STARPU_REDUX) {
-			starpu_data_handle_t data = va_arg(varg_list, starpu_data_handle_t);
-			enum starpu_access_mode mode = (enum starpu_access_mode) arg_type;
-			int ret = _starpu_mpi_find_executee_node(data, mode, me, &do_execute, &inconsistent_execute, &dest, size_on_nodes);
-			if (ret == -EINVAL)
-			{
-				va_end(varg_list);
-				free(size_on_nodes);
-				return ret;
-			}
-			current_data ++;
-		}
-		else if (arg_type == STARPU_DATA_ARRAY)
-		{
-			starpu_data_handle_t *datas = va_arg(varg_list, starpu_data_handle_t *);
-			int nb_handles = va_arg(varg_list, int);
-			int i;
-			for(i=0 ; i<nb_handles ; i++)
-			{
-				/* Array elements take their mode from the codelet. */
-				enum starpu_access_mode mode = codelet->modes[current_data];
-				int ret = _starpu_mpi_find_executee_node(datas[i], mode, me, &do_execute, &inconsistent_execute, &dest, size_on_nodes);
-				if (ret == -EINVAL)
-				{
-					va_end(varg_list);
-					free(size_on_nodes);
-					return ret;
-				}
-				current_data ++;
-			}
-		}
-		else if (arg_type==STARPU_VALUE) {
-			va_arg(varg_list, void *);
-			va_arg(varg_list, size_t);
-		}
-		else if (arg_type==STARPU_CALLBACK) {
-			va_arg(varg_list, void (*)(void *));
-		}
-		else if (arg_type==STARPU_CALLBACK_WITH_ARG) {
-			va_arg(varg_list, void (*)(void *));
-			va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_CALLBACK_ARG) {
-			va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_PRIORITY) {
-			va_arg(varg_list, int);
-		}
-	}
-	va_end(varg_list);
-
-	if (do_execute == -1) {
-		/* No written handle and no explicit request: run the task
-		 * where most of the data it reads already lives. */
-		int i;
-		size_t max_size = 0;
-		for(i=0 ; i<nb_nodes ; i++) {
-			if (size_on_nodes[i] > max_size)
-			{
-				max_size = size_on_nodes[i];
-				xrank = i;
-			}
-		}
-		if (xrank != -1) {
-			_STARPU_MPI_DEBUG("Node %d is having the most R data\n", xrank);
-			do_execute = 1;
-		}
-	}
-
-	/* The table is not needed past this point.  It used to be released
-	 * only in the branch above, leaking on every other call. */
-	free(size_on_nodes);
-	size_on_nodes = NULL;
-
-	STARPU_ASSERT_MSG(do_execute != -1, "StarPU needs to see a W or a REDUX data which will tell it where to execute the task");
-
-	if (inconsistent_execute == 1) {
-		if (xrank == -1) {
-			_STARPU_MPI_DEBUG("Different tasks are owning W data. Needs to specify which one is to execute the codelet, using STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA\n");
-			_STARPU_MPI_LOG_OUT();
-			return -EINVAL;
-		}
-		else {
-			do_execute = (me == xrank);
-			dest = xrank;
-		}
-	}
-	else if (xrank != -1) {
-		do_execute = (me == xrank);
-		dest = xrank;
-	}
-
-	/* Send and receive data as requested */
-	va_start(varg_list, codelet);
-	current_data = 0;
-	while ((arg_type = va_arg(varg_list, int)) != 0) {
-		if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type==STARPU_SCRATCH || arg_type==STARPU_REDUX) {
-			starpu_data_handle_t data = va_arg(varg_list, starpu_data_handle_t);
-			enum starpu_access_mode mode = (enum starpu_access_mode) arg_type;
-
-			_starpu_mpi_exchange_data_before_execution(data, mode, me, dest, do_execute, comm);
-			current_data ++;
-
-		}
-		else if (arg_type == STARPU_DATA_ARRAY)
-		{
-			starpu_data_handle_t *datas = va_arg(varg_list, starpu_data_handle_t *);
-			int nb_handles = va_arg(varg_list, int);
-			int i;
-
-			for(i=0 ; i<nb_handles ; i++)
-			{
-				_starpu_mpi_exchange_data_before_execution(datas[i], codelet->modes[current_data], me, dest, do_execute, comm);
-				current_data++;
-			}
-		}
-		else if (arg_type==STARPU_VALUE) {
-			va_arg(varg_list, void *);
-			va_arg(varg_list, size_t);
-		}
-		else if (arg_type==STARPU_CALLBACK) {
-			va_arg(varg_list, void (*)(void *));
-		}
-		else if (arg_type==STARPU_CALLBACK_WITH_ARG) {
-			va_arg(varg_list, void (*)(void *));
-			va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_CALLBACK_ARG) {
-			va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_PRIORITY) {
-			va_arg(varg_list, int);
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_NODE) {
-			va_arg(varg_list, int);
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_DATA) {
-			va_arg(varg_list, starpu_data_handle_t);
-		}
-	}
-	va_end(varg_list);
-
-	if (do_execute) {
-		_STARPU_MPI_DEBUG("Execution of the codelet %p (%s)\n", codelet, codelet->name);
-		va_start(varg_list, codelet);
-		struct starpu_task *task = starpu_task_create();
-		int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, codelet, &task, varg_list);
-		va_end(varg_list);
-		_STARPU_MPI_DEBUG("ret: %d\n", ret);
-		STARPU_ASSERT(ret==0);
-	}
-
-	if (inconsistent_execute) {
-		/* Written handles belong to several nodes: return each result
-		 * to its owner. */
-		va_start(varg_list, codelet);
-		current_data = 0;
-		while ((arg_type = va_arg(varg_list, int)) != 0) {
-			if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type==STARPU_SCRATCH || arg_type==STARPU_REDUX) {
-				starpu_data_handle_t data = va_arg(varg_list, starpu_data_handle_t);
-				enum starpu_access_mode mode = (enum starpu_access_mode) arg_type;
-
-				_starpu_mpi_exchange_data_after_execution(data, mode, me, xrank, dest, do_execute, comm);
-				current_data++;
-			}
-			else if (arg_type == STARPU_DATA_ARRAY)
-			{
-				starpu_data_handle_t *datas = va_arg(varg_list, starpu_data_handle_t *);
-				int nb_handles = va_arg(varg_list, int);
-				int i;
-
-				for(i=0 ; i<nb_handles ; i++)
-				{
-					_starpu_mpi_exchange_data_after_execution(datas[i], codelet->modes[current_data], me, xrank, dest, do_execute, comm);
-					current_data++;
-				}
-			}
-			else if (arg_type==STARPU_VALUE) {
-				va_arg(varg_list, void *);
-				va_arg(varg_list, size_t);
-			}
-			else if (arg_type==STARPU_CALLBACK) {
-				va_arg(varg_list, void (*)(void *));
-			}
-			else if (arg_type==STARPU_CALLBACK_WITH_ARG) {
-				va_arg(varg_list, void (*)(void *));
-				va_arg(varg_list, void *);
-			}
-			else if (arg_type==STARPU_CALLBACK_ARG) {
-				va_arg(varg_list, void *);
-			}
-			else if (arg_type==STARPU_PRIORITY) {
-				va_arg(varg_list, int);
-			}
-			else if (arg_type==STARPU_EXECUTE_ON_NODE) {
-				va_arg(varg_list, int);
-			}
-			else if (arg_type==STARPU_EXECUTE_ON_DATA) {
-				va_arg(varg_list, starpu_data_handle_t);
-			}
-		}
-		va_end(varg_list);
-	}
-
-	/* Finally drop the copies that the task makes stale. */
-	va_start(varg_list, codelet);
-	current_data = 0;
-	while ((arg_type = va_arg(varg_list, int)) != 0) {
-		if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type == STARPU_SCRATCH || arg_type == STARPU_REDUX) {
-			starpu_data_handle_t data = va_arg(varg_list, starpu_data_handle_t);
-			enum starpu_access_mode mode = (enum starpu_access_mode) arg_type;
-
-			_starpu_mpi_clear_data_after_execution(data, mode, me, do_execute, comm);
-			current_data++;
-		}
-		else if (arg_type == STARPU_DATA_ARRAY)
-		{
-			starpu_data_handle_t *datas = va_arg(varg_list, starpu_data_handle_t *);
-			int nb_handles = va_arg(varg_list, int);
-			int i;
-
-			for(i=0 ; i<nb_handles ; i++)
-			{
-				_starpu_mpi_clear_data_after_execution(datas[i], codelet->modes[current_data], me, do_execute, comm);
-				current_data++;
-			}
-		}
-		else if (arg_type==STARPU_VALUE) {
-			va_arg(varg_list, void *);
-			va_arg(varg_list, size_t);
-		}
-		else if (arg_type==STARPU_CALLBACK) {
-			va_arg(varg_list, void (*)(void *));
-		}
-		else if (arg_type==STARPU_CALLBACK_WITH_ARG) {
-			va_arg(varg_list, void (*)(void *));
-			va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_CALLBACK_ARG) {
-			va_arg(varg_list, void *);
-		}
-		else if (arg_type==STARPU_PRIORITY) {
-			va_arg(varg_list, int);
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_NODE) {
-			va_arg(varg_list, int);
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_DATA) {
-			va_arg(varg_list, starpu_data_handle_t);
-		}
-	}
-
-	va_end(varg_list);
-	_STARPU_MPI_LOG_OUT();
-	return 0;
-}
-
-/* Make the content of data_handle available on `node` with a detached
- * transfer.
- *
- * The handle must have an MPI rank (its owner) and an MPI tag, otherwise the
- * program aborts.  Nothing happens when `node` already owns the handle.
- * Otherwise `node` posts a receive from the owner, with `callback(arg)`
- * attached to it, and the owner posts the matching send (without callback).
- * Every other node does nothing. */
-void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg)
-{
-	int me;
-	int owner = starpu_data_get_rank(data_handle);
-	int mpi_tag = starpu_data_get_tag(data_handle);
-
-	if (owner == -1) {
-		fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_data_set_rank\n");
-		STARPU_ABORT();
-	}
-	if (mpi_tag == -1) {
-		fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_data_set_tag\n");
-		STARPU_ABORT();
-	}
-	MPI_Comm_rank(comm, &me);
-
-	if (node != owner)
-	{
-		if (me == node)
-			starpu_mpi_irecv_detached(data_handle, owner, mpi_tag, comm, callback, arg);
-		else if (me == owner)
-			starpu_mpi_isend_detached(data_handle, node, mpi_tag, comm, NULL, NULL);
-	}
-}
-
-/* Make the content of data_handle available on `node`, using
- * starpu_mpi_recv() / starpu_mpi_send().
- *
- * The handle must have an MPI rank (its owner) and an MPI tag, otherwise the
- * program aborts.  Nothing happens when `node` already owns the handle.
- * Otherwise `node` receives from the owner and the owner sends to `node`.
- * Every other node does nothing. */
-void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node)
-{
-	int me;
-	int owner = starpu_data_get_rank(data_handle);
-	int mpi_tag = starpu_data_get_tag(data_handle);
-
-	if (owner == -1) {
-		fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_data_set_rank\n");
-		STARPU_ABORT();
-	}
-	if (mpi_tag == -1) {
-		fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_data_set_tag\n");
-		STARPU_ABORT();
-	}
-	MPI_Comm_rank(comm, &me);
-
-	if (node != owner)
-	{
-		if (me == node)
-		{
-			MPI_Status status;
-			starpu_mpi_recv(data_handle, owner, mpi_tag, comm, &status);
-		}
-		else if (me == owner)
-		{
-			starpu_mpi_send(data_handle, node, mpi_tag, comm);
-		}
-	}
-}
-
-/* Combine the per-node contributions to data_handle on the node owning it.
- *
- * The handle must have an MPI rank (its owner) and an MPI tag, otherwise the
- * program aborts.  Every node except the owner sends its copy to the owner.
- * For each other node, the owner registers a fresh handle shaped like
- * data_handle, posts a detached receive into it and submits the handle's
- * redux_cl codelet with data_handle in RW mode and the new handle in R mode. */
-void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle)
-{
-	int me, nb_nodes;
-	int src;
-	int rank = starpu_data_get_rank(data_handle);
-	int tag = starpu_data_get_tag(data_handle);
-
-	if (rank == -1) {
-		fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_data_set_rank\n");
-		STARPU_ABORT();
-	}
-	if (tag == -1) {
-		fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_data_set_tag\n");
-		STARPU_ABORT();
-	}
-
-	MPI_Comm_rank(comm, &me);
-	MPI_Comm_size(comm, &nb_nodes);
-
-	_STARPU_MPI_DEBUG("Doing reduction for data %p on node %d with %d nodes ...\n", data_handle, rank, nb_nodes);
-
-	if (me != rank) {
-		/* Contributor: ship our copy to the owner and leave. */
-		_STARPU_MPI_DEBUG("Sending redux handle to %d ...\n", rank);
-		starpu_mpi_isend_detached(data_handle, rank, tag, comm, NULL, NULL);
-		return;
-	}
-
-	// need to count how many nodes have the data in redux mode
-	for (src = 0; src < nb_nodes; src++) {
-		starpu_data_handle_t new_handle;
-
-		if (src == rank)
-			continue;
-
-		starpu_data_register_same(&new_handle, data_handle);
-
-		_STARPU_MPI_DEBUG("Receiving redux handle from %d in %p ...\n", src, new_handle);
-
-		starpu_mpi_irecv_detached(new_handle, src, tag, comm, NULL, NULL);
-		starpu_insert_task(data_handle->redux_cl,
-				STARPU_RW, data_handle,
-				STARPU_R, new_handle,
-				0);
-	}
-}

+ 0 - 99
mpi/src/starpu_mpi_private.h

@@ -1,99 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_PRIVATE_H__
-#define __STARPU_MPI_PRIVATE_H__
-
-#include <starpu.h>
-#include <common/config.h>
-#include "starpu_mpi.h"
-#include "starpu_mpi_fxt.h"
-#include <common/list.h>
-#include <common/utils.h>
-#include <pthread.h>
-
-//#define STARPU_MPI_VERBOSE	1
-
-/* Debug trace macro.
- *
- * When STARPU_MPI_VERBOSE is defined and the STARPU_SILENT environment
- * variable is not set, prints the formatted message on stderr, indented by
- * (rank+1)*4 columns and prefixed with the MPI_COMM_WORLD rank and the
- * calling function name, then flushes stderr. Otherwise expands to nothing.
- *
- * The expansion is a bare do { ... } while(0) without a trailing semicolon,
- * so that "_STARPU_MPI_DEBUG(...);" is exactly one statement and can be used
- * in an unbraced if/else.
- */
-#ifdef STARPU_MPI_VERBOSE
-#  define _STARPU_MPI_DEBUG(fmt, args ...) do { if (!getenv("STARPU_SILENT")) { \
-    						int _debug_rank; MPI_Comm_rank(MPI_COMM_WORLD, &_debug_rank);       \
-                                                fprintf(stderr, "%*s[%d][starpu_mpi][%s] " fmt , (_debug_rank+1)*4, "", _debug_rank, __func__ ,##args); \
-                                                fflush(stderr); }} while(0)
-#else
-#  define _STARPU_MPI_DEBUG(fmt, args ...)
-#endif
-
-/* Function entry/exit trace macros.
- *
- * They are only active when STARPU_MPI_VERBOSE0 (note the trailing 0, a
- * different symbol from STARPU_MPI_VERBOSE) is defined; otherwise they expand
- * to nothing. When active and STARPU_SILENT is not set in the environment,
- * they print "-->" / "<--" on stderr with the same rank-based indentation
- * and prefix as _STARPU_MPI_DEBUG.
- */
-#ifdef STARPU_MPI_VERBOSE0
-#  define _STARPU_MPI_LOG_IN()             do { if (!getenv("STARPU_SILENT")) { \
-                                               int _debug_rank; MPI_Comm_rank(MPI_COMM_WORLD, &_debug_rank);                        \
-                                               fprintf(stderr, "%*s[%d][starpu_mpi][%s] -->\n", (_debug_rank+1)*4, "", _debug_rank, __func__ ); \
-                                               fflush(stderr); }} while(0)
-#  define _STARPU_MPI_LOG_OUT()            do { if (!getenv("STARPU_SILENT")) { \
-                                               int _debug_rank; MPI_Comm_rank(MPI_COMM_WORLD, &_debug_rank);                        \
-                                               fprintf(stderr, "%*s[%d][starpu_mpi][%s] <--\n", (_debug_rank+1)*4, "", _debug_rank, __func__ ); \
-                                               fflush(stderr); }} while(0)
-#else
-#  define _STARPU_MPI_LOG_IN()
-#  define _STARPU_MPI_LOG_OUT()
-#endif
-
-#define SEND_REQ	0
-#define RECV_REQ	1
-#define WAIT_REQ        2
-#define TEST_REQ        3
-#define BARRIER_REQ     4
-
-/* Descriptor of one StarPU-MPI request.
- * LIST_TYPE comes from common/list.h; presumably it declares
- * struct _starpu_mpi_req with the fields below plus list helpers -- confirm
- * against that header. */
-LIST_TYPE(_starpu_mpi_req,
-	/* description of the data at StarPU level */
-	starpu_data_handle_t data_handle;
-
-	/* description of the data to be sent/received */
-	MPI_Datatype datatype;
-	void *ptr;
-	int needs_unpacking;
-
-	/* who are we talking to ? */
-	int srcdst;
-	int mpi_tag;
-	MPI_Comm comm;
-
-	/* handler invoked on the request; NOTE(review): when and by whom is not
-	 * visible in this header */
-	void (*func)(struct _starpu_mpi_req *);
-
-	MPI_Status *status;
-	MPI_Request request;
-	int *flag;
-
-	int ret;
-	pthread_mutex_t req_mutex;
-	pthread_cond_t req_cond;
-
-	/* 0 send, 1 recv; presumably also WAIT_REQ, TEST_REQ and BARRIER_REQ
-	 * from the *_REQ constants defined above -- TODO confirm */
-	unsigned request_type;
-
-	unsigned submitted;
-	unsigned completed;
-
-	/* In the case of a Wait/Test request, we are going to post a request
-	 * to test the completion of another request */
-	struct _starpu_mpi_req *other_request;
-
-	/* in the case of detached requests */
-	unsigned detached;
-	void *callback_arg;
-	void (*callback)(void *);
-);
-
-#endif // __STARPU_MPI_PRIVATE_H__

+ 0 - 88
mpi/src/starpu_mpi_stats.c

@@ -1,88 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi_stats.h>
-#include <common/config.h>
-#include <stdio.h>
-//#define STARPU_MPI_VERBOSE	1
-#include <starpu_mpi_private.h>
-
-/* measure the amount of data transfers between each pair of MPI nodes */
-#ifdef STARPU_COMM_STATS
-static size_t *comm_amount;
-static int world_size;
-#endif /* STARPU_COMM_STATS */
-
-/* Set up the per-destination transfer counters.
- *
- * Only does something when StarPU is built with STARPU_COMM_STATS: warns on
- * stderr (unless STARPU_SILENT is set) that statistics slow execution down,
- * records the size of comm in world_size and allocates one zeroed size_t
- * counter per rank. Aborts if the counters cannot be allocated, since every
- * later call to _starpu_mpi_comm_amounts_inc() would dereference them.
- */
-void _starpu_mpi_comm_amounts_init(MPI_Comm comm)
-{
-#ifdef STARPU_COMM_STATS
-	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-comm-stats, which slows down a bit\n");
-
-	MPI_Comm_size(comm, &world_size);
-	_STARPU_MPI_DEBUG("allocating for %d nodes\n", world_size);
-
-	comm_amount = (size_t *) calloc(world_size, sizeof(size_t));
-	if (comm_amount == NULL)
-	{
-		fprintf(stderr,"Could not allocate the communication statistics for %d nodes\n", world_size);
-		STARPU_ABORT();
-	}
-#endif /* STARPU_COMM_STATS */
-}
-
-/* Release the counters allocated by _starpu_mpi_comm_amounts_init().
- * Does nothing when StarPU is built without STARPU_COMM_STATS.
- * The comm_amount pointer is left dangling after the call. */
-void _starpu_mpi_comm_amounts_free()
-{
-#ifdef STARPU_COMM_STATS
-	free(comm_amount);
-#endif /* STARPU_COMM_STATS */
-}
-
-/* Account for a transfer of `count` elements of `datatype` towards rank `dst`.
- *
- * Only does something when StarPU is built with STARPU_COMM_STATS; the
- * parameters are marked unused for the other configuration.
- * NOTE(review): dst is used as an index without a bound check -- it is
- * assumed to be a valid rank of the communicator given at init time.
- */
-void _starpu_mpi_comm_amounts_inc(MPI_Comm comm  __attribute__ ((unused)),
-				  unsigned dst  __attribute__ ((unused)),
-				  MPI_Datatype datatype  __attribute__ ((unused)),
-				  int count __attribute__ ((unused)))
-{
-#ifdef STARPU_COMM_STATS
-	int src, size;
-	size_t amount;
-
-	MPI_Comm_rank(comm, &src);
-	MPI_Type_size(datatype, &size);
-
-	/* widen before multiplying: count*size computed as int overflows for
-	 * transfers of 2 GiB and more */
-	amount = (size_t) count * (size_t) size;
-
-	_STARPU_MPI_DEBUG("[%d] adding %lu to %u\n", src, (unsigned long) amount, dst);
-
-	comm_amount[dst] += amount;
-#endif /* STARPU_COMM_STATS */
-}
-
-/* Print on stderr the amount of data sent by `node`: first the total, then
- * one line per destination rank that received a non-zero amount, each in
- * bytes and in MB (1024*1024 bytes).
- *
- * Only does something when StarPU is built with STARPU_COMM_STATS.
- * Amounts are converted to double rather than float: a float only holds 24
- * significant bits, so any count above 16 MiB was printed inexactly.
- */
-void _starpu_mpi_comm_amounts_display(int node)
-{
-#ifdef STARPU_COMM_STATS
-	int dst; /* signed, like world_size, and printed with %d below */
-	size_t sum = 0;
-
-	for (dst = 0; dst < world_size; dst++)
-	{
-		sum += comm_amount[dst];
-	}
-
-	fprintf(stderr, "\n[%d] Communication transfers stats:\nTOTAL transfers %f B\t%f MB\n", node, (double)sum, (double)sum/1024/1024);
-
-	for (dst = 0; dst < world_size; dst++)
-	{
-		if (comm_amount[dst])
-		{
-			fprintf(stderr, "\t%d -> %d\t%f B\t%f MB\n",
-				node, dst, (double)comm_amount[dst], ((double)comm_amount[dst])/(1024*1024));
-		}
-	}
-#endif /* STARPU_COMM_STATS */
-}
-

+ 0 - 24
mpi/src/starpu_mpi_stats.h

@@ -1,24 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_MPI_STATS_H__
-#define __STARPU_MPI_STATS_H__
-
-#include <stdlib.h>
-#include <mpi.h>
-
-/* Communication statistics; the functions are no-ops unless StarPU is built
- * with STARPU_COMM_STATS. */
-
-/* Allocate one byte counter per rank of comm. */
-void _starpu_mpi_comm_amounts_init(MPI_Comm comm);
-/* Release the counters. */
-void _starpu_mpi_comm_amounts_free(void);
-/* Add count elements of datatype to the counter of destination rank dst. */
-void _starpu_mpi_comm_amounts_inc(MPI_Comm comm, unsigned dst, MPI_Datatype datatype, int count);
-/* Print the counters on stderr, labelled with the sending rank node. */
-void _starpu_mpi_comm_amounts_display(int node);
-
-#endif // __STARPU_MPI_STATS_H__
-

+ 0 - 29
mpi/starpumpi-1.0.pc.in

@@ -1,29 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2011  Université de Bordeaux 1
-# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: starpumpi
-Description: offers MPI support for heterogeneous multicore architecture
-Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@
-Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@
-Libs.private: @LDFLAGS@ @LIBS@
-Requires: starpu-1.0
-Requires.private:

+ 0 - 1
mpi/tests/.gitignore

@@ -1 +0,0 @@
-/.deps

+ 0 - 153
mpi/tests/Makefile.am

@@ -1,153 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009-2012  Université de Bordeaux 1
-# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-CC=$(MPICC)
-CCLD=$(MPICC)
-
-if STARPU_MPI_CHECK
-if STARPU_HAVE_AM111
-LOG_COMPILER	 	=	$(MPIEXEC) -np 2
-else
-TESTS_ENVIRONMENT 	=	$(MPIEXEC) -np 2
-endif
-TESTS			=	$(check_PROGRAMS)
-endif
-
-check_PROGRAMS =
-
-BUILT_SOURCES =
-
-CLEANFILES = *.gcno *.gcda *.linkinfo
-
-examplebindir = $(libdir)/starpu/examples/mpi
-
-examplebin_PROGRAMS =
-
-if STARPU_USE_CUDA
-# TODO define NVCCFLAGS
-NVCC ?= nvcc
-
-NVCCFLAGS += -I$(top_srcdir)/include/ -I$(top_builddir)/include
-
-.cu.cubin:
-	$(MKDIR_P) `dirname $@`
-	$(NVCC) -cubin $< -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS)
-
-.cu.o:
-	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I$(top_srcdir)/include/  -I$(top_builddir)/include/
-endif
-
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS)
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/src -I$(top_builddir)/src
-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS)
-
-########################
-# Unit testcases       #
-########################
-
-check_PROGRAMS +=				\
-	pingpong				\
-	mpi_test				\
-	mpi_isend				\
-	mpi_irecv				\
-	mpi_isend_detached			\
-	mpi_irecv_detached			\
-	mpi_detached_tag			\
-	ring					\
-	ring_async				\
-	ring_async_implicit			\
-	block_interface				\
-	block_interface_pinned			\
-	insert_task				\
-	insert_task_cache			\
-	insert_task_block			\
-	insert_task_owner			\
-	insert_task_owner2			\
-	insert_task_owner_data			\
-	multiple_send
-
-noinst_PROGRAMS =				\
-	pingpong				\
-	mpi_test				\
-	mpi_isend				\
-	mpi_irecv				\
-	mpi_isend_detached			\
-	mpi_irecv_detached			\
-	mpi_detached_tag			\
-	ring					\
-	ring_async				\
-	ring_async_implicit			\
-	block_interface				\
-	block_interface_pinned			\
-	insert_task				\
-	insert_task_cache			\
-	insert_task_block			\
-	insert_task_owner			\
-	insert_task_owner2			\
-	insert_task_owner_data			\
-	multiple_send
-
-mpi_isend_LDADD =					\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-mpi_irecv_LDADD =					\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-mpi_isend_detached_LDADD =			\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-mpi_irecv_detached_LDADD =			\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-mpi_detached_tag_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-pingpong_LDADD =					\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-mpi_test_LDADD =					\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-ring_LDADD =					\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-ring_async_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-ring_async_implicit_LDADD =			\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-block_interface_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-block_interface_pinned_LDADD =			\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-insert_task_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-insert_task_cache_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-insert_task_block_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-insert_task_owner_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-insert_task_owner2_LDADD =			\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-insert_task_owner_data_LDADD =			\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-multiple_send_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-
-ring_SOURCES = ring.c
-ring_async_SOURCES = ring_async.c
-ring_async_implicit_SOURCES = ring_async_implicit.c
-if STARPU_USE_CUDA
-ring_SOURCES += ring_kernel.cu
-ring_async_SOURCES += ring_kernel.cu
-ring_async_implicit_SOURCES += ring_kernel.cu
-endif
-
-showcheck:
-	-cat $(TEST_LOGS) /dev/null

+ 0 - 148
mpi/tests/block_interface.c

@@ -1,148 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <stdlib.h>
-#include "helper.h"
-
-#define NITER	2048
-
-#define BIGSIZE	128
-#define SIZE	64
-
-/* Exchange a 3D block between rank 0 and rank 1.
- *
- * Rank 0 registers the SIZE^3 corner of a BIGSIZE^3 buffer, rank 1 registers
- * a compact SIZE^3 buffer. Rank 0 sends its block (all 1.0f) with tag 0x42;
- * rank 1 checks it, overwrites it with 33.0f and sends it back with tag
- * 0x1337; rank 0 checks the result.
- *
- * Returns STARPU_TEST_SKIPPED when run with fewer than 2 processes, and on
- * every rank >= 2; returns 0 otherwise.
- */
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size < 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	/* We only use 2 nodes for that test */
-	if (rank >= 2)
-	{
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	/* Node 0 will allocate a big block and only register an inner part of
-	 * it as the block data, Node 1 will allocate a block of small size and
-	 * register it directly. Node 0 and 1 will then exchange the content of
-	 * their blocks. */
-
-	float *block;
-	starpu_data_handle_t block_handle;
-
-	if (rank == 0)
-	{
-		block = calloc(BIGSIZE*BIGSIZE*BIGSIZE, sizeof(float));
-		assert(block);
-
-		/* fill the inner block */
-		unsigned i, j, k;
-		for (k = 0; k < SIZE; k++)
-		for (j = 0; j < SIZE; j++)
-		for (i = 0; i < SIZE; i++)
-		{
-			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
-		}
-
-		/* leading dimensions are those of the big buffer */
-		starpu_block_data_register(&block_handle, 0,
-			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
-			SIZE, SIZE, SIZE, sizeof(float));
-	}
-	else /* rank == 1 */
-	{
-		block = calloc(SIZE*SIZE*SIZE, sizeof(float));
-		assert(block);
-
-		starpu_block_data_register(&block_handle, 0,
-			(uintptr_t)block, SIZE, SIZE*SIZE,
-			SIZE, SIZE, SIZE, sizeof(float));
-	}
-
-	if (rank == 0)
-	{
-		ret = starpu_mpi_send(block_handle, 1, 0x42, MPI_COMM_WORLD);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
-
-		MPI_Status status;
-		ret = starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
-
-		/* check the content of the block */
-		ret = starpu_data_acquire(block_handle, STARPU_R);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
-
-		unsigned i, j, k;
-		for (k = 0; k < SIZE; k++)
-		for (j = 0; j < SIZE; j++)
-		for (i = 0; i < SIZE; i++)
-		{
-			assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f);
-		}
-		starpu_data_release(block_handle);
-
-	}
-	else /* rank == 1 */
-	{
-		MPI_Status status;
-		ret = starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
-
-		/* check the content of the block and modify it */
-		ret = starpu_data_acquire(block_handle, STARPU_RW);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
-
-		unsigned i, j, k;
-		for (k = 0; k < SIZE; k++)
-		for (j = 0; j < SIZE; j++)
-		for (i = 0; i < SIZE; i++)
-		{
-			assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f);
-			block[i + j*SIZE + k*SIZE*SIZE] = 33.0f;
-		}
-		starpu_data_release(block_handle);
-
-		ret = starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
-	}
-
-	FPRINTF(stdout, "Rank %d is done\n", rank);
-	fflush(stdout);
-
-	/* release the handle before shutting StarPU down, then the buffer
-	 * that was registered behind it */
-	starpu_data_unregister(block_handle);
-	free(block);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 151
mpi/tests/block_interface_pinned.c

@@ -1,151 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <stdlib.h>
-#include "helper.h"
-
-#define NITER	2048
-
-#define BIGSIZE	64
-#define SIZE	64
-
-/* Same exchange as block_interface.c, but the buffers are allocated with
- * starpu_malloc() instead of calloc().
- *
- * Rank 0 sends its SIZE^3 block (all 1.0f) with tag 0x42; rank 1 checks it,
- * overwrites it with 33.0f and sends it back with tag 0x1337; rank 0 checks
- * the result.
- *
- * Returns STARPU_TEST_SKIPPED when run with fewer than 2 processes, and on
- * every rank >= 2; returns 0 otherwise.
- */
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size < 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	/* We only use 2 nodes for that test */
-	if (rank >= 2)
-	{
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	/* Node 0 will allocate a big block and only register an inner part of
-	 * it as the block data, Node 1 will allocate a block of small size and
-	 * register it directly. Node 0 and 1 will then exchange the content of
-	 * their blocks. */
-
-	float *block;
-	starpu_data_handle_t block_handle;
-
-	if (rank == 0)
-	{
-		starpu_malloc((void **)&block,
-				BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float));
-		memset(block, 0, BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float));
-
-		/* fill the inner block */
-		unsigned i, j, k;
-		for (k = 0; k < SIZE; k++)
-		for (j = 0; j < SIZE; j++)
-		for (i = 0; i < SIZE; i++)
-		{
-			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
-		}
-
-		starpu_block_data_register(&block_handle, 0,
-			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
-			SIZE, SIZE, SIZE, sizeof(float));
-	}
-	else /* rank == 1 */
-	{
-		starpu_malloc((void **)&block,
-			SIZE*SIZE*SIZE*sizeof(float));
-		memset(block, 0, SIZE*SIZE*SIZE*sizeof(float));
-
-		starpu_block_data_register(&block_handle, 0,
-			(uintptr_t)block, SIZE, SIZE*SIZE,
-			SIZE, SIZE, SIZE, sizeof(float));
-	}
-
-	if (rank == 0)
-	{
-		MPI_Status status;
-
-		ret = starpu_mpi_send(block_handle, 1, 0x42, MPI_COMM_WORLD);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
-
-		ret = starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
-
-		/* check the content of the block */
-		ret = starpu_data_acquire(block_handle, STARPU_R);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
-
-		unsigned i, j, k;
-		for (k = 0; k < SIZE; k++)
-		for (j = 0; j < SIZE; j++)
-		for (i = 0; i < SIZE; i++)
-		{
-			assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f);
-		}
-		starpu_data_release(block_handle);
-
-	}
-	else /* rank == 1 */
-	{
-		MPI_Status status;
-
-		ret = starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
-
-		/* check the content of the block and modify it */
-		ret = starpu_data_acquire(block_handle, STARPU_RW);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
-
-		unsigned i, j, k;
-		for (k = 0; k < SIZE; k++)
-		for (j = 0; j < SIZE; j++)
-		for (i = 0; i < SIZE; i++)
-		{
-			assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f);
-			block[i + j*SIZE + k*SIZE*SIZE] = 33.0f;
-		}
-		starpu_data_release(block_handle);
-
-		ret = starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send");
-
-	}
-
-	FPRINTF(stdout, "Rank %d is done\n", rank);
-	fflush(stdout);
-
-	/* release the handle while StarPU is still running, then give the
-	 * buffer back to the allocator it came from */
-	starpu_data_unregister(block_handle);
-	starpu_free(block);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 22
mpi/tests/helper.h

@@ -1,22 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <errno.h>
-
-/* Exit status returned by a test that cannot run in the current setup.
- * NOTE(review): 77 is presumably chosen because automake's test driver
- * reports that status as SKIP -- confirm. */
-#define STARPU_TEST_SKIPPED 77
-
-/* fprintf() wrapper that prints nothing when the STARPU_SSILENT environment
- * variable is set. Note the doubled S: this is a different variable from the
- * STARPU_SILENT one tested by the library sources.
- * NOTE(review): confirm the spelling is deliberate. */
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
-

+ 0 - 143
mpi/tests/insert_task.c

@@ -1,143 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-#include "helper.h"
-
-/* CPU kernel: replaces the first variable by the integer mean of the two
- * variables, (first + second) / 2, after printing both input values. */
-void func_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	unsigned *target = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	unsigned *other = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
-	unsigned target_value = *target;
-	unsigned other_value = *other;
-
-	FPRINTF(stdout, "VALUES: %u %u\n", target_value, other_value);
-
-	/* unsigned arithmetic: the sum wraps modulo UINT_MAX+1 before the division */
-	*target = (target_value + other_value) / 2;
-}
-
-/* Codelet wrapping func_cpu: CPU-only, two buffers, the first accessed in
- * read-write mode and the second in read-only mode. */
-struct starpu_codelet mycodelet =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_R}
-};
-
-#define X     4
-#define Y     5
-
-/* Returns the rank of the MPI node that owns the data at index (x, y).
- * Ownership only depends on x: rows are dealt to the nb_nodes nodes in a
- * round-robin fashion. */
-int my_distrib(int x, int y, int nb_nodes)
-{
-	int owner = x % nb_nodes;
-
-	(void) y; /* the column index plays no part in the distribution */
-	return owner;
-}
-
-
-/* Test of starpu_mpi_insert_task() on an X x Y matrix of unsigned variables
- * whose rows are distributed over the MPI ranks by my_distrib().
- * Every rank registers the elements it owns and those owned by rank+1 or
- * rank-1, submits the same four tasks, waits for them and unregisters its
- * handles. Always returns 0. */
-int main(int argc, char **argv)
-{
-        int rank, size, x, y;
-        int value=0, ret;
-        unsigned matrix[X][Y];
-        starpu_data_handle_t data_handles[X][Y];
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize_extended(&rank, &size);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-
-	/* fill the local copy with values that differ from one rank to another */
-        for(x = 0; x < X; x++)
-	{
-                for (y = 0; y < Y; y++)
-		{
-                        matrix[x][y] = (rank+1)*10 + value;
-                        value++;
-                }
-        }
-#if 0
-        for(x = 0; x < X; x++) {
-                FPRINTF(stdout, "[%d] ", rank);
-                for (y = 0; y < Y; y++) {
-                        FPRINTF(stdout, "%3d ", matrix[x][y]);
-                }
-                FPRINTF(stdout, "\n");
-        }
-#endif
-
-	/* register one handle per element this rank owns or neighbours */
-        for(x = 0; x < X; x++)
-	{
-                for (y = 0; y < Y; y++)
-		{
-                        int mpi_rank = my_distrib(x, y, size);
-                        if (mpi_rank == rank)
-			{
-                                //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-                                starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
-                        }
-                        else if (rank == mpi_rank+1 || rank == mpi_rank-1)
-			{
-                                /* I don't own that index, but will need it for my computations */
-                                //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", rank, x, y);
-                                starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned));
-                        }
-                        else
-			{
-                                /* I know it's useless to allocate anything for this */
-                                data_handles[x][y] = NULL;
-                        }
-                        if (data_handles[x][y])
-			{
-				/* the tag (y*X)+x is distinct for every (x, y) pair */
-                                starpu_data_set_rank(data_handles[x][y], mpi_rank);
-                                starpu_data_set_tag(data_handles[x][y], (y*X)+x);
-			}
-                }
-        }
-
-	/* NOTE(review): with more than 2 ranks some of the handles used below
-	 * are NULL on ranks that are neither owner nor neighbour -- confirm
-	 * starpu_mpi_insert_task() accepts that */
-        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0][1], STARPU_R, data_handles[0][0], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-
-        FPRINTF(stderr, "Waiting ...\n");
-        starpu_task_wait_for_all();
-
-        for(x = 0; x < X; x++)
-	{
-                for (y = 0; y < Y; y++)
-		{
-                        if (data_handles[x][y])
-                                starpu_data_unregister(data_handles[x][y]);
-                }
-        }
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-#if 0
-        for(x = 0; x < X; x++)
-	{
-                FPRINTF(stdout, "[%d] ", rank);
-                for (y = 0; y < Y; y++)
-		{
-                        FPRINTF(stdout, "%3d ", matrix[x][y]);
-                }
-                FPRINTF(stdout, "\n");
-        }
-#endif
-
-	return 0;
-}

+ 0 - 165
mpi/tests/insert_task_block.c

@@ -1,165 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-#include "helper.h"
-
-/* CPU kernel: overwrites every element of the nx x ny matrix block with the
- * sum of all its elements. The division by nx*ny is commented out in the
- * original, so each cell receives the sum, not the mean. */
-void func_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	unsigned *block = (unsigned *)STARPU_MATRIX_GET_PTR(descr[0]);
-	int nx = (int)STARPU_MATRIX_GET_NX(descr[0]);
-	int ny = (int)STARPU_MATRIX_GET_NY(descr[0]);
-	int ld = (int)STARPU_MATRIX_GET_LD(descr[0]);
-
-	unsigned total = 0;
-	int row, col;
-
-	/* first pass: accumulate (unsigned addition, so the order of the
-	 * traversal does not change the result) */
-	for (col = 0; col < ny; col++)
-	{
-		unsigned *line = block + col*ld;
-
-		for (row = 0; row < nx; row++)
-			total += line[row];
-	}
-
-	/* second pass: broadcast the sum to every cell of the block */
-	for (col = 0; col < ny; col++)
-	{
-		unsigned *line = block + col*ld;
-
-		for (row = 0; row < nx; row++)
-			line[row] = total;
-	}
-}
-
-/* Codelet wrapping func_cpu: CPU-only, a single buffer accessed in
- * read-write mode. */
-struct starpu_codelet mycodelet =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 1,
-	.modes = {STARPU_RW}
-};
-
-#define SIZE       6
-#define BLOCKS     3
-
-/* Returns the rank of the MPI node that owns the block at index (x, y).
- * Ownership only depends on x: block rows are dealt to the nb_nodes nodes in
- * a round-robin fashion. */
-int my_distrib(int x, int y, int nb_nodes)
-{
-	int owner = x % nb_nodes;
-
-	(void) y; /* the column index plays no part in the distribution */
-	return owner;
-}
-
-
-/* Test of starpu_mpi_insert_task() on matrix blocks: a SIZE x SIZE matrix is
- * cut into BLOCKS x BLOCKS sub-matrices distributed over the MPI ranks by
- * my_distrib(). Every rank prints its matrix, registers the blocks it owns
- * or neighbours, submits one mycodelet task per block, waits, unregisters
- * the handles and prints the matrix again. Always returns 0. */
-int main(int argc, char **argv)
-{
-        int rank, size, x, y;
-        int ret, value=0;
-        unsigned matrix[SIZE*SIZE];
-	/* only the first BLOCKS x BLOCKS entries of this array are used */
-        starpu_data_handle_t data_handles[SIZE][SIZE];
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize_extended(&rank, &size);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-
-	/* fill the local copy with values that differ from one rank to another */
-        for(x = 0; x < SIZE; x++)
-	{
-                for (y = 0; y < SIZE; y++)
-		{
-                        matrix[x+y*SIZE] = rank*100 + value;
-                        value++;
-                }
-        }
-#if 1
-        for(x = 0; x < SIZE; x++) {
-                FPRINTF(stdout, "[%d] ", rank);
-                for (y = 0; y < SIZE; y++) {
-                        FPRINTF(stdout, "%3u ", matrix[x+y*SIZE]);
-                }
-                FPRINTF(stdout, "\n");
-        }
-#endif
-
-	/* each block is a (SIZE/BLOCKS) x (SIZE/BLOCKS) view into matrix with
-	 * leading dimension SIZE */
-        for(x = 0; x < BLOCKS ;  x++)
-	{
-                for (y = 0; y < BLOCKS; y++)
-		{
-                        int mpi_rank = my_distrib(x, y, size);
-                        if (mpi_rank == rank)
-			{
-                                //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-                                starpu_matrix_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[((SIZE/BLOCKS)*x) + ((SIZE/BLOCKS)*y) * SIZE]),
-                                                            SIZE, SIZE/BLOCKS, SIZE/BLOCKS, sizeof(unsigned));
-                        }
-                        else if (rank == mpi_rank+1 || rank == mpi_rank-1)
-			{
-                                /* I don't own that index, but will need it for my computations */
-                                //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", rank, x, y);
-				/* NOTE(review): a pointer is passed together with home node -1
-				 * here, unlike the NULL pointer used in insert_task.c -- confirm
-				 * this is intended */
-                                starpu_matrix_data_register(&data_handles[x][y], -1, (uintptr_t)&(matrix[((SIZE/BLOCKS)*x) + ((SIZE/BLOCKS)*y) * SIZE]),
-                                                            SIZE, SIZE/BLOCKS, SIZE/BLOCKS, sizeof(unsigned));
-                        }
-                        else
-			{
-                                /* I know it's useless to allocate anything for this */
-                                data_handles[x][y] = NULL;
-                        }
-                        if (data_handles[x][y])
-			{
-				/* the tag (y*BLOCKS)+x is distinct for every block */
-                                starpu_data_set_rank(data_handles[x][y], mpi_rank);
-                                starpu_data_set_tag(data_handles[x][y], (y*BLOCKS)+x);
-			}
-                }
-        }
-
-	/* NOTE(review): with more than 2 ranks some handles are NULL on ranks
-	 * that are neither owner nor neighbour -- confirm
-	 * starpu_mpi_insert_task() accepts that */
-        for(x = 0; x < BLOCKS; x++)
-	{
-                for (y = 0; y < BLOCKS; y++)
-		{
-                        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
-						     STARPU_RW, data_handles[x][y],
-						     0);
-			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-
-                }
-        }
-
-        FPRINTF(stderr, "Waiting ...\n");
-        starpu_task_wait_for_all();
-
-        for(x = 0; x < BLOCKS; x++)
-	{
-                for (y = 0; y < BLOCKS; y++)
-		{
-                        if (data_handles[x][y])
-                                starpu_data_unregister(data_handles[x][y]);
-                }
-        }
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-#if 1
-        for(x = 0; x < SIZE; x++)
-	{
-                FPRINTF(stdout, "[%d] ", rank);
-                for (y = 0; y < SIZE; y++) {
-                        FPRINTF(stdout, "%3u ", matrix[x+y*SIZE]);
-                }
-                FPRINTF(stdout, "\n");
-        }
-#endif
-
-	return 0;
-}

+ 0 - 152
mpi/tests/insert_task_cache.c

@@ -1,152 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-#include "helper.h"
-
-void func_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	unsigned *x = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	unsigned *y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
-
-        FPRINTF(stdout, "VALUES: %u %u\n", *x, *y);
-        *x = (*x + *y) / 2;
-}
-
-struct starpu_codelet mycodelet =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_R}
-};
-
-#define X     4
-#define Y     5
-
-/* Returns the MPI node number where data indexes index is */
-int my_distrib(int x, int y, int nb_nodes)
-{
-        return x % nb_nodes;
-}
-
-
-int main(int argc, char **argv)
-{
-        int rank, size, x, y;
-        int ret,value=0;
-        unsigned matrix[X][Y];
-        starpu_data_handle_t data_handles[X][Y];
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize_extended(&rank, &size);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-
-        for(x = 0; x < X; x++)
-	{
-                for (y = 0; y < Y; y++)
-		{
-                        matrix[x][y] = (rank+1)*10 + value;
-                        value++;
-                }
-        }
-#if 0
-        for(x = 0; x < X; x++)
-	{
-                FPRINTF(stdout, "[%d] ", rank);
-                for (y = 0; y < Y; y++)
-		{
-                        FPRINTF(stdout, "%3u ", matrix[x][y]);
-                }
-                FPRINTF(stdout, "\n");
-        }
-#endif
-
-        for(x = 0; x < X; x++)
-	{
-                for (y = 0; y < Y; y++)
-		{
-                        int mpi_rank = my_distrib(x, y, size);
-                        if (mpi_rank == rank)
-			{
-                                //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-                                starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
-                        }
-                        else if (rank == mpi_rank+1 || rank == mpi_rank-1)
-			{
-                                /* I don't own that index, but will need it for my computations */
-                                //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", rank, x, y);
-                                starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned));
-                        }
-                        else
-			{
-                                /* I know it's useless to allocate anything for this */
-                                data_handles[x][y] = NULL;
-                        }
-                        if (data_handles[x][y])
-			{
-                                starpu_data_set_rank(data_handles[x][y], mpi_rank);
-                                starpu_data_set_tag(data_handles[x][y], (y*X)+x);
-			}
-                }
-        }
-
-	mycodelet.name = "codelet1";
-        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-
-	mycodelet.name = "codelet2";
-        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-
-	mycodelet.name = "codelet3";
-        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0][1], STARPU_R, data_handles[0][0], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-
-	mycodelet.name = "codelet4";
-        ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-
-        FPRINTF(stderr, "Waiting ...\n");
-        starpu_task_wait_for_all();
-
-        for(x = 0; x < X; x++)
-	{
-                for (y = 0; y < Y; y++)
-		{
-                        if (data_handles[x][y])
-                                starpu_data_unregister(data_handles[x][y]);
-                }
-        }
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-#if 0
-        for(x = 0; x < X; x++)
-	{
-                FPRINTF(stdout, "[%d] ", rank);
-                for (y = 0; y < Y; y++)
-		{
-                        FPRINTF(stdout, "%3u ", matrix[x][y]);
-                }
-                FPRINTF(stdout, "\n");
-        }
-#endif
-
-	return 0;
-}

+ 0 - 180
mpi/tests/insert_task_owner.c

@@ -1,180 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-#include "helper.h"
-
-void func_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	int node;
-	int rank;
-
-        starpu_codelet_unpack_args(_args, &node);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	FPRINTF(stderr, "Expected node: %d - Actual node: %d\n", node, rank);
-
-	assert(node == rank);
-}
-
-struct starpu_codelet mycodelet_r_w =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 2,
-	.modes = {STARPU_R, STARPU_W}
-};
-
-struct starpu_codelet mycodelet_rw_r =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_R}
-};
-
-struct starpu_codelet mycodelet_rw_rw =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_RW}
-};
-
-struct starpu_codelet mycodelet_w_r =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 2,
-	.modes = {STARPU_W, STARPU_R}
-};
-
-struct starpu_codelet mycodelet_r_r =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 2,
-	.modes = {STARPU_R, STARPU_R}
-};
-
-int main(int argc, char **argv)
-{
-        int ret, rank, size, err, node;
-        int x0=32, x1=23;
-        starpu_data_handle_t data_handlesx0;
-        starpu_data_handle_t data_handlesx1;
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize_extended(&rank, &size);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-
-        if (size != 2)
-	{
-		if (rank == 0) FPRINTF(stderr, "We need exactly 2 processes.\n");
-                starpu_mpi_shutdown();
-                starpu_shutdown();
-                return STARPU_TEST_SKIPPED;
-        }
-
-        if (rank == 0)
-	{
-                starpu_variable_data_register(&data_handlesx0, 0, (uintptr_t)&x0, sizeof(x0));
-                starpu_data_set_rank(data_handlesx0, rank);
-		starpu_data_set_tag(data_handlesx0, 0);
-                starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(int));
-                starpu_data_set_rank(data_handlesx1, 1);
-		starpu_data_set_tag(data_handlesx1, 1);
-        }
-        else if (rank == 1)
-	{
-                starpu_variable_data_register(&data_handlesx1, 0, (uintptr_t)&x1, sizeof(x1));
-                starpu_data_set_rank(data_handlesx1, rank);
-		starpu_data_set_tag(data_handlesx1, 1);
-                starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(int));
-                starpu_data_set_rank(data_handlesx0, 0);
-		starpu_data_set_tag(data_handlesx0, 0);
-        }
-
-	node = starpu_data_get_rank(data_handlesx1);
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_r_w,
-				     STARPU_VALUE, &node, sizeof(node),
-				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1,
-				     0);
-        assert(err == 0);
-
-	node = starpu_data_get_rank(data_handlesx0);
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_rw_r,
-				     STARPU_VALUE, &node, sizeof(node),
-				     STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1,
-				     0);
-        assert(err == 0);
-
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_rw_rw,
-				     STARPU_VALUE, &node, sizeof(node),
-				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
-				     0);
-        assert(err == -EINVAL);
-
-	node = 1;
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_rw_rw,
-				     STARPU_VALUE, &node, sizeof(node),
-				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
-				     0);
-        assert(err == 0);
-
-	node = 0;
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_rw_rw,
-				     STARPU_VALUE, &node, sizeof(node),
-				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
-				     0);
-        assert(err == 0);
-
-	node = 0;
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_r_r,
-				     STARPU_VALUE, &node, sizeof(node),
-				     STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
-				     0);
-        assert(err == 0);
-
-        /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is
-           going to overwrite the node even though the data model clearly specifies
-           which node is going to execute the codelet */
-	node = 0;
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_r_w,
-				     STARPU_VALUE, &node, sizeof(node),
-				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
-				     0);
-        assert(err == 0);
-
-        /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is
-           going to overwrite the node even though the data model clearly specifies
-           which node is going to execute the codelet */
-	node = 0;
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_w_r,
-				     STARPU_VALUE, &node, sizeof(node),
-				     STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
-				     0);
-        assert(err == 0);
-
-	fprintf(stderr, "Waiting ...\n");
-        starpu_task_wait_for_all();
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	return 0;
-}
-

+ 0 - 120
mpi/tests/insert_task_owner2.c

@@ -1,120 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-#include "helper.h"
-
-void func_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	int *x1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
-	int *x2 = (int *)STARPU_VARIABLE_GET_PTR(descr[2]);
-	int *y = (int *)STARPU_VARIABLE_GET_PTR(descr[3]);
-
-//        FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y);
-//
-//        *x2 = 45;
-//        *y = 144;
-//
-        FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y);
-        *y = (*x0 + *x1) * 100;
-        *x1 = 12;
-        *x2 = 24;
-        *x0 = 36;
-        FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y);
-}
-
-struct starpu_codelet mycodelet =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 4,
-	.modes = {STARPU_R, STARPU_RW, STARPU_W, STARPU_W}
-};
-
-int main(int argc, char **argv)
-{
-        int rank, size, err;
-        int x[3], y=0;
-        int i, ret;
-        starpu_data_handle_t data_handles[4];
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize_extended(&rank, &size);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-
-        if (rank == 0)
-	{
-                for(i=0 ; i<3 ; i++)
-		{
-                        x[i] = 10*(i+1);
-                        starpu_variable_data_register(&data_handles[i], 0, (uintptr_t)&x[i], sizeof(x[i]));
-                }
-                y = -1;
-                starpu_variable_data_register(&data_handles[3], -1, (uintptr_t)NULL, sizeof(int));
-        }
-        else if (rank == 1)
-	{
-                for(i=0 ; i<3 ; i++)
-		{
-                        x[i] = -1;
-                        starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int));
-                }
-                y=200;
-                starpu_variable_data_register(&data_handles[3], 0, (uintptr_t)&y, sizeof(int));
-        } else
-	{
-                for(i=0 ; i<4 ; i++)
-                        starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int));
-	}
-        FPRINTF(stderr, "[%d][init] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
-
-	for(i=0 ; i<3 ; i++)
-	{
-		starpu_data_set_rank(data_handles[i], 0);
-		starpu_data_set_tag(data_handles[i], i);
-	}
-	starpu_data_set_rank(data_handles[3], 1);
-	starpu_data_set_tag(data_handles[3], 3);
-
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
-                                     STARPU_R, data_handles[0], STARPU_RW, data_handles[1],
-                                     STARPU_W, data_handles[2],
-                                     STARPU_W, data_handles[3],
-                                     STARPU_EXECUTE_ON_NODE, 1, 0);
-	STARPU_CHECK_RETURN_VALUE(err, "starpu_mpi_insert_task");
-        starpu_task_wait_for_all();
-
-        int *values = malloc(4 * sizeof(int *));
-        for(i=0 ; i<4 ; i++)
-	{
-                starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL);
-		if (rank == 0) {
-			starpu_data_acquire(data_handles[i], STARPU_R);
-			values[i] = *((int *)starpu_handle_get_local_ptr(data_handles[i]));
-		}
-        }
-        FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d %d %d\n", rank, values[0], values[1], values[2], values[3]);
-        FPRINTF(stderr, "[%d][end] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	return 0;
-}
-

+ 0 - 99
mpi/tests/insert_task_owner_data.c

@@ -1,99 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <math.h>
-#include "helper.h"
-
-void func_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	int *x1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
-
-	*x0 += 1;
-	*x1 *= *x1;
-}
-
-struct starpu_codelet mycodelet =
-{
-	.where = STARPU_CPU,
-	.cpu_funcs = {func_cpu, NULL},
-        .nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_RW}
-};
-
-int main(int argc, char **argv)
-{
-        int rank, size, err;
-        int x[2];
-        int ret, i;
-        starpu_data_handle_t data_handles[2];
-	int values[2];
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize_extended(&rank, &size);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-
-        if (rank == 0)
-	{
-		x[0] = 11;
-		starpu_variable_data_register(&data_handles[0], 0, (uintptr_t)&x[0], sizeof(x[0]));
-		starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1]));
-        }
-        else if (rank == 1)
-	{
-		x[1] = 12;
-		starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0]));
-		starpu_variable_data_register(&data_handles[1], 0, (uintptr_t)&x[1], sizeof(x[1]));
-        }
-	else
-	{
-		starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0]));
-		starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1]));
-        }
-
-	starpu_data_set_rank(data_handles[0], 0);
-	starpu_data_set_tag(data_handles[0], 0);
-	starpu_data_set_rank(data_handles[1], 1);
-	starpu_data_set_tag(data_handles[1], 1);
-
-        err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
-                                     STARPU_RW, data_handles[0], STARPU_RW, data_handles[1],
-                                     STARPU_EXECUTE_ON_DATA, data_handles[1],
-				     0);
-        assert(err == 0);
-        starpu_task_wait_for_all();
-
-        for(i=0 ; i<2 ; i++)
-	{
-                starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL);
-		if (rank == 0) {
-			starpu_data_acquire(data_handles[i], STARPU_R);
-			values[i] = *((int *)starpu_handle_get_local_ptr(data_handles[i]));
-		}
-        }
-        FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]);
-	ret = 0;
-	if (rank == 0 && (values[0] != 12 || values[1] != 144))
-		ret = EXIT_FAILURE;
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	return ret;
-}
-

+ 0 - 80
mpi/tests/mpi_detached_tag.c

@@ -1,80 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048
-#define SIZE	16
-
-float *tab;
-starpu_data_handle_t tab_handle;
-
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size != 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need exactly 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	tab = malloc(SIZE*sizeof(float));
-
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	int other_rank = (rank + 1)%2;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		starpu_tag_t tag = (starpu_tag_t)loop;
-
-		if ((loop % 2) == rank)
-		{
-			starpu_mpi_isend_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag);
-		}
-		else
-		{
-			starpu_mpi_irecv_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag);
-		}
-
-		starpu_tag_wait(tag);
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 79
mpi/tests/mpi_irecv.c

@@ -1,79 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048
-#define SIZE	16
-
-float *tab;
-starpu_data_handle_t tab_handle;
-
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size != 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need exactly 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	tab = malloc(SIZE*sizeof(float));
-
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	int other_rank = (rank + 1)%2;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		if ((loop % 2) == rank)
-		{
-			starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD);
-		}
-		else
-		{
-			MPI_Status status;
-			starpu_mpi_req req;
-			starpu_mpi_irecv(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD);
-			starpu_mpi_wait(&req, &status);
-		}
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 97
mpi/tests/mpi_irecv_detached.c

@@ -1,97 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <common/utils.h>
-#include "helper.h"
-
-#define NITER	2048
-#define SIZE	16
-
-float *tab;
-starpu_data_handle_t tab_handle;
-
-static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-
-void callback(void *arg __attribute__((unused)))
-{
-	unsigned *received = arg;
-
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	*received = 1;
-	_STARPU_PTHREAD_COND_SIGNAL(&cond);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-}
-
-
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size != 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need exactly 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	tab = malloc(SIZE*sizeof(float));
-
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	int other_rank = (rank + 1)%2;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		if (rank == 0)
-		{
-			starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD);
-		}
-		else
-		{
-			int received = 0;
-			starpu_mpi_irecv_detached(tab_handle, other_rank, loop, MPI_COMM_WORLD, callback, &received);
-
-			_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-			while (!received)
-				_STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
-			_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-		}
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 80
mpi/tests/mpi_isend.c

@@ -1,80 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048
-#define SIZE	16
-
-float *tab;
-starpu_data_handle_t tab_handle;
-
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size != 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need exactly 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	tab = malloc(SIZE*sizeof(float));
-
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	int other_rank = (rank + 1)%2;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		if ((loop % 2) == rank)
-		{
-			MPI_Status status;
-			starpu_mpi_req req;
-			starpu_mpi_isend(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD);
-			starpu_mpi_wait(&req, &status);
-		}
-		else
-		{
-			MPI_Status status;
-			starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status);
-		}
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 98
mpi/tests/mpi_isend_detached.c

@@ -1,98 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include <common/utils.h>
-#include <pthread.h>
-#include "helper.h"
-
-#define NITER	2048
-#define SIZE	16
-
-static float *tab;
-static starpu_data_handle_t tab_handle;
-
-static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-
-void callback(void *arg __attribute__((unused)))
-{
-	unsigned *sent = arg;
-
-	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-	*sent = 1;
-	_STARPU_PTHREAD_COND_SIGNAL(&cond);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-}
-
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size != 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need exactly 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	tab = malloc(SIZE*sizeof(float));
-
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	int other_rank = (rank + 1)%2;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		if (rank == 0)
-		{
-			int sent = 0;
-			starpu_mpi_isend_detached(tab_handle, other_rank, loop, MPI_COMM_WORLD, callback, &sent);
-
-			_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
-			while (!sent)
-				_STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
-			_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-		}
-		else
-		{
-			MPI_Status status;
-			starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status);
-		}
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 86
mpi/tests/mpi_test.c

@@ -1,86 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048
-#define SIZE	16
-
-float *tab;
-starpu_data_handle_t tab_handle;
-
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size != 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need exactly 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	tab = malloc(SIZE*sizeof(float));
-
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	int other_rank = (rank + 1)%2;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		starpu_mpi_req req;
-
-		if ((loop % 2) == rank)
-		{
-                        starpu_mpi_isend(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD);
-		}
-		else
-		{
-			starpu_mpi_irecv(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD);
-		}
-
-		int finished = 0;
-		do
-		{
-			MPI_Status status;
-			starpu_mpi_test(&req, &finished, &status);
-		}
-		while (!finished);
-	}
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 92
mpi/tests/multiple_send.c

@@ -1,92 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048	/* NOTE(review): not referenced anywhere in this file */
-
-/*
- * Posts two concurrent non-blocking transfers between rank 0 and rank 1.
- *
- * Rank 0 sends send[0] and send[1] with tags 12 and 13, rank 1 posts the
- * matching receives into recv[0] and recv[1]; both ranks then poll the two
- * requests with starpu_mpi_test() until each has completed.  Any further
- * rank only registers and unregisters its data.
- *
- * Returns STARPU_TEST_SKIPPED when run on fewer than two processes,
- * 0 otherwise.
- */
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-	unsigned send[2] = {42, 11};
-	unsigned recv[2] = {33, 33};
-	starpu_mpi_req req[2];
-	starpu_data_handle_t send_handle[2];
-	starpu_data_handle_t recv_handle[2];
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize_extended(&rank, &size);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-
-	if (size < 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	starpu_variable_data_register(&send_handle[0], 0, (uintptr_t)&send[0], sizeof(unsigned));
-	starpu_variable_data_register(&send_handle[1], 0, (uintptr_t)&send[1], sizeof(unsigned));
-	starpu_variable_data_register(&recv_handle[0], 0, (uintptr_t)&recv[0], sizeof(unsigned));
-	starpu_variable_data_register(&recv_handle[1], 0, (uintptr_t)&recv[1], sizeof(unsigned));
-
-	if (rank == 0)
-	{
-		starpu_mpi_isend(send_handle[0], &(req[0]), 1, 12, MPI_COMM_WORLD);
-		starpu_mpi_isend(send_handle[1], &(req[1]), 1, 13, MPI_COMM_WORLD);
-	}
-	else if (rank == 1)
-	{
-		starpu_mpi_irecv(recv_handle[0], &(req[0]), 0, 12, MPI_COMM_WORLD);
-		starpu_mpi_irecv(recv_handle[1], &(req[1]), 0, 13, MPI_COMM_WORLD);
-	}
-
-	/* Only ranks 0 and 1 posted requests; req[] is left untouched (and
-	 * uninitialised) on every other rank. */
-	if (rank == 0 || rank == 1)
-	{
-		int nb_req=2;
-		while (nb_req)
-		{
-			int r=0;
-			for(r=0 ; r<2 ; r++)
-			{
-				/* A request slot is cleared to NULL below once it is done. */
-				if (req[r])
-				{
-					int finished = 0;
-					MPI_Status status;
-					starpu_mpi_test(&req[r], &finished, &status);
-					STARPU_ASSERT(finished != -1);
-					if (finished)
-					{
-						FPRINTF(stderr, "[%d] Request %d finished\n", rank, r);
-						req[r] = NULL;
-						nb_req--;
-					}
-				}
-			}
-		}
-	}
-	FPRINTF(stderr, "[%d] All requests finished\n", rank);
-
-	/* Every rank that got this far registered all four handles; release
-	 * them before StarPU is shut down. */
-	starpu_data_unregister(send_handle[0]);
-	starpu_data_unregister(send_handle[1]);
-	starpu_data_unregister(recv_handle[0]);
-	starpu_data_unregister(recv_handle[1]);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	return 0;
-}

+ 0 - 76
mpi/tests/pingpong.c

@@ -1,76 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048	/* number of iterations of the send/receive loop in main() */
-#define SIZE	16	/* number of floats in the exchanged vector */
-
-float *tab;	/* heap buffer of SIZE floats, allocated in main() */
-starpu_data_handle_t tab_handle;	/* StarPU vector handle registered over tab in main() */
-
-/*
- * Two-process ping-pong using the blocking StarPU-MPI calls.
- *
- * On every iteration the rank whose parity matches the loop index sends the
- * vector with starpu_mpi_send() and the other rank receives it with
- * starpu_mpi_recv(); the loop index is used as the message tag.
- *
- * Returns STARPU_TEST_SKIPPED when not run on exactly two processes,
- * 0 otherwise.
- */
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size != 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need exactly 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	/* Zero-initialised so that the very first send does not ship
-	 * indeterminate bytes over the wire. */
-	tab = calloc(SIZE, sizeof(float));
-	STARPU_ASSERT(tab != NULL);
-
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-	int other_rank = (rank + 1)%2;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		if ((loop % 2) == rank)
-		{
-			starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD);
-		}
-		else
-		{
-			MPI_Status status;
-			starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status);
-		}
-	}
-
-	/* Give the buffer back to the application before tearing StarPU
-	 * down, then release it. */
-	starpu_data_unregister(tab_handle);
-	free(tab);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	return 0;
-}

+ 0 - 129
mpi/tests/ring.c

@@ -1,129 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048	/* number of iterations of the ring loop in main() */
-
-unsigned token = 42;	/* value passed around the ring; rank 0 resets it to 0 on the first iteration */
-starpu_data_handle_t token_handle;	/* one-element StarPU vector handle registered over token */
-
-#ifdef STARPU_USE_CUDA
-extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);	/* CUDA implementation, defined outside this file */
-#endif
-
-/* CPU kernel: add one to the unsigned value held by the first buffer. */
-void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	unsigned *counter = (unsigned *) STARPU_VECTOR_GET_PTR(descr[0]);
-
-	*counter += 1;
-}
-
-/* Codelet wrapping the increment kernels; it works on one read-write buffer. */
-static struct starpu_codelet increment_cl =
-{
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-	.cpu_funcs = {increment_cpu, NULL},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {increment_cuda, NULL},
-#endif
-	.where = STARPU_CPU|STARPU_CUDA
-};
-
-/* Submit one synchronous increment_cl task operating on token_handle. */
-void increment_token(void)
-{
-	int err;
-	struct starpu_task *incr = starpu_task_create();
-
-	incr->synchronous = 1;
-	incr->handles[0] = token_handle;
-	incr->cl = &increment_cl;
-
-	err = starpu_task_submit(incr);
-	STARPU_CHECK_RETURN_VALUE(err, "starpu_task_submit");
-}
-
-/*
- * Passes a token around a ring of MPI processes using blocking transfers.
- *
- * On each of NITER iterations every rank receives the token from its
- * predecessor (except rank 0 on the first iteration, which starts it at 0),
- * increments it through a StarPU task and sends it to its successor (except
- * the last rank on the last iteration, which prints it).  The last rank
- * finally asserts that the token equals nloops*size.
- *
- * Returns STARPU_TEST_SKIPPED when run on fewer than two processes,
- * 0 otherwise.
- */
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size < 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	unsigned last_loop = nloops - 1;
-	unsigned last_rank = size - 1;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		/* Receives use `tag`, sends use `tag+1`, so the tag a rank sends
-		 * with is the one its successor expects. */
-		int tag = loop*size + rank;
-
-		if (loop == 0 && rank == 0)
-		{
-			token = 0;
-			FPRINTF(stdout, "Start with token value %u\n", token);
-		}
-		else
-		{
-			MPI_Status status;
-			starpu_mpi_recv(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, &status);
-		}
-
-		increment_token();
-
-		if (loop == last_loop && rank == last_rank)
-		{
-			starpu_data_acquire(token_handle, STARPU_R);
-			FPRINTF(stdout, "Finished : token value %u\n", token);
-			starpu_data_release(token_handle);
-		}
-		else
-		{
-			starpu_mpi_send(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD);
-		}
-	}
-
-	/* Hand the token back to the application before shutting down, so
-	 * the final check below reads memory StarPU no longer manages. */
-	starpu_data_unregister(token_handle);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	if (rank == last_rank)
-	{
-		STARPU_ASSERT(token == nloops*size);
-	}
-
-	return 0;
-}

+ 0 - 133
mpi/tests/ring_async.c

@@ -1,133 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048	/* number of iterations of the ring loop in main() */
-
-unsigned token = 42;	/* value passed around the ring; rank 0 resets it to 0 on the first iteration */
-starpu_data_handle_t token_handle;	/* one-element StarPU vector handle registered over token */
-
-#ifdef STARPU_USE_CUDA
-extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);	/* CUDA implementation, defined outside this file */
-#endif
-
-/* CPU implementation of the codelet: bumps the token in descr[0] by one. */
-void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	unsigned *value = (unsigned *) STARPU_VECTOR_GET_PTR(descr[0]);
-
-	++*value;
-}
-
-/* Codelet wrapping the increment kernels; it works on one read-write buffer. */
-static struct starpu_codelet increment_cl =
-{
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-	.cpu_funcs = {increment_cpu, NULL},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {increment_cuda, NULL},
-#endif
-	.where = STARPU_CPU|STARPU_CUDA
-};
-
-/* Submit one synchronous increment_cl task operating on token_handle. */
-void increment_token(void)
-{
-	int err;
-	struct starpu_task *incr = starpu_task_create();
-
-	incr->synchronous = 1;
-	incr->handles[0] = token_handle;
-	incr->cl = &increment_cl;
-
-	err = starpu_task_submit(incr);
-	STARPU_CHECK_RETURN_VALUE(err, "starpu_task_submit");
-}
-
-/*
- * Passes a token around a ring of MPI processes using non-blocking
- * transfers that are waited on immediately.
- *
- * On each of NITER iterations every rank receives the token from its
- * predecessor (except rank 0 on the first iteration, which starts it at 0),
- * increments it through a StarPU task and sends it to its successor (except
- * the last rank on the last iteration, which prints it).  The last rank
- * finally asserts that the token equals nloops*size.
- *
- * Returns STARPU_TEST_SKIPPED when run on fewer than two processes,
- * 0 otherwise.
- */
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	MPI_Init(NULL, NULL);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-	if (size < 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize();
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize");
-
-	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	unsigned last_loop = nloops - 1;
-	unsigned last_rank = size - 1;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		/* Receives use `tag`, sends use `tag+1`, so the tag a rank sends
-		 * with is the one its successor expects. */
-		int tag = loop*size + rank;
-
-		if (loop == 0 && rank == 0)
-		{
-			token = 0;
-			FPRINTF(stdout, "Start with token value %u\n", token);
-		}
-		else
-		{
-			MPI_Status status;
-			starpu_mpi_req req;
-			starpu_mpi_irecv(token_handle, &req, (rank+size-1)%size, tag, MPI_COMM_WORLD);
-			starpu_mpi_wait(&req, &status);
-		}
-
-		increment_token();
-
-		if (loop == last_loop && rank == last_rank)
-		{
-			starpu_data_acquire(token_handle, STARPU_R);
-			FPRINTF(stdout, "Finished : token value %u\n", token);
-			starpu_data_release(token_handle);
-		}
-		else
-		{
-			starpu_mpi_req req;
-			MPI_Status status;
-			starpu_mpi_isend(token_handle, &req, (rank+1)%size, tag+1, MPI_COMM_WORLD);
-			starpu_mpi_wait(&req, &status);
-		}
-	}
-
-	/* Hand the token back to the application before shutting down, so
-	 * the final check below reads memory StarPU no longer manages. */
-	starpu_data_unregister(token_handle);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	MPI_Finalize();
-
-	if (rank == last_rank)
-	{
-		STARPU_ASSERT(token == nloops*size);
-	}
-
-	return 0;
-}

+ 0 - 133
mpi/tests/ring_async_implicit.c

@@ -1,133 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu_mpi.h>
-#include "helper.h"
-
-#define NITER	2048	/* number of iterations of the ring loop in main() */
-
-unsigned token = 42;	/* value passed around the ring; rank 0 resets it to 0 on the first iteration */
-starpu_data_handle_t token_handle;	/* one-element StarPU vector handle registered over token */
-
-#ifdef STARPU_USE_CUDA
-extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);	/* CUDA implementation, defined outside this file */
-#endif
-
-/* Host-side kernel: increments the unsigned token found in buffer 0. */
-void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	unsigned *slot = (unsigned *) STARPU_VECTOR_GET_PTR(descr[0]);
-
-	*slot = *slot + 1;
-}
-
-/* Codelet wrapping the increment kernels; it works on one read-write buffer. */
-static struct starpu_codelet increment_cl =
-{
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-	.cpu_funcs = {increment_cpu, NULL},
-#ifdef STARPU_USE_CUDA
-	.cuda_funcs = {increment_cuda, NULL},
-#endif
-	.where = STARPU_CPU|STARPU_CUDA
-};
-
-/*
- * Submit one increment_cl task operating on token_handle.  The task is not
- * marked synchronous, so this returns without waiting for it to run.
- */
-void increment_token(void)
-{
-	int err;
-	struct starpu_task *incr = starpu_task_create();
-
-	incr->handles[0] = token_handle;
-	incr->cl = &increment_cl;
-
-	err = starpu_task_submit(incr);
-	STARPU_CHECK_RETURN_VALUE(err, "starpu_task_submit");
-}
-
-/*
- * Passes a token around a ring of MPI processes using detached transfers
- * and asynchronous tasks; nothing inside the loop waits explicitly.
- *
- * On each of NITER iterations every rank posts a detached receive from its
- * predecessor (except rank 0 on the first iteration, which starts the token
- * at 0), submits an increment task and posts a detached send to its
- * successor (except the last rank on the last iteration, which prints the
- * token).  The last rank finally asserts that the token equals nloops*size.
- *
- * MPI is never initialised or finalised directly in this function; rank and
- * size come from starpu_mpi_initialize_extended(), and teardown is left to
- * starpu_mpi_shutdown().
- *
- * Returns STARPU_TEST_SKIPPED when run on fewer than two processes,
- * 0 otherwise.
- */
-int main(int argc, char **argv)
-{
-	int ret, rank, size;
-
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_initialize_extended(&rank, &size);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-
-	if (size < 2)
-	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
-		/* Both runtimes are already up at this point: shut them down
-		 * rather than finalising MPI behind StarPU-MPI's back. */
-		starpu_mpi_shutdown();
-		starpu_shutdown();
-		return STARPU_TEST_SKIPPED;
-	}
-
-	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(unsigned));
-
-	unsigned nloops = NITER;
-	unsigned loop;
-
-	unsigned last_loop = nloops - 1;
-	unsigned last_rank = size - 1;
-
-	for (loop = 0; loop < nloops; loop++)
-	{
-		/* Receives use `tag`, sends use `tag+1`, so the tag a rank sends
-		 * with is the one its successor expects. */
-		int tag = loop*size + rank;
-
-		if (loop == 0 && rank == 0)
-		{
-			token = 0;
-			FPRINTF(stdout, "Start with token value %u\n", token);
-		}
-		else
-		{
-			starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL);
-		}
-
-		increment_token();
-
-		if (loop == last_loop && rank == last_rank)
-		{
-			starpu_data_acquire(token_handle, STARPU_R);
-			FPRINTF(stdout, "Finished : token value %u\n", token);
-			starpu_data_release(token_handle);
-		}
-		else
-		{
-			starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL);
-		}
-	}
-
-	starpu_task_wait_for_all();
-
-	/* Hand the token back to the application before shutting down, so
-	 * the final check below reads memory StarPU no longer manages. */
-	starpu_data_unregister(token_handle);
-
-	starpu_mpi_shutdown();
-	starpu_shutdown();
-
-	if (rank == last_rank)
-	{
-		FPRINTF(stderr, "[%d] token = %u == %u * %d ?\n", rank, token, nloops, size);
-		STARPU_ASSERT(token == nloops*size);
-	}
-
-	return 0;
-}

+ 0 - 32
mpi/tests/ring_kernel.cu

@@ -1,32 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-
-/* Device kernel: add one to the unsigned value that `token` points to. */
-static __global__ void cuda_incrementer(unsigned *token)
-{
-	*token += 1;
-}
-
-/*
- * CUDA implementation of the increment codelet: launches a single-thread
- * cuda_incrementer kernel on the pointer taken from descr[0], using the
- * stream returned by starpu_cuda_get_local_stream(), then waits for that
- * stream to drain.  _args is ignored.
- */
-extern "C" void increment_cuda(void *descr[], void *_args)
-{
-	unsigned *counter = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
-	cudaStream_t stream = starpu_cuda_get_local_stream();
-
-	(void) _args;
-
-	cuda_incrementer<<<1,1, 0, stream>>>(counter);
-	cudaStreamSynchronize(stream);
-}