Browse Source

merge trunk

Samuel Thibault 11 years ago
parent
commit
8e4b36633b
34 changed files with 357879 additions and 170 deletions
  1. 1 0
      AUTHORS
  2. 19 19
      doc/doxygen/chapters/api/scheduling_context_hypervisor.doxy
  3. 356873 0
      doc/doxygen/chapters/data_trace.eps
  4. BIN
      doc/doxygen/chapters/data_trace.pdf
  5. BIN
      doc/doxygen/chapters/data_trace.png
  6. 16 0
      doc/doxygen/chapters/mpi_support.doxy
  7. 13 0
      doc/doxygen/chapters/performance_feedback.doxy
  8. 7 7
      doc/doxygen/chapters/scheduling_context_hypervisor.doxy
  9. 4 2
      examples/lu/lu_example.c
  10. 1 0
      include/starpu_fxt.h
  11. 31 2
      mpi/examples/Makefile.am
  12. 19 0
      mpi/examples/mpi_lu/pdlu_implicit.c
  13. 6 0
      mpi/examples/mpi_lu/plu_example.c
  14. 357 0
      mpi/examples/mpi_lu/plu_implicit_example.c
  15. 19 0
      mpi/examples/mpi_lu/plu_implicit_example_double.c
  16. 19 0
      mpi/examples/mpi_lu/plu_implicit_example_float.c
  17. 19 0
      mpi/examples/mpi_lu/pslu_implicit.c
  18. 162 0
      mpi/examples/mpi_lu/pxlu_implicit.c
  19. 5 5
      sc_hypervisor/examples/app_driven_test/app_driven_test.c
  20. 76 76
      sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c
  21. 16 16
      sc_hypervisor/include/sc_hypervisor_config.h
  22. 20 20
      sc_hypervisor/src/sc_config.c
  23. 2 0
      src/common/fxt.h
  24. 4 10
      src/core/disk_ops/disk_stdio.c
  25. 6 5
      src/core/disk_ops/unistd/disk_unistd_global.c
  26. 1 1
      src/core/perfmodel/perfmodel_history.c
  27. 1 1
      src/core/perfmodel/perfmodel_print.c
  28. 11 0
      src/datawizard/coherency.c
  29. 14 2
      src/datawizard/malloc.c
  30. 103 0
      src/debug/traces/starpu_fxt.c
  31. 1 1
      tests/disk/disk_compute.c
  32. 1 1
      tests/disk/disk_copy.c
  33. 8 2
      tools/Makefile.am
  34. 44 0
      tools/starpu_fxt_data_trace.c

+ 1 - 0
AUTHORS

@@ -21,3 +21,4 @@ François Tessier <francois.tessier@inria.fr>
 Samuel Thibault <samuel.thibault@labri.fr>
 Pierre-André Wacrenier <wacrenier@labri.fr>
 Andra Hugo <andra.hugo@inria.fr>
+Joris Pablo <joris.pablo@orange.fr>

+ 19 - 19
doc/doxygen/chapters/api/scheduling_context_hypervisor.doxy

@@ -178,75 +178,75 @@ finished executing
 \ingroup API_Scheduling_Context_Hypervisor
 Requires reconsidering the distribution of resources over the indicated scheduling contexts
 
-\fn void sc_hypervisor_ioctl(unsigned sched_ctx, ...)
+\fn void sc_hypervisor_ctl(unsigned sched_ctx, ...)
 \ingroup API_Scheduling_Context_Hypervisor
 Inputs conditions to the context sched_ctx with the following
 arguments. The argument list must be zero-terminated.
 
-\def HYPERVISOR_MAX_IDLE
+\def SC_HYPERVISOR_MAX_IDLE
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 3 arguments: an array of int for the workerids to apply
 the condition, an int to indicate the size of the array, and a double
 value indicating the maximum idle time allowed for a worker before the
 resizing process should be triggered
 
-\def HYPERVISOR_PRIORITY
+\def SC_HYPERVISOR_PRIORITY
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 3 arguments: an array of int for the workerids to apply
 the condition, an int to indicate the size of the array, and an int
 value indicating the priority of the workers previously mentioned. The
 workers with the smallest priority are moved first.
 
-\def HYPERVISOR_MIN_WORKERS
+\def SC_HYPERVISOR_MIN_WORKERS
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 1 argument(int) indicating the minimum number of workers a
 context should have, underneath this limit the context cannot execute.
 
-\def HYPERVISOR_MAX_WORKERS
+\def SC_HYPERVISOR_MAX_WORKERS
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 1 argument(int) indicating the maximum number of workers a
 context should have, above this limit the context would not be able to
 scale
 
-\def HYPERVISOR_GRANULARITY
+\def SC_HYPERVISOR_GRANULARITY
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 1 argument(int) indicating the granularity of the resizing
 process (the number of workers that should be moved from the context once
 it is resized). This parameter is ignored for the Gflops rate based
 strategy (see \ref ResizingStrategies), where the number of workers that have to
 be moved is calculated by the strategy.
 
-\def HYPERVISOR_FIXED_WORKERS
+\def SC_HYPERVISOR_FIXED_WORKERS
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 2 arguments: an array of int for the workerids to apply
 the condition and an int to indicate the size of the array. These
 workers are not allowed to be moved from the context.
 
-\def HYPERVISOR_MIN_TASKS
+\def SC_HYPERVISOR_MIN_TASKS
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 1 argument (int) that indicated the minimum number of
 tasks that have to be executed before the context could be resized.
 This parameter is ignored for the Application Driven strategy (see \ref 
 ResizingStrategies) where the user indicates exactly when the resize
 should be done.
 
-\def HYPERVISOR_NEW_WORKERS_MAX_IDLE
+\def SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 1 argument, a double value indicating the maximum idle
 time allowed for workers that have just been moved from other contexts
 in the current context.
 
-\def HYPERVISOR_TIME_TO_APPLY
+\def SC_HYPERVISOR_TIME_TO_APPLY
 \ingroup API_Scheduling_Context_Hypervisor
-This macro is used when calling sc_hypervisor_ioctl() and must be
+This macro is used when calling sc_hypervisor_ctl() and must be
 followed by 1 argument (int) indicating the tag an executed task
 should have such that this configuration should be taken into account.
 

File diff suppressed because it is too large
+ 356873 - 0
doc/doxygen/chapters/data_trace.eps


BIN
doc/doxygen/chapters/data_trace.pdf


BIN
doc/doxygen/chapters/data_trace.png


+ 16 - 0
doc/doxygen/chapters/mpi_support.doxy

@@ -375,3 +375,19 @@ starpu_mpi_gather_detached(data_handles, nblocks, 0, MPI_COMM_WORLD);
 \endcode
 
 */
+
+\section MPIExamples More MPI examples
+
+MPI examples are available in the StarPU source code in mpi/examples:
+
+<ul>
+<li><c>complex</c> is a simple example using a user-defined data interface over
+MPI (complex numbers),
+<li><c>stencil5</c> is a simple stencil example using <c>starpu_mpi_insert_task</c>,
+<li><c>matrix_decomposition</c> is a Cholesky decomposition example using
+<c>starpu_mpi_insert_task</c>. The non-distributed version can check for
+algorithm correctness in 1-node configuration, the distributed version uses
+exactly the same source code, to be used over MPI,
+<li><c>mpi_lu</c> is an LU decomposition example, provided in both explicit and
+implicit versions.
+</ul>

+ 13 - 0
doc/doxygen/chapters/performance_feedback.doxy

@@ -582,6 +582,19 @@ Computation took (in ms)
 Synthetic GFlops : 44.21
 \endverbatim
 
+\section DataTrace Data trace and tasks length
+It is possible to get statistics about task lengths and data sizes by using:
+\verbatim
+$ starpu_fxt_data_trace filename
+\endverbatim
+where filename is the FxT trace file. This will create 2 files: <c>data_total.txt</c>, which
+shows each task's length and total data size, and <c>data_trace.gp</c>, which can be plotted to
+get a .eps image of these results. On the image, each point represents a task.
+
+\image html data_trace.png
+\image latex data_trace.eps "" width=\textwidth
+
+
 \internal
 TODO: data transfer stats are similar to the ones displayed when
 setting STARPU_BUS_STATS

+ 7 - 7
doc/doxygen/chapters/scheduling_context_hypervisor.doxy

@@ -92,10 +92,10 @@ sc_hypervisor_resize(sched_ctx, 2);
 The user can use the same tag to change the resizing configuration of the contexts if he considers it necessary.
 
 \code{.c}
-sc_hypervisor_ioctl(sched_ctx,
-                    HYPERVISOR_MIN_WORKERS, 6,
-                    HYPERVISOR_MAX_WORKERS, 12,
-                    HYPERVISOR_TIME_TO_APPLY, 2,
+sc_hypervisor_ctl(sched_ctx,
+                    SC_HYPERVISOR_MIN_WORKERS, 6,
+                    SC_HYPERVISOR_MAX_WORKERS, 12,
+                    SC_HYPERVISOR_TIME_TO_APPLY, 2,
                     NULL);
 \endcode
 
@@ -107,9 +107,9 @@ for a period longer than the one imposed by the user
 \code{.c}
 int workerids[3] = {1, 3, 10};
 int workerids2[9] = {0, 2, 4, 5, 6, 7, 8, 9, 11};
-sc_hypervisor_ioctl(sched_ctx_id,
-            HYPERVISOR_MAX_IDLE, workerids, 3, 10000.0,
-            HYPERVISOR_MAX_IDLE, workerids2, 9, 50000.0,
+sc_hypervisor_ctl(sched_ctx_id,
+            SC_HYPERVISOR_MAX_IDLE, workerids, 3, 10000.0,
+            SC_HYPERVISOR_MAX_IDLE, workerids2, 9, 50000.0,
             NULL);
 \endcode
 

+ 4 - 2
examples/lu/lu_example.c

@@ -182,6 +182,8 @@ static void init_matrix(void)
 			/* also randomize the imaginary component for complex number cases */
 			A[i + j*size] += (TYPE)(I*starpu_drand48());
 #endif
+			if (i == j)
+				A[i + j*size] *= 100;
 		}
 	}
 
@@ -299,13 +301,13 @@ int main(int argc, char **argv)
 {
 	int ret;
 
-	parse_args(argc, argv);
-
 #ifdef STARPU_QUICK_CHECK
 	size /= 4;
 	nblocks /= 4;
 #endif
 
+	parse_args(argc, argv);
+
 	ret = starpu_init(NULL);
 	if (ret == -ENODEV)
 		return 77;

+ 1 - 0
include/starpu_fxt.h

@@ -65,6 +65,7 @@ void starpu_fxt_options_init(struct starpu_fxt_options *options);
 void starpu_fxt_generate_trace(struct starpu_fxt_options *options);
 void starpu_fxt_start_profiling(void);
 void starpu_fxt_stop_profiling(void);
+void starpu_fxt_write_data_trace(char *filename_in);
 
 #ifdef __cplusplus
 }

+ 31 - 2
mpi/examples/Makefile.am

@@ -44,13 +44,15 @@ BUILT_SOURCES =
 
 CLEANFILES = *.gcno *.gcda *.linkinfo
 
-EXTRA_DIST = 					\
+EXTRA_DIST = 				\
 	mpi_lu/mpi_lu-float.h		\
 	mpi_lu/mpi_lu-double.h		\
 	mpi_lu/plu_example.c		\
+	mpi_lu/plu_implicit_example.c	\
 	mpi_lu/plu_solve.c		\
 	mpi_lu/pxlu.h			\
 	mpi_lu/pxlu.c			\
+	mpi_lu/pxlu_implicit.c		\
 	mpi_lu/pxlu_kernels.h		\
 	mpi_lu/pxlu_kernels.c		\
 	matrix_decomposition/mpi_cholesky_codelets.h 	\
@@ -101,7 +103,9 @@ if !NO_BLAS_LIB
 
 examplebin_PROGRAMS += 			\
 	mpi_lu/plu_example_float	\
-	mpi_lu/plu_example_double
+	mpi_lu/plu_example_double	\
+	mpi_lu/plu_implicit_example_float	\
+	mpi_lu/plu_implicit_example_double
 
 mpi_lu_plu_example_float_LDADD =	\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
@@ -126,8 +130,33 @@ mpi_lu_plu_example_double_SOURCES =	\
 	mpi_lu/pdlu_kernels.c	    	\
 	mpi_lu/pdlu.c		    	\
 	$(top_srcdir)/examples/common/blas.c
+
+mpi_lu_plu_implicit_example_float_LDADD =	\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
+	$(STARPU_LIBNUMA_LDFLAGS)				\
+	$(STARPU_BLAS_LDFLAGS) -lm
+
+mpi_lu_plu_implicit_example_float_SOURCES =	\
+	mpi_lu/plu_implicit_example_float.c	\
+	mpi_lu/plu_solve_float.c		\
+	mpi_lu/pslu_kernels.c			\
+	mpi_lu/pslu_implicit.c			\
+	$(top_srcdir)/examples/common/blas.c
+
+mpi_lu_plu_implicit_example_double_LDADD =	\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
+	$(STARPU_LIBNUMA_LDFLAGS)				\
+	$(STARPU_BLAS_LDFLAGS) -lm
+
+mpi_lu_plu_implicit_example_double_SOURCES =	\
+	mpi_lu/plu_implicit_example_double.c	\
+	mpi_lu/plu_solve_double.c		\
+	mpi_lu/pdlu_kernels.c			\
+	mpi_lu/pdlu_implicit.c			\
+	$(top_srcdir)/examples/common/blas.c
 endif
 
+
 ########################
 # MPI Cholesky example #
 ########################

+ 19 - 0
mpi/examples/mpi_lu/pdlu_implicit.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "mpi_lu-double.h"
+#include "pxlu_implicit.c"

+ 6 - 0
mpi/examples/mpi_lu/plu_example.c

@@ -109,6 +109,12 @@ static void parse_args(int rank, int argc, char **argv)
 			char *argptr;
 			q = strtol(argv[++i], &argptr, 10);
 		}
+
+		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0) {
+			fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q]\n", argv[0]);
+			fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n");
+			exit(0);
+		}
 	}
 }
 

+ 357 - 0
mpi/examples/mpi_lu/plu_implicit_example.c

@@ -0,0 +1,357 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include <starpu.h>
+
+#include "pxlu.h"
+//#include "pxlu_kernels.h"
+
+#ifdef STARPU_HAVE_LIBNUMA
+#include <numaif.h>
+#endif
+
+static unsigned long size = 4096; /* matrix dimension, overridden by -size */
+static unsigned nblocks = 16; /* number of blocks per dimension, overridden by -nblocks */
+static unsigned check = 0; /* set by -check: main() then runs the verification steps */
+static int p = 1; /* process grid extent along "direction" i, set by -p */
+static int q = 1; /* process grid extent along "direction" j, set by -q */
+static unsigned display = 0; /* set by -display */
+
+#ifdef STARPU_HAVE_LIBNUMA
+static unsigned numa = 0; /* set by -numa: init_matrix() requests MPOL_INTERLEAVE */
+#endif
+
+static size_t allocated_memory = 0; /* bytes of block data allocated by this process */
+static size_t allocated_memory_extra = 0; /* bytes used by the handle and pointer grids */
+
+static starpu_data_handle_t *dataA_handles; /* nblocks*nblocks grid of block handles */
+static TYPE **dataA; /* nblocks*nblocks grid of block buffers */
+
+int get_block_rank(unsigned i, unsigned j); /* defined further down; used by init_matrix() */
+
+/*
+ * Return the integer value that follows the option at argv[*i] and advance *i
+ * past it. Prints an error and exits when the option is the last argument,
+ * instead of reading beyond the end of argv.
+ */
+static long parse_option_value(int argc, char **argv, int *i)
+{
+	if (*i + 1 >= argc) {
+		fprintf(stderr, "%s: option %s requires a value\n", argv[0], argv[*i]);
+		exit(1);
+	}
+
+	*i += 1;
+	return strtol(argv[*i], NULL, 10);
+}
+
+/*
+ * Parse the command line into the file-level settings.
+ *
+ * Recognized options: -size n, -nblocks b, -check, -display, -numa, -p p,
+ * -q q, and -h/-help/--help (prints the usage on stderr and exits with 0).
+ * Unknown arguments are ignored.
+ */
+static void parse_args(int argc, char **argv)
+{
+	int i;
+	for (i = 1; i < argc; i++) {
+		if (strcmp(argv[i], "-size") == 0) {
+			size = parse_option_value(argc, argv, &i);
+		}
+
+		if (strcmp(argv[i], "-nblocks") == 0) {
+			nblocks = parse_option_value(argc, argv, &i);
+		}
+
+		if (strcmp(argv[i], "-check") == 0) {
+			check = 1;
+		}
+
+		if (strcmp(argv[i], "-display") == 0) {
+			display = 1;
+		}
+
+		if (strcmp(argv[i], "-numa") == 0) {
+#ifdef STARPU_HAVE_LIBNUMA
+			numa = 1;
+#else
+			/* No MPI rank is available in this function, so the
+			 * warning cannot be restricted to a single process. */
+			fprintf(stderr, "Warning: libnuma is not available\n");
+#endif
+		}
+
+		if (strcmp(argv[i], "-p") == 0) {
+			p = parse_option_value(argc, argv, &i);
+		}
+
+		if (strcmp(argv[i], "-q") == 0) {
+			q = parse_option_value(argc, argv, &i);
+		}
+
+		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0) {
+			fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q]\n", argv[0]);
+			fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n");
+			exit(0);
+		}
+	}
+}
+
+/* Report the current value of the file-level `display` setting. */
+unsigned STARPU_PLU(display_flag)(void)
+{
+	const unsigned enabled = display;
+
+	return enabled;
+}
+
+/*
+ * Fill one square block of (psize/pnblocks) x (psize/pnblocks) elements with
+ * values drawn from starpu_drand48(), visiting the elements in increasing
+ * index order.
+ */
+static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks)
+{
+	const unsigned dim = psize / pnblocks;
+	unsigned outer = 0;
+
+	while (outer < dim)
+	{
+		unsigned inner = 0;
+
+		while (inner < dim)
+		{
+			blockptr[inner + outer*dim] = (TYPE)starpu_drand48();
+			inner++;
+		}
+
+		outer++;
+	}
+}
+
+/*
+ * Allocate and register the blocks of the matrix.
+ *
+ * Every block of the nblocks x nblocks grid gets a StarPU matrix handle. A
+ * block whose owner (get_block_rank()) is `rank` is allocated with
+ * starpu_malloc(), filled with random values and registered on
+ * STARPU_MAIN_RAM; every other block is registered with home node -1 and no
+ * buffer, and its pointer is set to STARPU_POISON_PTR. Each handle is then
+ * given its owner rank and a tag equal to its index in the grid.
+ *
+ * Allocation failures abort the program instead of being dereferenced later.
+ */
+static void init_matrix(int rank)
+{
+#ifdef STARPU_HAVE_LIBNUMA
+	if (numa)
+	{
+		fprintf(stderr, "Using INTERLEAVE policy\n");
+		unsigned long nodemask = ((1<<0)|(1<<1));
+		int ret = set_mempolicy(MPOL_INTERLEAVE, &nodemask, 3);
+		if (ret)
+			perror("set_mempolicy failed");
+	}
+#endif
+
+	/* Allocate a grid of data handles, not all of them have to be allocated later on */
+	dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t));
+	STARPU_ASSERT(dataA_handles);
+	dataA = calloc(nblocks*nblocks, sizeof(TYPE *));
+	STARPU_ASSERT(dataA);
+	allocated_memory_extra += nblocks*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
+
+	/* Number of rows (and columns) of one block */
+	const unsigned long block_dim = size/nblocks;
+	size_t blocksize = (size_t)block_dim*block_dim*sizeof(TYPE);
+
+	/* Allocate all the blocks that belong to this mpi node */
+	unsigned long i,j;
+	for (j = 0; j < nblocks; j++)
+	{
+		for (i = 0; i < nblocks; i++)
+		{
+			int block_rank = get_block_rank(i, j);
+			TYPE **blockptr = &dataA[j+i*nblocks];
+			starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i];
+
+			if (block_rank == rank)
+			{
+				/* This block should be treated by the current MPI process */
+				/* Allocate and fill it */
+				int malloc_ret = starpu_malloc((void **)blockptr, blocksize);
+				STARPU_CHECK_RETURN_VALUE(malloc_ret, "starpu_malloc");
+				allocated_memory += blocksize;
+
+				fill_block_with_random(*blockptr, size, nblocks);
+
+				if (i == j)
+				{
+					/* Add 10*nblocks to every diagonal entry of a
+					 * diagonal block (presumably to keep the
+					 * matrix well conditioned -- TODO confirm) */
+					unsigned tmp;
+					for (tmp = 0; tmp < block_dim; tmp++)
+					{
+						(*blockptr)[tmp*(block_dim+1)] += (TYPE)10*nblocks;
+					}
+				}
+
+				/* Register it to StarPU */
+				starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM,
+					(uintptr_t)*blockptr, block_dim,
+					block_dim, block_dim, sizeof(TYPE));
+			}
+			else {
+				/* Not owned by this process: register a handle without a buffer */
+				starpu_matrix_data_register(handleptr, -1,
+					0, block_dim,
+					block_dim, block_dim, sizeof(TYPE));
+				*blockptr = STARPU_POISON_PTR;
+			}
+			starpu_data_set_rank(*handleptr, block_rank);
+			starpu_data_set_tag(*handleptr, j+i*nblocks);
+		}
+	}
+}
+
+/* Look up the buffer pointer stored for block (i, j) in the dataA grid. */
+TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j)
+{
+	TYPE **slot = &dataA[j+i*nblocks];
+
+	return *slot;
+}
+
+/*
+ * Give the MPI rank that owns block (i, j), following a 2D block cyclic
+ * distribution where p (resp. q) is the grid extent for "direction" i
+ * (resp. j).
+ */
+int get_block_rank(unsigned i, unsigned j)
+{
+	const unsigned along_j = (j % q) * p;
+	const unsigned along_i = i % p;
+
+	return along_j + along_i;
+}
+
+/* Look up the StarPU handle stored for block (i, j) in the dataA_handles grid. */
+starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j)
+{
+	starpu_data_handle_t *slot = &dataA_handles[j+i*nblocks];
+
+	return *slot;
+}
+
+/*
+ * When the `display` setting is on, print on stderr, for every block of the
+ * pnblocks x pnblocks grid, its owner rank, buffer pointer and handle.
+ * Every process that calls this prints its own view, labelled with `rank`.
+ */
+static void display_grid(int rank, unsigned pnblocks)
+{
+	if (!display)
+		return;
+
+	fprintf(stderr, "2D grid layout (Rank %d): \n", rank);
+
+	unsigned i, j;
+	for (j = 0; j < pnblocks; j++)
+	{
+		for (i = 0; i < pnblocks; i++)
+		{
+			TYPE *blockptr = STARPU_PLU(get_block)(i, j);
+			starpu_data_handle_t handle = STARPU_PLU(get_block_handle)(i, j);
+
+			/* %p requires arguments of type void * */
+			fprintf(stderr, "%d (data %p handle %p)", get_block_rank(i, j), (void *)blockptr, (void *)handle);
+		}
+		fprintf(stderr, "\n");
+	}
+}
+
+/*
+ * Entry point of the implicit MPI LU example: parse the options, start StarPU
+ * and StarPU-MPI, build the distributed matrix, run the factorization through
+ * plu_main() and report the timing on rank 0. With -check, the matrix is
+ * reconstructed beforehand and compared against L*U afterwards.
+ *
+ * Returns 0; initialization failures abort through STARPU_CHECK_RETURN_VALUE
+ * or STARPU_ASSERT.
+ */
+int main(int argc, char **argv)
+{
+	int rank;
+	int world_size;
+
+	starpu_srand48((long int)time(NULL));
+
+	parse_args(argc, argv);
+
+	int ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	ret = starpu_mpi_init(&argc, &argv, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+	/* The p x q process grid must cover exactly all MPI processes */
+	STARPU_ASSERT(p*q == world_size);
+
+	starpu_cublas_init();
+
+	/*
+	 * 	Problem Init
+	 */
+
+	init_matrix(rank);
+
+	fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank,
+			(int)(allocated_memory/(1024*1024)),
+			(int)(allocated_memory_extra/(1024*1024)),
+			(int)((allocated_memory+allocated_memory_extra)/(1024*1024)));
+
+	display_grid(rank, nblocks);
+
+	TYPE *a_r = NULL;
+//	STARPU_PLU(display_data_content)(a_r, size);
+
+	/* Only allocated with -check; kept NULL otherwise so that the final
+	 * free() calls are always valid */
+	TYPE *x = NULL, *y = NULL;
+
+	if (check)
+	{
+		x = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(x);
+
+		y = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(y);
+
+		if (rank == 0)
+		{
+			unsigned ind;
+			for (ind = 0; ind < size; ind++)
+				x[ind] = (TYPE)starpu_drand48();
+		}
+
+		a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
+
+		if (rank == 0)
+			STARPU_PLU(display_data_content)(a_r, size);
+
+//		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
+	}
+
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
+
+	/*
+	 * 	Report performance
+	 */
+
+	if (rank == 0)
+	{
+		/* timing is divided by 1000 to be printed in ms */
+		fprintf(stderr, "Computation took: %f ms\n", timing/1000);
+
+		unsigned n = size;
+		double flop = (2.0f*n*n*n)/3.0f;
+		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	}
+
+	/*
+	 *	Test Result Correctness
+	 */
+
+	if (check)
+	{
+		/*
+		 *	Compute || A - LU ||
+		 */
+
+		STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r);
+
+#if 0
+		/*
+		 *	Compute || Ax - LUx ||
+		 */
+
+		unsigned ind;
+
+		y2 = calloc(size, sizeof(TYPE));
+		STARPU_ASSERT(y2);
+
+		if (rank == 0)
+		{
+			for (ind = 0; ind < size; ind++)
+			{
+				y2[ind] = (TYPE)0.0;
+			}
+		}
+
+		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);
+
+		/* Compute y2 = y2 - y */
+		CPU_AXPY(size, -1.0, y, 1, y2, 1);
+
+		TYPE err = CPU_ASUM(size, y2, 1);
+		int max = CPU_IAMAX(size, y2, 1);
+
+		fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
+		fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
+#endif
+	}
+
+	/*
+	 * 	Termination
+	 */
+
+	/* NOTE(review): a_r comes from reconstruct_matrix(); its ownership is
+	 * not visible here, so it is left untouched -- confirm who frees it. */
+	free(x);
+	free(y);
+
+	starpu_cublas_shutdown();
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return 0;
+}

+ 19 - 0
mpi/examples/mpi_lu/plu_implicit_example_double.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "mpi_lu-double.h"
+#include "plu_implicit_example.c"

+ 19 - 0
mpi/examples/mpi_lu/plu_implicit_example_float.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "mpi_lu-float.h"
+#include "plu_implicit_example.c"

+ 19 - 0
mpi/examples/mpi_lu/pslu_implicit.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "mpi_lu-float.h"
+#include "pxlu_implicit.c"

+ 162 - 0
mpi/examples/mpi_lu/pxlu_implicit.c

@@ -0,0 +1,162 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010, 2012, 2013  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "pxlu.h"
+#include "pxlu_kernels.h"
+#include <sys/time.h>
+
+//#define VERBOSE_INIT	1
+
+//#define DEBUG	1
+
+static unsigned no_prio = 0; /* when non-zero, every task is submitted with STARPU_MIN_PRIO */
+
+static unsigned nblocks = 0; /* copied from plu_main()'s _nblocks argument */
+static int rank = -1; /* copied from plu_main()'s _rank argument */
+static int world_size = -1; /* copied from plu_main()'s _world_size argument */
+
+struct callback_arg { /* NOTE(review): no use of this struct is visible in this file */
+	unsigned i, j, k;
+};
+
+/*
+ *	Task 11 (diagonal factorization)
+ */
+
+/*
+ * Submit the cl11 task on diagonal block (k, k), accessed in STARPU_RW mode.
+ * It gets STARPU_MAX_PRIO unless priorities are disabled through no_prio.
+ */
+static void create_task_11(unsigned k)
+{
+	const int prio = !no_prio ? STARPU_MAX_PRIO : STARPU_MIN_PRIO;
+	starpu_data_handle_t diag_block = STARPU_PLU(get_block_handle)(k, k);
+
+	starpu_mpi_insert_task(MPI_COMM_WORLD,
+			&STARPU_PLU(cl11),
+			STARPU_VALUE, &k, sizeof(k),
+			STARPU_VALUE, &k, sizeof(k),
+			STARPU_VALUE, &k, sizeof(k),
+			STARPU_RW, diag_block,
+			STARPU_PRIORITY, prio,
+			0);
+}
+
+/*
+ *	Task 12 (Update lower left (TRSM))
+ */
+
+/*
+ * Submit the task updating block (k, j) from diagonal block (k, k).
+ * The cl21 codelet is submitted here instead of cl12; the #warning below
+ * flags this as a temporary fix. The task gets STARPU_MAX_PRIO only when
+ * priorities are enabled and j == k+1.
+ */
+static void create_task_12(unsigned k, unsigned j)
+{
+#warning temporary fix 
+	const int urgent = !no_prio && (j == k+1);
+	const int prio = urgent ? STARPU_MAX_PRIO : STARPU_MIN_PRIO;
+	starpu_data_handle_t diag_block = STARPU_PLU(get_block_handle)(k, k);
+	starpu_data_handle_t target_block = STARPU_PLU(get_block_handle)(k, j);
+
+	starpu_mpi_insert_task(MPI_COMM_WORLD,
+			&STARPU_PLU(cl21),
+			STARPU_VALUE, &j, sizeof(j),
+			STARPU_VALUE, &j, sizeof(j),
+			STARPU_VALUE, &k, sizeof(k),
+			STARPU_R, diag_block,
+			STARPU_RW, target_block,
+			STARPU_PRIORITY, prio,
+			0);
+}
+
+/*
+ *	Task 21 (Update upper right (TRSM))
+ */
+
+/*
+ * Submit the task updating block (i, k) from diagonal block (k, k).
+ * The cl12 codelet is submitted here instead of cl21; the #warning below
+ * flags this as a temporary fix. The task gets STARPU_MAX_PRIO only when
+ * priorities are enabled and i == k+1.
+ */
+static void create_task_21(unsigned k, unsigned i)
+{
+#warning temporary fix 
+	const int urgent = !no_prio && (i == k+1);
+	const int prio = urgent ? STARPU_MAX_PRIO : STARPU_MIN_PRIO;
+	starpu_data_handle_t diag_block = STARPU_PLU(get_block_handle)(k, k);
+	starpu_data_handle_t target_block = STARPU_PLU(get_block_handle)(i, k);
+
+	starpu_mpi_insert_task(MPI_COMM_WORLD,
+			&STARPU_PLU(cl12),
+			STARPU_VALUE, &i, sizeof(i),
+			STARPU_VALUE, &i, sizeof(i),
+			STARPU_VALUE, &k, sizeof(k),
+			STARPU_R, diag_block,
+			STARPU_RW, target_block,
+			STARPU_PRIORITY, prio,
+			0);
+}
+
+/*
+ *	Task 22 (GEMM)
+ */
+
+/*
+ * Submit the cl22 task that reads blocks (k, j) and (i, k) and updates block
+ * (i, j). The task gets STARPU_MAX_PRIO only when priorities are enabled and
+ * both i and j equal k+1.
+ */
+static void create_task_22(unsigned k, unsigned i, unsigned j)
+{
+	const int urgent = !no_prio && (i == k + 1) && (j == k +1);
+	const int prio = urgent ? STARPU_MAX_PRIO : STARPU_MIN_PRIO;
+	starpu_data_handle_t block_kj = STARPU_PLU(get_block_handle)(k, j);
+	starpu_data_handle_t block_ik = STARPU_PLU(get_block_handle)(i, k);
+	starpu_data_handle_t block_ij = STARPU_PLU(get_block_handle)(i, j);
+
+	starpu_mpi_insert_task(MPI_COMM_WORLD,
+			&STARPU_PLU(cl22),
+			STARPU_VALUE, &i, sizeof(i),
+			STARPU_VALUE, &j, sizeof(j),
+			STARPU_VALUE, &k, sizeof(k),
+			STARPU_R, block_kj,
+			STARPU_R, block_ik,
+			STARPU_RW, block_ij,
+			STARPU_PRIORITY, prio,
+			0);
+}
+
+/*
+ *	code to bootstrap the factorization 
+ */
+
+/*
+ * Submit the whole factorization and wait for it to complete.
+ *
+ * For each step k, this submits the task on diagonal block (k, k), then the
+ * create_task_12/create_task_21 pair for every i > k, then create_task_22 for
+ * every (i, j) with i, j > k. Submission is enclosed between two
+ * starpu_mpi_barrier() calls.
+ *
+ * _nblocks, _rank and _world_size are stored in the file-level variables.
+ * Returns the elapsed time between the two gettimeofday() calls, in
+ * microseconds.
+ */
+double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
+{
+	struct timeval start;
+	struct timeval end;
+
+	nblocks = _nblocks;
+	rank = _rank;
+	world_size = _world_size;
+
+	/* create all the DAG nodes */
+	unsigned i,j,k;
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	gettimeofday(&start, NULL);
+
+	for (k = 0; k < nblocks; k++)
+	{
+		create_task_11(k);
+
+		for (i = k+1; i<nblocks; i++)
+		{
+			create_task_12(k, i);
+			create_task_21(k, i);
+		}
+
+		for (i = k+1; i<nblocks; i++)
+		{
+			for (j = k+1; j<nblocks; j++)
+			{
+				create_task_22(k, i, j);
+			}
+		}
+	}
+
+	starpu_task_wait_for_all();
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	gettimeofday(&end, NULL);
+
+	/* Convert each difference to double before scaling, so that the
+	 * multiplication by 1000000 cannot overflow a narrow integer type. */
+	double timing = (double)(end.tv_sec - start.tv_sec)*1000000.0
+		+ (double)(end.tv_usec - start.tv_usec);
+
+	return timing;
+}

+ 5 - 5
sc_hypervisor/examples/app_driven_test/app_driven_test.c

@@ -80,11 +80,11 @@ void* submit_tasks_thread(void *arg)
 			task[i]->hypervisor_tag = tag;
 			/* indicate particular settings the context should have when the 
 			   resizing will be done */
-			sc_hypervisor_ioctl(sched_ctx,
-						   HYPERVISOR_TIME_TO_APPLY, tag,
-						   HYPERVISOR_MIN_WORKERS, 2,
-						   HYPERVISOR_MAX_WORKERS, 12,
-						   HYPERVISOR_NULL);
+			sc_hypervisor_ctl(sched_ctx,
+						   SC_HYPERVISOR_TIME_TO_APPLY, tag,
+						   SC_HYPERVISOR_MIN_WORKERS, 2,
+						   SC_HYPERVISOR_MAX_WORKERS, 12,
+						   SC_HYPERVISOR_NULL);
 			printf("require resize for sched_ctx %d at tag %d\n", sched_ctx, tag);
 			/* specify that the contexts should be resized when the task having this
 			   particular tag will finish executing */

+ 76 - 76
sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c

@@ -272,22 +272,22 @@ void construct_contexts(void (*bench)(float*, unsigned, unsigned))
 	p1.nworkers = nworkers1;
 	sc_hypervisor_register_ctx(p1.ctx, 0.0);
 
-	/* sc_hypervisor_ioctl(p1.ctx, */
-	/* 			   HYPERVISOR_MAX_IDLE, p1.workers, p1.nworkers, 5000.0, */
-	/* 			   HYPERVISOR_MAX_IDLE, p1.workers, gpu+gpu1, 100000.0, */
-	/* 			   HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */
-	/* 			   HYPERVISOR_GRANULARITY, 2, */
-	/* 			   HYPERVISOR_MIN_TASKS, 1000, */
-	/* 			   HYPERVISOR_NEW_WORKERS_MAX_IDLE, 100000.0, */
-	/* 			   HYPERVISOR_MIN_WORKERS, 6, */
-	/* 			   HYPERVISOR_MAX_WORKERS, 12, */
+	/* sc_hypervisor_ctl(p1.ctx, */
+	/* 			   SC_HYPERVISOR_MAX_IDLE, p1.workers, p1.nworkers, 5000.0, */
+	/* 			   SC_HYPERVISOR_MAX_IDLE, p1.workers, gpu+gpu1, 100000.0, */
+	/* 			   SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */
+	/* 			   SC_HYPERVISOR_GRANULARITY, 2, */
+	/* 			   SC_HYPERVISOR_MIN_TASKS, 1000, */
+	/* 			   SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE, 100000.0, */
+	/* 			   SC_HYPERVISOR_MIN_WORKERS, 6, */
+	/* 			   SC_HYPERVISOR_MAX_WORKERS, 12, */
 	/* 			   NULL); */
 
-	sc_hypervisor_ioctl(p1.ctx,
-				   HYPERVISOR_GRANULARITY, 2,
-				   HYPERVISOR_MIN_TASKS, 1000,
-				   HYPERVISOR_MIN_WORKERS, 6,
-				   HYPERVISOR_MAX_WORKERS, 12,
+	sc_hypervisor_ctl(p1.ctx,
+				   SC_HYPERVISOR_GRANULARITY, 2,
+				   SC_HYPERVISOR_MIN_TASKS, 1000,
+				   SC_HYPERVISOR_MIN_WORKERS, 6,
+				   SC_HYPERVISOR_MAX_WORKERS, 12,
 				   NULL);
 
 	k = 0;
@@ -308,22 +308,22 @@ void construct_contexts(void (*bench)(float*, unsigned, unsigned))
 	p2.nworkers = 0;
 	sc_hypervisor_register_ctx(p2.ctx, 0.0);
 
-	/* sc_hypervisor_ioctl(p2.ctx, */
-	/* 			   HYPERVISOR_MAX_IDLE, p2.workers, p2.nworkers, 2000.0, */
-	/* 			   HYPERVISOR_MAX_IDLE, p2.workers, gpu+gpu2, 5000.0, */
-	/* 			   HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */
-	/* 			   HYPERVISOR_GRANULARITY, 2, */
-	/* 			   HYPERVISOR_MIN_TASKS, 500, */
-	/* 			   HYPERVISOR_NEW_WORKERS_MAX_IDLE, 1000.0, */
-	/* 			   HYPERVISOR_MIN_WORKERS, 4, */
-	/* 			   HYPERVISOR_MAX_WORKERS, 8, */
+	/* sc_hypervisor_ctl(p2.ctx, */
+	/* 			   SC_HYPERVISOR_MAX_IDLE, p2.workers, p2.nworkers, 2000.0, */
+	/* 			   SC_HYPERVISOR_MAX_IDLE, p2.workers, gpu+gpu2, 5000.0, */
+	/* 			   SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */
+	/* 			   SC_HYPERVISOR_GRANULARITY, 2, */
+	/* 			   SC_HYPERVISOR_MIN_TASKS, 500, */
+	/* 			   SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE, 1000.0, */
+	/* 			   SC_HYPERVISOR_MIN_WORKERS, 4, */
+	/* 			   SC_HYPERVISOR_MAX_WORKERS, 8, */
 	/* 			   NULL); */
 
-	sc_hypervisor_ioctl(p2.ctx,
-				   HYPERVISOR_GRANULARITY, 2,
-				   HYPERVISOR_MIN_TASKS, 500,
-				   HYPERVISOR_MIN_WORKERS, 0,
-				   HYPERVISOR_MAX_WORKERS, 6,
+	sc_hypervisor_ctl(p2.ctx,
+				   SC_HYPERVISOR_GRANULARITY, 2,
+				   SC_HYPERVISOR_MIN_TASKS, 500,
+				   SC_HYPERVISOR_MIN_WORKERS, 0,
+				   SC_HYPERVISOR_MAX_WORKERS, 6,
 				   NULL);
 
 }
@@ -337,33 +337,33 @@ void set_hypervisor_conf(int event, int task_tag)
 /* 		{ */
 /* 			if(it < 2) */
 /* 			{ */
-/* 				sc_hypervisor_ioctl(p2.ctx, */
-/* 							   HYPERVISOR_MIN_WORKERS, 2, */
-/* 							   HYPERVISOR_MAX_WORKERS, 4, */
-/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 				sc_hypervisor_ctl(p2.ctx, */
+/* 							   SC_HYPERVISOR_MIN_WORKERS, 2, */
+/* 							   SC_HYPERVISOR_MAX_WORKERS, 4, */
+/* 							   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 /* 							   NULL); */
 
 /* 				printf("%d: set max %d for tag %d\n", p2.ctx, 4, task_tag); */
-/* 				sc_hypervisor_ioctl(p1.ctx, */
-/* 							   HYPERVISOR_MIN_WORKERS, 6, */
-/* 							   HYPERVISOR_MAX_WORKERS, 8, */
-/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 				sc_hypervisor_ctl(p1.ctx, */
+/* 							   SC_HYPERVISOR_MIN_WORKERS, 6, */
+/* 							   SC_HYPERVISOR_MAX_WORKERS, 8, */
+/* 							   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 /* 							   NULL); */
 /* 				printf("%d: set max %d for tag %d\n", p1.ctx, 8, task_tag); */
 /* 				sc_hypervisor_resize(p1.ctx, task_tag); */
 /* 			} */
 /* 			if(it == 2) */
 /* 			{ */
-/* 				sc_hypervisor_ioctl(p2.ctx, */
-/* 							   HYPERVISOR_MIN_WORKERS, 12, */
-/* 							   HYPERVISOR_MAX_WORKERS, 12, */
-/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 				sc_hypervisor_ctl(p2.ctx, */
+/* 							   SC_HYPERVISOR_MIN_WORKERS, 12, */
+/* 							   SC_HYPERVISOR_MAX_WORKERS, 12, */
+/* 							   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 /* 							   NULL); */
 /* 				printf("%d: set max %d for tag %d\n", p2.ctx, 12, task_tag); */
-/* 				sc_hypervisor_ioctl(p1.ctx, */
-/* 							   HYPERVISOR_MIN_WORKERS, 0, */
-/* 							   HYPERVISOR_MAX_WORKERS, 0, */
-/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 				sc_hypervisor_ctl(p1.ctx, */
+/* 							   SC_HYPERVISOR_MIN_WORKERS, 0, */
+/* 							   SC_HYPERVISOR_MAX_WORKERS, 0, */
+/* 							   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 /* 							   NULL); */
 /* 				printf("%d: set max %d for tag %d\n", p1.ctx, 0, task_tag); */
 /* 				sc_hypervisor_resize(p1.ctx, task_tag); */
@@ -378,16 +378,16 @@ void set_hypervisor_conf(int event, int task_tag)
 /* 		{ */
 /* 			if(it2 < 3) */
 /* 			{ */
-/* 				sc_hypervisor_ioctl(p1.ctx, */
-/* 							   HYPERVISOR_MIN_WORKERS, 6, */
-/* 							   HYPERVISOR_MAX_WORKERS, 12, */
-/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 				sc_hypervisor_ctl(p1.ctx, */
+/* 							   SC_HYPERVISOR_MIN_WORKERS, 6, */
+/* 							   SC_HYPERVISOR_MAX_WORKERS, 12, */
+/* 							   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 /* 							   NULL); */
 /* 				printf("%d: set max %d for tag %d\n", p1.ctx, 12, task_tag); */
-/* 				sc_hypervisor_ioctl(p2.ctx, */
-/* 							   HYPERVISOR_MIN_WORKERS, 0, */
-/* 							   HYPERVISOR_MAX_WORKERS, 0, */
-/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 				sc_hypervisor_ctl(p2.ctx, */
+/* 							   SC_HYPERVISOR_MIN_WORKERS, 0, */
+/* 							   SC_HYPERVISOR_MAX_WORKERS, 0, */
+/* 							   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 /* 							   NULL); */
 /* 				printf("%d: set max %d for tag %d\n", p2.ctx, 0, task_tag); */
 /* 				sc_hypervisor_resize(p2.ctx, task_tag); */
@@ -401,9 +401,9 @@ void set_hypervisor_conf(int event, int task_tag)
 	/* 	if(event == START_BENCH) */
 	/* 	{ */
 	/* 		int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
-	/* 		sc_hypervisor_ioctl(p1.ctx, */
-	/* 					   HYPERVISOR_MAX_IDLE, workers, 12, 800000.0, */
-	/* 					   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+	/* 		sc_hypervisor_ctl(p1.ctx, */
+	/* 					   SC_HYPERVISOR_MAX_IDLE, workers, 12, 800000.0, */
+	/* 					   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 	/* 					   NULL); */
 	/* 	} */
 	/* 	else */
@@ -411,20 +411,20 @@ void set_hypervisor_conf(int event, int task_tag)
 	/* 		if(it2 < 2) */
 	/* 		{ */
 	/* 			int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
-	/* 			sc_hypervisor_ioctl(p2.ctx, */
-	/* 						   HYPERVISOR_MAX_IDLE, workers, 12, 500.0, */
-	/* 						   HYPERVISOR_MAX_IDLE, workers, 3, 200.0, */
-	/* 						   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+	/* 			sc_hypervisor_ctl(p2.ctx, */
+	/* 						   SC_HYPERVISOR_MAX_IDLE, workers, 12, 500.0, */
+	/* 						   SC_HYPERVISOR_MAX_IDLE, workers, 3, 200.0, */
+	/* 						   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 	/* 						   NULL); */
 	/* 		} */
 	/* 		if(it2 == 2) */
 	/* 		{ */
 	/* 			int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
-	/* 			sc_hypervisor_ioctl(p2.ctx, */
-	/* 						   HYPERVISOR_MAX_IDLE, workers, 12, 1000.0, */
-	/* 						   HYPERVISOR_MAX_IDLE, workers, 3, 500.0, */
-	/* 						   HYPERVISOR_TIME_TO_APPLY, task_tag, */
-	/* 						   HYPERVISOR_MAX_WORKERS, 12, */
+	/* 			sc_hypervisor_ctl(p2.ctx, */
+	/* 						   SC_HYPERVISOR_MAX_IDLE, workers, 12, 1000.0, */
+	/* 						   SC_HYPERVISOR_MAX_IDLE, workers, 3, 500.0, */
+	/* 						   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
+	/* 						   SC_HYPERVISOR_MAX_WORKERS, 12, */
 	/* 						   NULL); */
 	/* 		} */
 	/* 		it2++; */
@@ -434,10 +434,10 @@ void set_hypervisor_conf(int event, int task_tag)
 	/* 	if(event == START_BENCH) */
 	/* 	{ */
 	/* 		int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
-	/* 		sc_hypervisor_ioctl(p1.ctx, */
-	/* 					   HYPERVISOR_MAX_IDLE, workers, 12, 1500.0, */
-	/* 					   HYPERVISOR_MAX_IDLE, workers, 3, 4000.0, */
-	/* 					   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+	/* 		sc_hypervisor_ctl(p1.ctx, */
+	/* 					   SC_HYPERVISOR_MAX_IDLE, workers, 12, 1500.0, */
+	/* 					   SC_HYPERVISOR_MAX_IDLE, workers, 3, 4000.0, */
+	/* 					   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 	/* 					   NULL); */
 	/* 	} */
 	/* 	if(event == END_BENCH) */
@@ -445,19 +445,19 @@ void set_hypervisor_conf(int event, int task_tag)
 	/* 		if(it < 2) */
 	/* 		{ */
 	/* 			int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
-	/* 			sc_hypervisor_ioctl(p1.ctx, */
-	/* 						   HYPERVISOR_MAX_IDLE, workers, 12, 100.0, */
-	/* 						   HYPERVISOR_MAX_IDLE, workers, 3, 5000.0, */
-	/* 						   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+	/* 			sc_hypervisor_ctl(p1.ctx, */
+	/* 						   SC_HYPERVISOR_MAX_IDLE, workers, 12, 100.0, */
+	/* 						   SC_HYPERVISOR_MAX_IDLE, workers, 3, 5000.0, */
+	/* 						   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 	/* 						   NULL); */
 	/* 		} */
 	/* 		if(it == 2) */
 	/* 		{ */
 	/* 			int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
-	/* 			sc_hypervisor_ioctl(p1.ctx, */
-	/* 						   HYPERVISOR_MAX_IDLE, workers, 12, 5000.0, */
-	/* 						   HYPERVISOR_MAX_IDLE, workers, 3, 10000.0, */
-	/* 						   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+	/* 			sc_hypervisor_ctl(p1.ctx, */
+	/* 						   SC_HYPERVISOR_MAX_IDLE, workers, 12, 5000.0, */
+	/* 						   SC_HYPERVISOR_MAX_IDLE, workers, 3, 10000.0, */
+	/* 						   SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
 	/* 						   NULL); */
 	/* 		} */
 

+ 16 - 16
sc_hypervisor/include/sc_hypervisor_config.h

@@ -24,21 +24,21 @@ extern "C"
 {
 #endif
 
-/* ioctl properties*/
-#define HYPERVISOR_MAX_IDLE -1
-#define HYPERVISOR_MIN_WORKING -2
-#define HYPERVISOR_PRIORITY -3
-#define HYPERVISOR_MIN_WORKERS -4
-#define HYPERVISOR_MAX_WORKERS -5
-#define HYPERVISOR_GRANULARITY -6
-#define HYPERVISOR_FIXED_WORKERS -7
-#define HYPERVISOR_MIN_TASKS -8
-#define HYPERVISOR_NEW_WORKERS_MAX_IDLE -9
-#define HYPERVISOR_TIME_TO_APPLY -10
-#define HYPERVISOR_EMPTY_CTX_MAX_IDLE -11
-#define HYPERVISOR_NULL -12
-#define	HYPERVISOR_ISPEED_W_SAMPLE -13
-#define HYPERVISOR_ISPEED_CTX_SAMPLE -14
+/* ctl properties*/
+#define SC_HYPERVISOR_MAX_IDLE -1
+#define SC_HYPERVISOR_MIN_WORKING -2
+#define SC_HYPERVISOR_PRIORITY -3
+#define SC_HYPERVISOR_MIN_WORKERS -4
+#define SC_HYPERVISOR_MAX_WORKERS -5
+#define SC_HYPERVISOR_GRANULARITY -6
+#define SC_HYPERVISOR_FIXED_WORKERS -7
+#define SC_HYPERVISOR_MIN_TASKS -8
+#define SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE -9
+#define SC_HYPERVISOR_TIME_TO_APPLY -10
+#define SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE -11
+#define SC_HYPERVISOR_NULL -12
+#define	SC_HYPERVISOR_ISPEED_W_SAMPLE -13
+#define SC_HYPERVISOR_ISPEED_CTX_SAMPLE -14
 
 
 #define MAX_IDLE_TIME 5000000000
@@ -90,7 +90,7 @@ void sc_hypervisor_set_config(unsigned sched_ctx, void *config);
 struct sc_hypervisor_policy_config *sc_hypervisor_get_config(unsigned sched_ctx);
 
 /* impose different parameters to a configuration of a context */
-void sc_hypervisor_ioctl(unsigned sched_ctx, ...);
+void sc_hypervisor_ctl(unsigned sched_ctx, ...);
 
 #ifdef __cplusplus
 }

+ 20 - 20
sc_hypervisor/src/sc_config.c

@@ -102,7 +102,7 @@ struct sc_hypervisor_policy_config* sc_hypervisor_get_config(unsigned sched_ctx)
 	return hypervisor.sched_ctx_w[sched_ctx].config;
 }
 
-static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list varg_list, unsigned later)
+static struct sc_hypervisor_policy_config* _ctl(unsigned sched_ctx, va_list varg_list, unsigned later)
 {
 	struct sc_hypervisor_policy_config *config = NULL;
 
@@ -118,11 +118,11 @@ static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list va
 	int *workerids;
 	int nworkers;
 
-	while ((arg_type = va_arg(varg_list, int)) != HYPERVISOR_NULL)
+	while ((arg_type = va_arg(varg_list, int)) != SC_HYPERVISOR_NULL)
 	{
 		switch(arg_type)
 		{
-		case HYPERVISOR_MAX_IDLE:
+		case SC_HYPERVISOR_MAX_IDLE:
 			workerids = va_arg(varg_list, int*);
 			nworkers = va_arg(varg_list, int);
 			double max_idle = va_arg(varg_list, double);
@@ -131,7 +131,7 @@ static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list va
 
 			break;
 
-		case HYPERVISOR_EMPTY_CTX_MAX_IDLE:
+		case SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE:
 			workerids = va_arg(varg_list, int*);
 			nworkers = va_arg(varg_list, int);
 			double empty_ctx_max_idle = va_arg(varg_list, double);
@@ -141,7 +141,7 @@ static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list va
 
 			break;
 
-		case HYPERVISOR_MIN_WORKING:
+		case SC_HYPERVISOR_MIN_WORKING:
 			workerids = va_arg(varg_list, int*);
 			nworkers = va_arg(varg_list, int);
 			double min_working = va_arg(varg_list, double);
@@ -151,7 +151,7 @@ static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list va
 
 			break;
 
-		case HYPERVISOR_PRIORITY:
+		case SC_HYPERVISOR_PRIORITY:
 			workerids = va_arg(varg_list, int*);
 			nworkers = va_arg(varg_list, int);
 			int priority = va_arg(varg_list, int);
@@ -160,19 +160,19 @@ static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list va
 				config->priority[workerids[i]] = priority;
 			break;
 
-		case HYPERVISOR_MIN_WORKERS:
+		case SC_HYPERVISOR_MIN_WORKERS:
 			config->min_nworkers = va_arg(varg_list, unsigned);
 			break;
 
-		case HYPERVISOR_MAX_WORKERS:
+		case SC_HYPERVISOR_MAX_WORKERS:
 			config->max_nworkers = va_arg(varg_list, unsigned);
 			break;
 
-		case HYPERVISOR_GRANULARITY:
+		case SC_HYPERVISOR_GRANULARITY:
 			config->granularity = va_arg(varg_list, unsigned);
 			break;
 
-		case HYPERVISOR_FIXED_WORKERS:
+		case SC_HYPERVISOR_FIXED_WORKERS:
 			workerids = va_arg(varg_list, int*);
 			nworkers = va_arg(varg_list, int);
 
@@ -180,11 +180,11 @@ static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list va
 				config->fixed_workers[workerids[i]] = 1;
 			break;
 
-		case HYPERVISOR_NEW_WORKERS_MAX_IDLE:
+		case SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE:
 			config->new_workers_max_idle = va_arg(varg_list, double);
 			break;
 
-		case HYPERVISOR_ISPEED_W_SAMPLE:
+		case SC_HYPERVISOR_ISPEED_W_SAMPLE:
 			workerids = va_arg(varg_list, int*);
 			nworkers = va_arg(varg_list, int);
 			double sample = va_arg(varg_list, double);
@@ -193,16 +193,16 @@ static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list va
 				config->ispeed_w_sample[workerids[i]] = sample;
 			break;
 
-		case HYPERVISOR_ISPEED_CTX_SAMPLE:
+		case SC_HYPERVISOR_ISPEED_CTX_SAMPLE:
 			config->ispeed_ctx_sample = va_arg(varg_list, double);
 			break;
 
 /* not important for the strateg, needed just to jump these args in the iteration of the args */
-		case HYPERVISOR_TIME_TO_APPLY:
+		case SC_HYPERVISOR_TIME_TO_APPLY:
 			va_arg(varg_list, int);
 			break;
 
-		case HYPERVISOR_MIN_TASKS:
+		case SC_HYPERVISOR_MIN_TASKS:
 			va_arg(varg_list, int);
 			break;
 
@@ -215,7 +215,7 @@ static struct sc_hypervisor_policy_config* _ioctl(unsigned sched_ctx, va_list va
 }
 
 
-void sc_hypervisor_ioctl(unsigned sched_ctx, ...)
+void sc_hypervisor_ctl(unsigned sched_ctx, ...)
 {
 	va_list varg_list;
 	va_start(varg_list, sched_ctx);
@@ -224,16 +224,16 @@ void sc_hypervisor_ioctl(unsigned sched_ctx, ...)
 	int stop = 0;
 	int task_tag = -1;
 
-	while ((arg_type = va_arg(varg_list, int)) != HYPERVISOR_NULL)
+	while ((arg_type = va_arg(varg_list, int)) != SC_HYPERVISOR_NULL)
 	{
 		switch(arg_type)
 		{
-		case HYPERVISOR_TIME_TO_APPLY:
+		case SC_HYPERVISOR_TIME_TO_APPLY:
 			task_tag = va_arg(varg_list, int);
 			stop = 1;
 			break;
 
-		case HYPERVISOR_MIN_TASKS:
+		case SC_HYPERVISOR_MIN_TASKS:
 			hypervisor.min_tasks = va_arg(varg_list, int);
 			hypervisor.check_min_tasks[sched_ctx] = 1;
 			break;
@@ -246,7 +246,7 @@ void sc_hypervisor_ioctl(unsigned sched_ctx, ...)
 	va_start(varg_list, sched_ctx);
 
 	/* if config not null => save hypervisor configuration and consider it later */
-	struct sc_hypervisor_policy_config *config = _ioctl(sched_ctx, varg_list, (task_tag > 0));
+	struct sc_hypervisor_policy_config *config = _ctl(sched_ctx, varg_list, (task_tag > 0));
 	if(config != NULL)
 	{
 		struct configuration_entry *entry;

+ 2 - 0
src/common/fxt.h

@@ -135,6 +135,8 @@
 
 #define _STARPU_FUT_MEMORY_FULL			0x5152
 
+#define _STARPU_FUT_DATA_LOAD 0x5153
+
 #ifdef STARPU_USE_FXT
 #include <fxt/fxt.h>
 #include <fxt/fut.h>

+ 4 - 10
src/core/disk_ops/disk_stdio.c

@@ -56,16 +56,13 @@ starpu_stdio_alloc (void *base, size_t size)
 	int id = -1;
 
 	/* create template for mkstemp */
-	unsigned int sizeBase = 16;
-	while(sizeBase < (strlen(base)+7))
-		sizeBase *= 2;
-
-	char * baseCpy = malloc(sizeBase*sizeof(char));
+	char * baseCpy = malloc(strlen(base)+8);
 	STARPU_ASSERT(baseCpy != NULL);
 
 	char * tmp = "STARPU_XXXXXX";
 
 	strcpy(baseCpy, (char *) base);
+	strcat(baseCpy,"/");
 	strcat(baseCpy,tmp);
 
 #ifdef STARPU_HAVE_WINDOWS
@@ -144,13 +141,10 @@ starpu_stdio_open (void *base, void *pos, size_t size)
 	STARPU_ASSERT(obj != NULL);
 
 	/* create template */
-	unsigned int sizeBase = 16;
-	while(sizeBase < (strlen(base)+strlen(pos)+1))
-		sizeBase *= 2;
-	
-	char * baseCpy = malloc(sizeBase*sizeof(char));
+	char * baseCpy = malloc(strlen(base)+1+strlen(pos)+1);
 	STARPU_ASSERT(baseCpy != NULL);
 	strcpy(baseCpy,(char *) base);
+	strcat(baseCpy,(char *) "/");
 	strcat(baseCpy,(char *) pos);
 
 	int id = open(baseCpy, O_RDWR);

+ 6 - 5
src/core/disk_ops/unistd/disk_unistd_global.c

@@ -119,12 +119,13 @@ starpu_unistd_global_open (struct starpu_unistd_global_obj * obj, void *base, vo
 {
 	/* create template */
 	unsigned int sizeBase = 16;
-	while(sizeBase < (strlen(base)+strlen(pos)+1))
+	while(sizeBase < (strlen(base)+1+strlen(pos)+1))
 		sizeBase *= 2;
 	
 	char * baseCpy = malloc(sizeBase*sizeof(char));
 	STARPU_ASSERT(baseCpy != NULL);
 	strcpy(baseCpy,(char *) base);
+	strcat(baseCpy,(char *) "/");
 	strcat(baseCpy,(char *) pos);
 
 	int id = open(baseCpy, obj->flags);
@@ -169,10 +170,10 @@ starpu_unistd_global_read (void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *
 	STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex);
 
 	int res = lseek(tmp->descriptor, offset, SEEK_SET); 
-	STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd read failed");
+	STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd lseek for read failed: offset %lu got errno %d", (unsigned long) offset, errno);
 
 	ssize_t nb = read(tmp->descriptor, buf, size);
-	STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd read failed");
+	STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd read failed: size %lu got errno %d", (unsigned long) size, errno);
 	
 	STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex);
 
@@ -220,10 +221,10 @@ starpu_unistd_global_write (void *base STARPU_ATTRIBUTE_UNUSED, void *obj, const
 	STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex);
 	
 	int res = lseek(tmp->descriptor, offset, SEEK_SET); 
-	STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd write failed");
+	STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd lseek for write failed: offset %lu got errno %d", (unsigned long) offset, errno);
 
 	ssize_t nb = write (tmp->descriptor, buf, size);
-	STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd write failed");
+	STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd write failed: size %lu got errno %d", (unsigned long) size, errno);
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex);
 

+ 1 - 1
src/core/perfmodel/perfmodel_history.c

@@ -416,7 +416,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 	/* Dump the history into the model file in case it is necessary */
 	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
 	{
-		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us)\t\tdev (us)\t\tsum\t\tsum2\t\tn\n");
+		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us)\tdev (us)\t\tsum\t\tsum2\t\tn\n");
 		ptr = per_arch_model->list;
 		while (ptr)
 		{

+ 1 - 1
src/core/perfmodel/perfmodel_print.c

@@ -28,7 +28,7 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 	ptr = per_arch_model->list;
 
 	if (!parameter && ptr)
-		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us)\t\tstddev (us)\t\tn\n");
+		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us)\tstddev (us)\t\tn\n");
 
 	while (ptr)
 	{

+ 11 - 0
src/datawizard/coherency.c

@@ -681,6 +681,10 @@ int _starpu_fetch_task_input(struct _starpu_job *j, uint32_t mask)
 
 	int workerid = starpu_worker_get_id();
 
+#ifdef STARPU_USE_FXT
+	unsigned total_size = 0;
+#endif
+
 	unsigned index;
 	for (index = 0; index < nbuffers; index++)
 	{
@@ -701,8 +705,15 @@ int _starpu_fetch_task_input(struct _starpu_job *j, uint32_t mask)
 		ret = fetch_data(handle, local_replicate, mode);
 		if (STARPU_UNLIKELY(ret))
 			goto enomem;
+
+#ifdef STARPU_USE_FXT
+		total_size += _starpu_data_get_size(handle);
+#endif
 	}
 
+#ifdef STARPU_USE_FXT
+	FUT_DO_PROBE2(_STARPU_FUT_DATA_LOAD, workerid, total_size);
+#endif
 	/* Now that we have taken the data locks in locking order, fill the codelet interfaces in function order.  */
 	for (index = 0; index < nbuffers; index++)
 	{

+ 14 - 2
src/datawizard/malloc.c

@@ -370,7 +370,13 @@ starpu_malloc_on_node(unsigned dst_node, size_t size)
 	{
 		case STARPU_CPU_RAM:
 		{
-			starpu_malloc((void**) &addr, size);
+			starpu_malloc_flags((void**) &addr, size,
+#if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
+					0
+#else
+					STARPU_MALLOC_PINNED
+#endif
+					);
 			break;
 		}
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
@@ -462,7 +468,13 @@ starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 	switch(kind)
 	{
 		case STARPU_CPU_RAM:
-			starpu_free((void*)addr);
+			starpu_free_flags((void*)addr, size,
+#if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
+					0
+#else
+					STARPU_MALLOC_PINNED
+#endif
+					);
 			break;
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 		case STARPU_CUDA_RAM:

+ 103 - 0
src/debug/traces/starpu_fxt.c

@@ -1889,4 +1889,107 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
 	options->nworkers = nworkers;
 }
+
+/* Stream receiving the samples; it is opened and closed by
+ * starpu_fxt_write_data_trace(). */
+static FILE *out_data_total_trace_file;
+
+/* Per-worker state of the task currently being reconstructed from the trace. */
+struct parse_task
+{
+	/* First holds the timestamp of the codelet start, then the elapsed time once
+	 * the codelet end has been seen.  It is 64-bit wide so that neither the
+	 * timestamps of the 64-bit trace events nor long durations get truncated. */
+	unsigned long long exec_time;
+	/* Value carried by the last _STARPU_FUT_DATA_LOAD event of the worker. */
+	unsigned data_total;
+};
+
+/* One slot per worker id. */
+static struct parse_task tasks[STARPU_NMAXWORKERS];
+
+/* Scaling factor applied to the elapsed time before it is written out
+ * (presumably the trace timestamps are in nanoseconds -- TODO confirm). */
+#define NANO_SEC_TO_MILI_SEC 0.000001
+
+/* Append one sample to the output file: the scaled execution time of the task
+ * followed by the data amount recorded for it. */
+static void write_task(struct parse_task pt)
+{
+	double time_scaled = pt.exec_time * NANO_SEC_TO_MILI_SEC;
+	/* data_total is unsigned: %u is the matching conversion, %d is not */
+	fprintf(out_data_total_trace_file, "%lf %u\n", time_scaled, pt.data_total);
+}
+
+
+/*
+ * Parse the FxT trace stored in filename_in and write, for every
+ * _STARPU_FUT_END_CODELET_BODY event found in it, one
+ * "<execution time> <data size>" line into "data_total.txt".
+ *
+ * The process is terminated when a file cannot be opened or closed.
+ */
+void starpu_fxt_write_data_trace(char *filename_in)
+{
+	int fd_in;
+	fd_in = open(filename_in, O_RDONLY);
+	if (fd_in < 0)
+	{
+		perror("open failed :");
+		exit(-1);
+	}
+
+	fxt_t fut;
+	fut = fxt_fdopen(fd_in);
+	if (!fut)
+	{
+		perror("fxt_fdopen :");
+		exit(-1);
+	}
+
+	fxt_blockev_t block;
+	block = fxt_blockev_enter(fut);
+
+	out_data_total_trace_file = fopen("data_total.txt", "w+");
+	if(!out_data_total_trace_file)
+	{
+		perror("open failed :");
+		exit(-1);
+	}
+
+	struct fxt_ev_64 ev;
+	while(1)
+	{
+		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
+		if (ret != FXT_EV_OK)
+		{
+			break;
+		}
+
+		unsigned workerid;
+
+		switch (ev.code)
+		{
+		case _STARPU_FUT_WORKER_INIT_START:
+			register_worker_id(ev.param[4], ev.param[1]);
+			break;
+
+		case _STARPU_FUT_START_CODELET_BODY:
+			workerid = find_worker_id(ev.param[2]);
+			/* The id comes from the trace contents: never index tasks[]
+			 * with a value that does not designate one of its slots. */
+			if (workerid >= STARPU_NMAXWORKERS)
+				break;
+			/* remember when the codelet started */
+			tasks[workerid].exec_time = ev.time;
+			break;
+
+		case _STARPU_FUT_END_CODELET_BODY:
+			workerid = find_worker_id(ev.param[4]);
+			if (workerid >= STARPU_NMAXWORKERS)
+				break;
+			/* turn the stored start timestamp into a duration */
+			tasks[workerid].exec_time = ev.time - tasks[workerid].exec_time;
+			write_task(tasks[workerid]);
+			break;
+
+		case _STARPU_FUT_DATA_LOAD:
+			workerid = ev.param[0];
+			if (workerid >= STARPU_NMAXWORKERS)
+				break;
+			tasks[workerid].data_total = ev.param[1];
+			break;
+
+		default:
+#ifdef STARPU_VERBOSE
+			fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",
+				(unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time));
+#endif
+			break;
+		}
+	}
+
+	if (close(fd_in))
+	{
+		perror("close failed :");
+		exit(-1);
+	}
+
+	if(fclose(out_data_total_trace_file))
+	{
+		perror("close failed :");
+		exit(-1);
+	}
+
+}
 #endif // STARPU_USE_FXT

+ 1 - 1
tests/disk/disk_compute.c

@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 	int pid = getpid();
 	snprintf(pid_str, 16, "%d", pid);
 
-	char * base = "/tmp/";
+	char * base = "/tmp";
 
 	char * name_file_start = malloc(128*sizeof(char));
 	strcpy(name_file_start, "STARPU_DISK_COMPUTE_DATA_");

+ 1 - 1
tests/disk/disk_copy.c

@@ -41,7 +41,7 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) goto enodev;
 
 	/* register a disk */
-	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) "/tmp/", 1024*1024*200);
+	int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) "/tmp", 1024*1024*200);
 	/* can't write on /tmp/ */
 	if (new_dd == -ENOENT) goto enoent;
 	

+ 8 - 2
tools/Makefile.am

@@ -73,11 +73,13 @@ endif
 if STARPU_USE_FXT
 bin_PROGRAMS += 			\
 	starpu_fxt_tool			\
-	starpu_fxt_stats
+	starpu_fxt_stats		\
+	starpu_fxt_data_trace
 
 STARPU_TOOLS += 			\
 	starpu_fxt_tool			\
-	starpu_fxt_stats
+	starpu_fxt_stats		\
+	starpu_fxt_data_trace
 
 starpu_fxt_tool_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS) $(FXT_CFLAGS)
 starpu_fxt_tool_LDADD = $(FXT_LIBS)
@@ -86,6 +88,10 @@ starpu_fxt_tool_LDFLAGS = $(FXT_LDFLAGS)
 starpu_fxt_stats_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS) $(FXT_CFLAGS)
 starpu_fxt_stats_LDADD = $(FXT_LIBS)
 starpu_fxt_stats_LDFLAGS = $(FXT_LDFLAGS)
+
+starpu_fxt_data_trace_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS) $(FXT_CFLAGS)
+starpu_fxt_data_trace_LDADD = $(FXT_LIBS)
+starpu_fxt_data_trace_LDFLAGS = $(FXT_LDFLAGS)
 endif
 
 bin_PROGRAMS += 			\

+ 44 - 0
tools/starpu_fxt_data_trace.c

@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <config.h>
+#include <starpu.h>
+
+#define PROGNAME "starpu_fxt_data_trace"
+
+/* Print the expected command line on stderr, then leave with status 77. */
+static void usage(char *progname)
+{
+	fprintf(stderr,
+		"Usage : %s <filename>\n",
+		progname);
+
+	exit(77);
+}
+
+/* Create "data_trace.gp", a gnuplot script which plots the two columns of
+ * "data_total.txt" on logarithmic axes into "data_trace.eps".
+ * The process is terminated when the script cannot be created or closed. */
+static void write_plt(void)
+{
+	FILE *plt = fopen("data_trace.gp", "w+");
+	if(!plt){
+		/* name the file that is really being created, and report why it failed */
+		perror("Error while creating data_trace.gp");
+		exit(-1);
+	}
+
+	fprintf(plt, "#!/usr/bin/gnuplot -persist\n\n");
+	fprintf(plt, "set term postscript eps enhanced color\n");
+	fprintf(plt, "set output \"data_trace.eps\"\n");
+	fprintf(plt, "set title \"Data trace\"\n");
+	fprintf(plt, "set logscale x\n");
+	fprintf(plt, "set logscale y\n");
+	fprintf(plt, "set xlabel \"tasks size (ms)\"\n");
+	fprintf(plt, "set ylabel \"data size (B)\"\n");
+	fprintf(plt, "plot \"data_total.txt\" using 1:2 with dots lw 1\n");
+	/* fclose() flushes the buffered script, so its status must be checked */
+	if(fclose(plt)){
+		perror("close failed :");
+		exit(-1);
+	}
+}
+
+/* Entry point: takes exactly one argument, the trace file to process, then
+ * extracts the samples from it and generates the gnuplot script. */
+int main(int argc, char **argv)
+{
+	char *prog = argv[0];
+
+	if (argc < 2 || argc > 2)
+	{
+		/* wrong number of arguments */
+		usage(prog);
+	}
+
+	char *trace = argv[1];
+
+	starpu_fxt_write_data_trace(trace);
+	write_plt();
+
+	return 0;
+}