Olivier Aumage 10 lat temu
rodzic
commit
a0bdd13f08
38 zmienionych plików z 601 dodań i 231 usunięć
  1. 3 1
      ChangeLog
  2. 5 3
      configure.ac
  3. 2 0
      doc/doxygen/Makefile.am
  4. 9 0
      doc/doxygen/chapters/01building.doxy
  5. 4 4
      doc/doxygen/chapters/02basic_examples.doxy
  6. 1 1
      doc/doxygen/chapters/07data_management.doxy
  7. 10 0
      doc/doxygen/chapters/13offline_performance_tools.doxy
  8. 5 5
      doc/doxygen/chapters/18mic_scc_support.doxy
  9. 5 1
      doc/doxygen/chapters/api/workers.doxy
  10. 1 1
      doc/doxygen/doxygen-config.cfg.in
  11. 1 49
      doc/doxygen/refman.tex
  12. 39 94
      examples/Makefile.am
  13. 0 1
      examples/callback/prologue.c
  14. 10 1
      examples/openmp/vector_scal_omp.c
  15. 2 2
      include/starpu_data_interfaces.h
  16. 3 0
      include/starpu_worker.h
  17. 1 1
      mpi/examples/Makefile.am
  18. 1 1
      mpi/examples/mpi_lu/plu_outofcore_example.c
  19. 3 2
      mpi/examples/stencil/stencil5.c
  20. 12 10
      src/core/jobs.c
  21. 5 5
      src/core/perfmodel/perfmodel_print.c
  22. 7 6
      src/core/task.c
  23. 10 0
      src/core/workers.c
  24. 4 4
      src/datawizard/interfaces/bcsr_interface.c
  25. 3 3
      src/datawizard/interfaces/block_interface.c
  26. 2 2
      src/datawizard/interfaces/coo_interface.c
  27. 4 4
      src/datawizard/interfaces/csr_interface.c
  28. 17 2
      src/datawizard/interfaces/data_interface.c
  29. 3 3
      src/datawizard/interfaces/matrix_interface.c
  30. 3 3
      src/datawizard/interfaces/variable_interface.c
  31. 3 3
      src/datawizard/interfaces/vector_interface.c
  32. 2 2
      src/datawizard/interfaces/void_interface.c
  33. 3 1
      src/datawizard/memalloc.c
  34. 2 1
      src/datawizard/user_interactions.c
  35. 1 1
      src/debug/traces/starpu_paje.c
  36. 22 14
      tests/loader.c
  37. 284 0
      tools/starpu_paje_summary.Rmd
  38. 109 0
      tools/starpu_paje_summary.in

+ 3 - 1
ChangeLog

@@ -18,7 +18,7 @@ StarPU 1.2.0 (svn revision xxxx)
 ==============================================
 
 New features:
-  * Xeon Phi support
+  * MIC Xeon Phi support
   * SCC support
   * New function starpu_sched_ctx_exec_parallel_code to execute a
     parallel code on the workers of the given scheduler context
@@ -73,6 +73,7 @@ Small features:
     manage the tag.
   * On Linux x86, spinlocks now block after a hundred tries. This avoids
     typical 10ms pauses when the application thread tries to submit tasks.
+  * New function char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define
@@ -93,6 +94,7 @@ The scheduling context release
 
 New features:
   * One can register an existing on-GPU buffer to be used by a handle.
+  * Add the starpu_paje_summary statistics tool.
 
 StarPU 1.1.2 (svn revision xxx)
 ==============================================

+ 5 - 3
configure.ac

@@ -54,8 +54,8 @@ AC_CANONICAL_SYSTEM
 dnl Automake 1.11 introduced `silent-rules' and `color-tests'.  Use them
 dnl when they're available.
 m4_ifdef([AM_SILENT_RULES],
-  [AM_INIT_AUTOMAKE([1.11 -Wall foreign silent-rules color-tests parallel-tests])],
-  [AM_INIT_AUTOMAKE([1.10 -Wall foreign])])
+  [AM_INIT_AUTOMAKE([1.11 -Wall foreign silent-rules color-tests parallel-tests subdir-objects])],
+  [AM_INIT_AUTOMAKE([1.10 -Wall foreign subdir-objects])])
 
 m4_ifdef([AM_SILENT_RULES],
   [AM_SILENT_RULES(yes)])
@@ -92,7 +92,7 @@ AC_PATH_PROG(svnversioncommand, svnversion)
 
 # use svnversion to record the current repository revision only if
 # subversion is installed and we are in a working copy
-if test "$svnversioncommand" = "" || test `LC_ALL=C $svnversioncommand -n $srcdir` = "exported" ; then
+if test "$svnversioncommand" = "" || test "`LC_ALL=C $svnversioncommand -n $srcdir`" = "exported" ; then
    if test -f $srcdir/STARPU-REVISION ; then
       cp $srcdir/STARPU-REVISION .
    else
@@ -2477,6 +2477,7 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   chmod +x tools/starpu_workers_activity
   chmod +x tools/starpu_paje_draw_histogram
   chmod +x tools/starpu_paje_state_stats
+  chmod +x tools/starpu_paje_summary
   chmod +x doc/doxygen/doxygen_filter.sh
 ])
 
@@ -2506,6 +2507,7 @@ AC_OUTPUT([
 	tools/starpu_workers_activity
 	tools/starpu_paje_draw_histogram
 	tools/starpu_paje_state_stats
+	tools/starpu_paje_summary
 	socl/Makefile
 	socl/src/Makefile
 	socl/examples/Makefile

+ 2 - 0
doc/doxygen/Makefile.am

@@ -237,6 +237,8 @@ $(DOX_TAG): $(dox_inputs)
 	@if test -f html/navtree.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' html/navtree.js ; fi
 	@$(SED) -i 's/.*"Files.html".*//' html/pages.html
 	@if test -f latex/main.tex ; then mv latex/main.tex latex/index.tex ; fi
+	@$(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex
+	@cat $(top_srcdir)/doc/doxygen/refman.tex >> $(DOX_LATEX_DIR)/refman.tex
 
 dist_pdf_DATA = $(DOX_PDF)
 

+ 9 - 0
doc/doxygen/chapters/01building.doxy

@@ -108,6 +108,15 @@ $ cd build
 $ ../configure
 \endverbatim
 
+By default, StarPU will be installed in <c>/usr/local/bin</c>,
+<c>/usr/local/lib</c>, etc. You can specify an installation prefix
+other than <c>/usr/local</c> using the option <c>--prefix</c>, for
+instance:
+
+\verbatim
+$ ../configure --prefix=$HOME/starpu
+\endverbatim
+
 \subsection BuildingStarPU Building StarPU
 
 \verbatim

+ 4 - 4
doc/doxygen/chapters/02basic_examples.doxy

@@ -156,7 +156,7 @@ int main(int argc, char **argv)
 
 \verbatim
 $ make hello_world
-cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world
+cc $(pkg-config --cflags starpu-1.2) hello_world.c -o hello_world $(pkg-config --libs starpu-1.2)
 $ ./hello_world
 Hello world
 \endverbatim
@@ -234,7 +234,7 @@ int main(int argc, char **argv)
 
 \verbatim
 $ make hello_world
-cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world
+cc $(pkg-config --cflags starpu-1.2) hello_world.c -o hello_world $(pkg-config --libs starpu-1.2)
 $ ./hello_world
 Hello world (params = {1, 2.000000} )
 \endverbatim
@@ -285,7 +285,7 @@ int main(int argc, char **argv)
 
 \verbatim
 $ make hello_world
-cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world
+cc $(pkg-config --cflags starpu-1.2) hello_world.c -o hello_world $(pkg-config --libs starpu-1.2)
 $ ./hello_world
 Hello world
 Callback function (arg 42)
@@ -606,7 +606,7 @@ pointer.
 
 \verbatim
 $ make vector_scal
-cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2)  vector_scal.c   -o vector_scal
+cc $(pkg-config --cflags starpu-1.2) vector_scal.c -o vector_scal $(pkg-config --libs starpu-1.2)
 $ ./vector_scal
 0.000000 3.000000 6.000000 9.000000 12.000000
 \endverbatim

+ 1 - 1
doc/doxygen/chapters/07data_management.doxy

@@ -120,7 +120,7 @@ starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector,
                             NX, sizeof(vector[0]));
 
 /* Partition the vector in PARTS sub-vectors */
-starpu_data_filter f =
+struct starpu_data_filter f =
 {
     .filter_func = starpu_vector_filter_block,
     .nchildren = PARTS

+ 10 - 0
doc/doxygen/chapters/13offline_performance_tools.doxy

@@ -337,6 +337,7 @@ $ R
 > install.packages("plyr")
 > install.packages("ggplot2")
 > install.packages("data.table")
+> install.packages("knitr")
 \endverbatim
 
 The pj_dump tool from pajeng is also needed (see
@@ -364,6 +365,15 @@ and see the resulting pdf file:
 \image html paje_draw_histogram.png
 \image latex paje_draw_histogram.eps "" width=\textwidth
 
+A quick statistical report can be generated by using:
+
+\verbatim
+$ starpu_paje_summary native.trace simgrid.trace
+\endverbatim
+
+The report includes Gantt charts, execution summaries, state duration charts,
+and time distribution histograms.
+
 \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time
 
 StarPU can record a trace of what tasks are needed to complete the

+ 5 - 5
doc/doxygen/chapters/18mic_scc_support.doxy

@@ -6,13 +6,13 @@
  * See the file version.doxy for copying conditions.
  */
 
-/*! \page MICSCCSupport MIC/SCC Support
+/*! \page MICSCCSupport MIC Xeon Phi / SCC Support
 
 \section Compilation Compilation
 
 SCC support just needs the presence of the RCCE library.
 
-MIC support actually needs two compilations of StarPU, one for the host and one for
+MIC Xeon Phi support actually needs two compilations of StarPU, one for the host and one for
 the device. The PATH environment variable has to include the path to the
 cross-compilation toolchain, for instance <c>/usr/linux-k1om-4.7/bin</c>
 The script <c>mic-configure</c> can then be used to achieve the two compilations: it basically
@@ -20,12 +20,12 @@ calls <c>configure</c> as appropriate from two new directories: <c>build_mic</c>
 <c>build_host</c>. <c>make</c> and <c>make install</c> can then be used as usual and will
 recurse into both directories.
 
-\section PortingApplicationsToMICSCC Porting Applications To MIC/SCC
+\section PortingApplicationsToMICSCC Porting Applications To MIC Xeon Phi / SCC
 
-The simplest way to port an application to MIC/SCC is to set the field
+The simplest way to port an application to MIC Xeon Phi or SCC is to set the field
 starpu_codelet::cpu_funcs_name, to provide StarPU with the function
 name of the CPU implementation. StarPU will thus simply use the
-existing CPU implementation (cross-rebuilt in the MIC case). The
+existing CPU implementation (cross-rebuilt in the MIC Xeon Phi case). The
 functions have to be globally-visible (i.e. not <c>static</c>) for
 StarPU to be able to look them up.
 

+ 5 - 1
doc/doxygen/chapters/api/workers.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
  * See the file version.doxy for copying conditions.
  */
@@ -222,4 +222,8 @@ Returns the type of the given node as defined by
 this function should be used in the allocation function to determine
 on which device the memory needs to be allocated.
 
+\fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
+\ingroup API_Workers_Properties
+Returns the given worker type as a string.
+
 */

+ 1 - 1
doc/doxygen/doxygen-config.cfg.in

@@ -66,6 +66,6 @@ EXAMPLE_PATH           = @top_srcdir@/doc/doxygen \
 
 INPUT_FILTER           = @top_builddir@/doc/doxygen/doxygen_filter.sh
 
-LATEX_HEADER           = @top_srcdir@/doc/doxygen/refman.tex
+#LATEX_HEADER           = @top_srcdir@/doc/doxygen/refman.tex
 
 IMAGE_PATH             = @top_srcdir@/doc/doxygen/chapters

+ 1 - 49
doc/doxygen/refman.tex

@@ -1,52 +1,4 @@
-\documentclass{book}
-\usepackage[a4paper,top=2.5cm,bottom=2.5cm,left=2.5cm,right=2.5cm]{geometry}
-\usepackage{makeidx}
-\usepackage{natbib}
-\usepackage{graphicx}
-\usepackage{multicol}
-\usepackage{float}
-\usepackage{listings}
-\usepackage{color}
-\usepackage{ifthen}
-\usepackage[table]{xcolor}
-\usepackage{textcomp}
-\usepackage{alltt}
-\usepackage{ifpdf}
-\usepackage{./version}
-\ifpdf
-\usepackage[pdftex,
-            pagebackref=true,
-            colorlinks=true,
-            linkcolor=blue,
-            unicode
-           ]{hyperref}
-\else
-\usepackage[ps2pdf,
-            pagebackref=true,
-            colorlinks=true,
-            linkcolor=blue,
-            unicode
-           ]{hyperref}
-\usepackage{pspicture}
-\fi
-\usepackage[utf8]{inputenc}
-\usepackage{mathptmx}
-\usepackage[scaled=.90]{helvet}
-\usepackage{courier}
-\usepackage{sectsty}
-\usepackage{amssymb}
-\usepackage[titles]{tocloft}
-\usepackage{doxygen}
-\lstset{language=C++,inputencoding=utf8,basicstyle=\footnotesize,breaklines=true,breakatwhitespace=true,tabsize=8,numbers=left }
-\makeindex
-\setcounter{tocdepth}{3}
-\renewcommand{\familydefault}{\sfdefault}
-\hfuzz=15pt
-\setlength{\emergencystretch}{15pt}
-\hbadness=750
-\tolerance=750
-\begin{document}
-\hypersetup{pageanchor=false,citecolor=blue}
+\input{./version.sty}
 \begin{titlepage}
 \vspace*{4cm}
 {\Huge \textbf{StarPU Handbook}}\\

+ 39 - 94
examples/Makefile.am

@@ -123,22 +123,22 @@ noinst_HEADERS = 				\
 # What to install and what to check #
 #####################################
 
-STARPU_EXAMPLES	=
-TESTS		=	$(STARPU_EXAMPLES)
-TESTS		+=	scheduler/schedulers.sh
+examplebin_PROGRAMS 	+=	$(STARPU_EXAMPLES)
 
+TESTS			=	$(STARPU_EXAMPLES)
+TESTS			+=	scheduler/schedulers.sh
 
 if STARPU_HAVE_WINDOWS
-check_PROGRAMS	=	$(STARPU_EXAMPLES)
+check_PROGRAMS		=	$(STARPU_EXAMPLES)
 else
-check_PROGRAMS	=	$(LOADER) $(STARPU_EXAMPLES)
+check_PROGRAMS		=	$(LOADER) $(STARPU_EXAMPLES)
 endif
 
 if !STARPU_HAVE_WINDOWS
 ## test loader program
 if !STARPU_CROSS_COMPILING
 LOADER			=	loader
-loader_CPPFLAGS =  $(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
+loader_CPPFLAGS 	=	$(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
 LOADER_BIN		=	$(abs_top_builddir)/examples/$(LOADER)
 loader_SOURCES		=	../tests/loader.c
 else
@@ -155,22 +155,21 @@ endif
 
 endif
 
-examplebin_PROGRAMS +=				\
+# STARPU_EXAMPLES lists all applications which have to be compiled and checked.
+# Applications which should only be compiled are added directly to examplebin_PROGRAMS;
+# see for instance mandelbrot/mandelbrot.
+
+STARPU_EXAMPLES =				\
 	basic_examples/hello_world		\
 	basic_examples/vector_scal		\
 	basic_examples/mult			\
 	basic_examples/block			\
 	basic_examples/variable			\
 	basic_examples/multiformat              \
-	basic_examples/dynamic_handles		\
 	cpp/incrementer_cpp			\
-	filters/custom_mf/custom_mf_filter      \
 	filters/fvector				\
 	filters/fblock				\
 	filters/fmatrix				\
-	filters/shadow				\
-	filters/shadow2d			\
-	filters/shadow3d			\
 	tag_example/tag_example			\
 	tag_example/tag_example2		\
 	tag_example/tag_example3		\
@@ -185,33 +184,28 @@ examplebin_PROGRAMS +=				\
 	interface/complex			\
 	matvecmult/matvecmult			\
 	profiling/profiling			\
+	scheduler/dummy_sched			\
 	sched_ctx/sched_ctx			\
-	sched_ctx/parallel_code			\
-	sched_ctx/dummy_sched_with_ctx		\
 	sched_ctx/prio				\
-	sched_ctx/sched_ctx_without_sched_policy\
-	sched_ctx/nested_sched_ctxs		\
+	sched_ctx/dummy_sched_with_ctx		\
 	worker_collections/worker_tree_example  \
 	worker_collections/worker_list_example  \
 	reductions/dot_product			\
-	reductions/minmax_reduction		\
-	mandelbrot/mandelbrot			\
-	ppm_downscaler/ppm_downscaler		\
-	ppm_downscaler/yuv_downscaler
+	reductions/minmax_reduction
 
 if !STARPU_SIMGRID
-examplebin_PROGRAMS +=				\
+STARPU_EXAMPLES +=				\
 	scheduler/dummy_sched
 
 if STARPU_HAVE_F77_H
-examplebin_PROGRAMS +=				\
+STARPU_EXAMPLES +=				\
 	basic_examples/vector_scal_fortran	\
 	fortran/hello
 endif
 endif
 
 if !NO_BLAS_LIB
-examplebin_PROGRAMS +=				\
+STARPU_EXAMPLES +=				\
 	axpy/axpy				\
 	mult/sgemm 				\
 	mult/dgemm				\
@@ -229,7 +223,7 @@ examplebin_PROGRAMS +=				\
 endif
 
 if MKL_BLAS_LIB
-examplebin_PROGRAMS +=				\
+STARPU_EXAMPLES +=				\
 	lu/lu_example_complex_float		\
 	lu/lu_example_complex_double		\
 	lu/lu_implicit_example_complex_float	\
@@ -237,46 +231,10 @@ examplebin_PROGRAMS +=				\
 endif
 
 if ATLAS_BLAS_LIB
-examplebin_PROGRAMS +=				\
+STARPU_EXAMPLES +=				\
 	spmv/dw_block_spmv
 endif
 
-STARPU_EXAMPLES +=				\
-	basic_examples/hello_world		\
-	basic_examples/vector_scal		\
-	basic_examples/mult			\
-	basic_examples/block			\
-	basic_examples/variable			\
-	basic_examples/multiformat              \
-	cpp/incrementer_cpp			\
-	filters/fvector				\
-	filters/fblock				\
-	filters/fmatrix				\
-	tag_example/tag_example			\
-	tag_example/tag_example2		\
-	tag_example/tag_example3		\
-	tag_example/tag_example4		\
-	tag_example/tag_restartable		\
-	spmd/vector_scal_spmd			\
-	spmv/spmv				\
-	callback/callback			\
-	callback/prologue			\
-	incrementer/incrementer			\
-	binary/binary				\
-	interface/complex			\
-	matvecmult/matvecmult			\
-	profiling/profiling			\
-	scheduler/dummy_sched			\
-	sched_ctx/sched_ctx			\
-	sched_ctx/prio				\
-	sched_ctx/dummy_sched_with_ctx		\
-	sched_ctx/sched_ctx_without_sched_policy\
-	sched_ctx/nested_sched_ctxs		\
-	worker_collections/worker_tree_example  \
-	worker_collections/worker_list_example  \
-	reductions/dot_product			\
-	reductions/minmax_reduction
-
 if STARPU_LONG_CHECK
 STARPU_EXAMPLES +=				\
 	sched_ctx/parallel_code
@@ -288,35 +246,17 @@ STARPU_EXAMPLES +=				\
 	fortran/hello
 endif
 
-if !NO_BLAS_LIB
+if !STARPU_HAVE_WINDOWS
 STARPU_EXAMPLES +=				\
-	axpy/axpy				\
-	mult/sgemm 				\
-	mult/dgemm				\
-	cholesky/cholesky_tag			\
-	cholesky/cholesky_tile_tag		\
-	cholesky/cholesky_grain_tag		\
-	cholesky/cholesky_implicit		\
-	lu/lu_example_float			\
-	lu/lu_example_double			\
-	lu/lu_implicit_example_float		\
-	lu/lu_implicit_example_double		\
-	heat/heat				\
-	cg/cg					\
-	pipeline/pipeline
-endif
+	openmp/vector_scal_omp			\
+	sched_ctx/sched_ctx_without_sched_policy\
+	sched_ctx/nested_sched_ctxs		\
+	sched_ctx/sched_ctx_without_sched_policy
 
-if MKL_BLAS_LIB
+if STARPU_LONG_CHECK
 STARPU_EXAMPLES +=				\
-	lu/lu_example_complex_float		\
-	lu/lu_example_complex_double		\
-	lu/lu_implicit_example_complex_float	\
-	lu/lu_implicit_example_complex_double
+	sched_ctx/parallel_code
 endif
-
-if ATLAS_BLAS_LIB
-STARPU_EXAMPLES +=				\
-	spmv/dw_block_spmv
 endif
 
 ##################
@@ -428,7 +368,6 @@ nobase_STARPU_OPENCL_DATA_DATA += 		\
 	basic_examples/variable_kernels_opencl_kernel.cl
 endif
 
-
 ###########
 # Filters #
 ###########
@@ -448,10 +387,16 @@ nobase_STARPU_OPENCL_DATA_DATA += \
 	filters/fblock_opencl_kernel.cl
 endif
 
-
 #############################
 # Custom multiformat filter #
 #############################
+
+#TODO: investigate why this application fails with the following assertion:
+#lt-custom_mf_filter: .../src/datawizard/malloc.c:784: starpu_free_on_node: Assertion `chunk != _starpu_chunk_list_end(chunks[dst_node])' failed.
+
+examplebin_PROGRAMS +=				\
+	filters/custom_mf/custom_mf_filter
+
 filters_custom_mf_custom_mf_filter_SOURCES=\
 	filters/custom_mf/custom_mf_filter.c \
 	filters/custom_mf/custom_interface.c   \
@@ -826,6 +771,9 @@ endif
 # Mandelbrot Set #
 ##################
 
+examplebin_PROGRAMS +=				\
+	mandelbrot/mandelbrot
+
 mandelbrot_mandelbrot_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS)
 if HAVE_X11
 mandelbrot_mandelbrot_CPPFLAGS += $(X_CFLAGS)
@@ -846,9 +794,7 @@ top_hello_world_top_SOURCES =			\
 # Pi #
 ######
 
-check_PROGRAMS +=				\
-	pi/pi					\
-	pi/pi_redux
+if !STARPU_HAVE_WINDOWS
 
 examplebin_PROGRAMS +=				\
 	pi/pi					\
@@ -874,6 +820,7 @@ pi_pi_redux_SOURCES +=				\
 pi_pi_redux_LDADD =				\
 	$(STARPU_CURAND_LDFLAGS)
 endif
+endif
 
 ###########################
 # OpenGL interoperability #
@@ -915,9 +862,6 @@ endif
 ##################
 
 if !STARPU_HAVE_WINDOWS
-examplebin_PROGRAMS +=		\
-	openmp/vector_scal_omp
-
 openmp_vector_scal_omp_CFLAGS = \
 	$(AM_CFLAGS) -fopenmp
 
@@ -937,3 +881,4 @@ showcheck:
 	for i in $(SUBDIRS) ; do \
 		make -C $$i showcheck ; \
 	done
+

+ 0 - 1
examples/callback/prologue.c

@@ -17,7 +17,6 @@
 
 #include <starpu.h>
 #include <sys/time.h>
-#include <omp.h>
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 

+ 10 - 1
examples/openmp/vector_scal_omp.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2010-2013  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -93,6 +93,7 @@ int main(int argc, char **argv)
 	conf.sched_policy_name = "pheft";
 
 	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	starpu_data_handle_t vector_handle;
@@ -111,6 +112,7 @@ int main(int argc, char **argv)
 		task->cl_arg_size = sizeof(factor);
 
 		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
@@ -122,5 +124,12 @@ int main(int argc, char **argv)
 	FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]);
 	FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]);
 
+	free(vector);
 	return 0;
+
+enodev:
+	starpu_data_unregister(vector_handle);
+	free(vector);
+	starpu_shutdown();
+	return 77;
 }

+ 2 - 2
include/starpu_data_interfaces.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2014  Université de Bordeaux 1
- * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2011-2012  Institut National de Recherche en Informatique et Automatique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -111,7 +111,7 @@ struct starpu_data_interface_ops
 	uint32_t 	 (*footprint)			(starpu_data_handle_t handle);
 	int 		 (*compare)			(void *data_interface_a, void *data_interface_b);
 	void 		 (*display)			(starpu_data_handle_t handle, FILE *f);
-	ssize_t		 (*describe)			(void *interface, char *buf, size_t size);
+	ssize_t		 (*describe)			(void *data_interface, char *buf, size_t size);
 	enum starpu_data_interface_id interfaceid;
 	size_t interface_size;
 

+ 3 - 0
include/starpu_worker.h

@@ -116,6 +116,9 @@ struct starpu_tree* starpu_workers_get_tree(void);
 unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx);
 
 unsigned starpu_worker_is_slave(int workerid);
+
+char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
+
 #ifdef __cplusplus
 }
 #endif

+ 1 - 1
mpi/examples/Makefile.am

@@ -152,7 +152,7 @@ mpi_lu_plu_implicit_example_double_LDADD =	\
 	$(STARPU_BLAS_LDFLAGS) -lm
 
 mpi_lu_plu_implicit_example_double_SOURCES =	\
-	mpi_lu/plu_outofcore_example_double.c	\
+	mpi_lu/plu_implicit_example_double.c	\
 	mpi_lu/plu_solve_double.c		\
 	mpi_lu/pdlu_kernels.c			\
 	mpi_lu/pdlu_implicit.c			\

+ 1 - 1
mpi/examples/mpi_lu/plu_outofcore_example.c

@@ -38,7 +38,7 @@ static unsigned check = 0;
 static int p = 1;
 static int q = 1;
 static unsigned display = 0;
-static char *path = "/tmp/starpu-mpi_LU";
+static char *path = "./starpu-ooc-files";
 
 #ifdef STARPU_HAVE_LIBNUMA
 static unsigned numa = 0;

+ 3 - 2
mpi/examples/stencil/stencil5.c

@@ -165,11 +165,12 @@ int main(int argc, char **argv)
 		{
 			int mpi_rank = my_distrib2(x, y, size);
 			if (!data_handles[x][y] && (mpi_rank == my_rank
-				 || my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
-				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)))
+				 || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size)
+				 || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size)))
 			{
 				/* Register newly-needed data */
 				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned));
+				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
 			}
 			if (data_handles[x][y] && mpi_rank != starpu_data_get_rank(data_handles[x][y]))
 			{

+ 12 - 10
src/core/jobs.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2014  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011, 2014  INRIA
  *
@@ -215,22 +215,24 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 
 #ifdef STARPU_USE_SC_HYPERVISOR
-	int workerid = starpu_worker_get_id();
-	int i;
 	size_t data_size = 0;
-	for(i = 0; i < STARPU_NMAXBUFS; i++)
-	{
-		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
-		if (handle != NULL)
-			data_size += _starpu_data_get_size(handle);
-	}
+	int workerid = starpu_worker_get_id();
 #endif //STARPU_USE_SC_HYPERVISOR
 
 	/* We release handle reference count */
 	if (task->cl && !continuation)
 	{
 		unsigned i;
-		for (i=0; i<task->cl->nbuffers; i++)
+#ifdef STARPU_USE_SC_HYPERVISOR
+		for(i = 0; i < task->cl->nbuffers; i++)
+		{
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
+			if (handle != NULL)
+				data_size += _starpu_data_get_size(handle);
+		}
+#endif //STARPU_USE_SC_HYPERVISOR
+
+		for (i = 0; i < task->cl->nbuffers; i++)
 		{
 			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 			_starpu_spin_lock(&handle->header_lock);

+ 5 - 5
src/core/perfmodel/perfmodel_print.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011, 2013  Université de Bordeaux 1
- * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -194,7 +194,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 	{
 		if (strcmp(arch, "cpu") == 0)
 		{
-			unsigned implid;
+			int implid;
 			struct starpu_perfmodel_arch perf_arch;
 			perf_arch.type = STARPU_CPU_WORKER;
 			perf_arch.devid = 0;
@@ -214,7 +214,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 				exit(-1);
 			}
 
-			unsigned implid;
+			int implid;
 			struct starpu_perfmodel_arch perf_arch;
 			perf_arch.type = STARPU_CPU_WORKER;
 			perf_arch.devid = 0;
@@ -227,7 +227,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 		if (strcmp(arch, "cuda") == 0)
 		{
 			unsigned devid;
-			unsigned implid;
+			int implid;
 			struct starpu_perfmodel_arch perf_arch;
 			perf_arch.type = STARPU_CUDA_WORKER;
 			perf_arch.ncore = 0;
@@ -251,7 +251,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 			perf_arch.type = STARPU_CUDA_WORKER;
 			perf_arch.devid = gpuid;
 			perf_arch.ncore = 0;
-			unsigned implid;
+			int implid;
 			for (implid = 0; implid < STARPU_MAXIMPLEMENTATIONS; implid++)
 				starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
 			return 0;

+ 7 - 6
src/core/task.c

@@ -293,12 +293,13 @@ int _starpu_submit_job(struct _starpu_job *j)
 		_starpu_compute_buffers_footprint(j->task->cl->model, &arch, 0, j);
 		int i;
 		size_t data_size = 0;
-		for(i = 0; i < STARPU_NMAXBUFS; i++)
-		{
-			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
-			if (handle != NULL)
-				data_size += _starpu_data_get_size(handle);
-		}
+		if (j->task->cl)
+			for(i = 0; i < j->task->cl->nbuffers; i++)
+			{
+				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
+				if (handle != NULL)
+					data_size += _starpu_data_get_size(handle);
+			}
 
 		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size);

+ 10 - 0
src/core/workers.c

@@ -1987,3 +1987,13 @@ unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
 	return nsched_ctxs;
 }
 
+char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
+{
+	if (type == STARPU_CPU_WORKER) return "STARPU_CPU_WORKER";
+	if (type == STARPU_CUDA_WORKER) return "STARPU_CUDA_WORKER";
+	if (type == STARPU_OPENCL_WORKER) return "STARPU_OPENCL_WORKER";
+	if (type == STARPU_MIC_WORKER) return "STARPU_MIC_WORKER";
+	if (type == STARPU_SCC_WORKER) return "STARPU_SCC_WORKER";
+	if (type == STARPU_ANY_WORKER) return "STARPU_ANY_WORKER";
+	return "STARPU_unknown_WORKER";
+}

+ 4 - 4
src/datawizard/interfaces/bcsr_interface.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2014  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,7 +44,7 @@ static void free_bcsr_buffer_on_node(void *data_interface, unsigned node);
 static size_t bcsr_interface_get_size(starpu_data_handle_t handle);
 static int bcsr_compare(void *data_interface_a, void *data_interface_b);
 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle);
-static ssize_t describe(void *interface, char *buf, size_t size);
+static ssize_t describe(void *data_interface, char *buf, size_t size);
 
 
 struct starpu_data_interface_ops starpu_interface_bcsr_ops =
@@ -327,9 +327,9 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 	return ret;
 }
 
-static ssize_t describe(void *interface, char *buf, size_t size)
+static ssize_t describe(void *data_interface, char *buf, size_t size)
 {
-	struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) interface;
+	struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) data_interface;
 	return snprintf(buf, size, "b%ux%ux%ux%ux%u",
 			(unsigned) bcsr->nnz,
 			(unsigned) bcsr->nrow,

+ 3 - 3
src/datawizard/interfaces/block_interface.c

@@ -77,7 +77,7 @@ static int block_compare(void *data_interface_a, void *data_interface_b);
 static void display_block_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_block_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_block_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
-static ssize_t describe(void *interface, char *buf, size_t size);
+static ssize_t describe(void *data_interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_block_ops =
 {
@@ -731,9 +731,9 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 	return ret;
 }
 
-static ssize_t describe(void *interface, char *buf, size_t size)
+static ssize_t describe(void *data_interface, char *buf, size_t size)
 {
-	struct starpu_block_interface *block = (struct starpu_block_interface *) interface;
+	struct starpu_block_interface *block = (struct starpu_block_interface *) data_interface;
 	return snprintf(buf, size, "B%ux%ux%ux%u",
 			(unsigned) block->nx,
 			(unsigned) block->ny,

+ 2 - 2
src/datawizard/interfaces/coo_interface.c

@@ -190,9 +190,9 @@ display_coo_interface(starpu_data_handle_t handle, FILE *f)
 	fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
 }
 
-static ssize_t describe(void *interface, char *buf, size_t size)
+static ssize_t describe(void *data_interface, char *buf, size_t size)
 {
-	struct starpu_coo_interface *coo = (struct starpu_coo_interface *) interface;
+	struct starpu_coo_interface *coo = (struct starpu_coo_interface *) data_interface;
 	return snprintf(buf, size, "M%ux%ux%ux%u",
 			(unsigned) coo->nx,
 			(unsigned) coo->ny,

+ 4 - 4
src/datawizard/interfaces/csr_interface.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -43,7 +43,7 @@ static void free_csr_buffer_on_node(void *data_interface, unsigned node);
 static size_t csr_interface_get_size(starpu_data_handle_t handle);
 static int csr_compare(void *data_interface_a, void *data_interface_b);
 static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle);
-static ssize_t describe(void *interface, char *buf, size_t size);
+static ssize_t describe(void *data_interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_csr_ops =
 {
@@ -293,9 +293,9 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 	return ret;
 }
 
-static ssize_t describe(void *interface, char *buf, size_t size)
+static ssize_t describe(void *data_interface, char *buf, size_t size)
 {
-	struct starpu_csr_interface *csr = (struct starpu_csr_interface *) interface;
+	struct starpu_csr_interface *csr = (struct starpu_csr_interface *) data_interface;
 	return snprintf(buf, size, "C%ux%ux%u",
 			(unsigned) csr->nnz,
 			(unsigned) csr->nrow,

+ 17 - 2
src/datawizard/interfaces/data_interface.c

@@ -66,6 +66,11 @@ void _starpu_data_interface_shutdown()
 {
 	struct handle_entry *entry, *tmp;
 
+	if (registered_handles)
+	{
+		_STARPU_DISP("[warning] The application has not unregistered all data handles.\n");
+	}
+
 	_starpu_spin_destroy(&registered_handles_lock);
 
 	HASH_ITER(hh, registered_handles, entry, tmp)
@@ -503,7 +508,12 @@ int _starpu_data_set_rank(starpu_data_handle_t handle, int rank)
 
 int starpu_data_set_rank(starpu_data_handle_t handle, int rank)
 {
-	_STARPU_DISP("Warning: You should call starpu_mpi_data_register which will insure MPI cache will be cleared when unregistering the data\n");
+	static int first=1;
+	if (first)
+	{
+		_STARPU_DISP("Warning: You should call starpu_mpi_data_register which will insure MPI cache will be cleared when unregistering the data\n");
+		first=0;
+	}
 	return _starpu_data_set_rank(handle, rank);
 }
 
@@ -552,7 +562,12 @@ int _starpu_data_set_tag(starpu_data_handle_t handle, int tag)
 
 int starpu_data_set_tag(starpu_data_handle_t handle, int tag)
 {
-	_STARPU_DISP("Warning: You should call starpu_mpi_data_register which will insure MPI cache will be cleared when unregistering the data\n");
+	static int first=1;
+	if (first)
+	{
+		_STARPU_DISP("Warning: You should call starpu_mpi_data_register which will insure MPI cache will be cleared when unregistering the data\n");
+		first=0;
+	}
 	return _starpu_data_set_tag(handle, tag);
 }
 

+ 3 - 3
src/datawizard/interfaces/matrix_interface.c

@@ -90,7 +90,7 @@ static int matrix_compare(void *data_interface_a, void *data_interface_b);
 static void display_matrix_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_matrix_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_matrix_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
-static ssize_t describe(void *interface, char *buf, size_t size);
+static ssize_t describe(void *data_interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_matrix_ops =
 {
@@ -679,9 +679,9 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 	return ret;
 }
 
-static ssize_t describe(void *interface, char *buf, size_t size)
+static ssize_t describe(void *data_interface, char *buf, size_t size)
 {
-	struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *) interface;
+	struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *) data_interface;
 	return snprintf(buf, size, "M%ux%ux%u",
 			(unsigned) matrix->nx,
 			(unsigned) matrix->ny,

+ 3 - 3
src/datawizard/interfaces/variable_interface.c

@@ -44,7 +44,7 @@ static int variable_compare(void *data_interface_a, void *data_interface_b);
 static void display_variable_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_variable_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
-static ssize_t describe(void *interface, char *buf, size_t size);
+static ssize_t describe(void *data_interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_variable_ops =
 {
@@ -247,9 +247,9 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 
 	return ret;
 }
-static ssize_t describe(void *interface, char *buf, size_t size)
+static ssize_t describe(void *data_interface, char *buf, size_t size)
 {
-	struct starpu_variable_interface *variable = (struct starpu_variable_interface *) interface;
+	struct starpu_variable_interface *variable = (struct starpu_variable_interface *) data_interface;
 	return snprintf(buf, size, "v%u",
 			(unsigned) variable->elemsize);
 }

+ 3 - 3
src/datawizard/interfaces/vector_interface.c

@@ -44,7 +44,7 @@ static int vector_compare(void *data_interface_a, void *data_interface_b);
 static void display_vector_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_vector_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
-static ssize_t describe(void *interface, char *buf, size_t size);
+static ssize_t describe(void *data_interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_vector_ops =
 {
@@ -285,9 +285,9 @@ static int copy_any_to_any(void *src_interface, unsigned src_node,
 	return ret;
 }
 
-static ssize_t describe(void *interface, char *buf, size_t size)
+static ssize_t describe(void *data_interface, char *buf, size_t size)
 {
-	struct starpu_vector_interface *vector = (struct starpu_vector_interface *) interface;
+	struct starpu_vector_interface *vector = (struct starpu_vector_interface *) data_interface;
 	return snprintf(buf, size, "V%ux%u",
 			(unsigned) vector->nx,
 			(unsigned) vector->elemsize);

+ 2 - 2
src/datawizard/interfaces/void_interface.c

@@ -41,7 +41,7 @@ static int void_compare(void *data_interface_a, void *data_interface_b);
 static void display_void_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_void_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_void_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
-static ssize_t describe(void *interface, char *buf, size_t size);
+static ssize_t describe(void *data_interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_void_ops =
 {
@@ -140,7 +140,7 @@ static int dummy_copy(void *src_interface STARPU_ATTRIBUTE_UNUSED,
 	return 0;
 }
 
-static ssize_t describe(void *interface, char *buf, size_t size)
+static ssize_t describe(void *data_interface, char *buf, size_t size)
 {
 	return snprintf(buf, size, "0");
 }

+ 3 - 1
src/datawizard/memalloc.c

@@ -983,8 +983,10 @@ int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_
 	replicate->allocated = 1;
 	replicate->automatically_allocated = 1;
 
-	if (dst_node == STARPU_MAIN_RAM)
+	if (replicate->relaxed_coherency == 0 && dst_node == STARPU_MAIN_RAM)
 	{
+		/* We are allocating the buffer in main memory, also register it
+		 * for the gcc plugin.  */
 		void *ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
 		if (ptr != NULL)
 		{

+ 2 - 1
src/datawizard/user_interactions.c

@@ -322,7 +322,8 @@ int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum star
 	 * We enqueue the "post" sync task in the list associated to the handle
 	 * so that it is submitted by the starpu_data_release
 	 * function. */
-	_starpu_add_post_sync_tasks(wrapper.post_sync_task, handle);
+	if (sequential_consistency)
+		_starpu_add_post_sync_tasks(wrapper.post_sync_task, handle);
 
         _STARPU_LOG_OUT();
 	return 0;

+ 1 - 1
src/debug/traces/starpu_paje.c

@@ -284,7 +284,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 6       Sl       Ctx%u      Sleeping         \".9 .1 .0\"		\n\
 6       P       Ctx%u       Progressing         \".4 .1 .6\"		\n\
 6       U       Ctx%u       Unpartitioning         \".0 .0 1.0\"		\n",
-		i, i, i, i, i, i, i, i, i, i);
+		i, i, i, i, i, i, i, i, i, i, i);
 	fprintf(file, "\
 6       A       MS      Allocating         \".4 .1 .0\"		\n\
 6       Ar       MS      AllocatingReuse       \".1 .1 .8\"		\n\

+ 22 - 14
tests/loader.c

@@ -129,28 +129,36 @@ static void test_cleaner(int sig)
 	exit(EXIT_FAILURE);
 }
 
+/*
+ * Replace the first occurrence of motif in *src with value.
+ *
+ * On success *src is pointed at a newly allocated string and 1 is returned.
+ * The previous *src is left untouched and is not freed here (this function
+ * cannot tell whether it was heap-allocated).  Returns 0, leaving *src
+ * unchanged, when motif is empty or does not occur in *src.
+ */
+static int _decode(char **src, char *motif, const char *value)
+{
+	size_t motif_len = strlen(motif);
+	char *found;
+
+	/* An empty motif matches everywhere and would make callers loop forever. */
+	if (motif_len == 0) return 0;
+
+	found = strstr(*src, motif);
+	if (found == NULL) return 0;
+
+	size_t prefix_len = (size_t) (found - *src);
+	size_t value_len = strlen(value);
+	const char *tail = found + motif_len;
+	size_t tail_len = strlen(tail);
+
+	/* Exact size of the result, including the terminating NUL. */
+	char *new_src = malloc(prefix_len + value_len + tail_len + 1);
+	if (new_src == NULL)
+	{
+		fprintf(stderr, "error: out of memory\n");
+		exit(EXIT_FAILURE);
+	}
+
+	memcpy(new_src, *src, prefix_len);
+	memcpy(new_src + prefix_len, value, value_len);
+	/* tail_len + 1 also copies the terminating NUL. */
+	memcpy(new_src + prefix_len + value_len, tail, tail_len + 1);
+
+	/* Hand over the buffer itself; the former strdup() copy leaked it. */
+	*src = new_src;
+	return 1;
+}
+
 static void decode(char **src, char *motif, const char *value)
 {
 	if (*src)
 	{
-		char *y = strstr(*src, motif);
-		if (y && value == NULL)
+		if (strstr(*src, motif) && value == NULL)
 		{
 			fprintf(stderr, "error: $%s undefined\n", motif);
 			exit(EXIT_FAILURE);
 		}
-		while (y)
-		{
-			char *neo = malloc((strlen(*src)-strlen(motif)+strlen(value)) * sizeof(char));
-			char *to = neo;
-
-			to = strncpy(to, *src, strlen(*src)-strlen(y)); to += strlen(*src)-strlen(y);
-			to = strcpy(to, value); to += strlen(value);
-			strcpy(to, y+strlen(motif));
-
-			*src = strdup(neo);
-			y = strstr(*src, motif);
-		}
+		int d = _decode(src, motif, value);
+		while (d)
+			d = _decode(src, motif, value);
 	}
 }
 

+ 284 - 0
tools/starpu_paje_summary.Rmd

@@ -0,0 +1,284 @@
+<div id="table-of-contents">
+<h2>Table of Contents</h2>
+<div id="text-table-of-contents">
+<ul>
+<li><a href="#sec-1">1. Introduction</a>
+<ul>
+<li>
+<ul>
+<li><a href="#sec-1-0-1">1.0.1. How to compile</a></li>
+<li><a href="#sec-1-0-2">1.0.2. Software dependencies</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a href="#sec-2">2. Gantt Charts of the whole Trace</a></li>
+<li><a href="#sec-3">3. Table Summary</a></li>
+<li><a href="#sec-4">4. State Duration during the Execution Time</a></li>
+<li><a href="#sec-5">5. Distribution Histograms</a></li>
+</ul>
+</div>
+</div>
+```{r Setup, echo=FALSE}
+opts_chunk$set(echo=FALSE)
+```
+
+
+
+```{r Install_R_libraries}
+# Return TRUE when every package named in `package` can be attached with
+# require(), FALSE as soon as at least one of them cannot.  Messages and
+# warnings emitted while attaching are suppressed.
+InstalledPackage <- function(package)
+{
+    attached <- suppressMessages(suppressWarnings(
+        sapply(package, require, quietly = TRUE, character.only = TRUE, warn.conflicts = FALSE)))
+    not_attached <- package[!attached]
+    length(not_attached) == 0
+}
+
+# Tell whether a CRAN mirror has been selected, i.e. whether the "CRAN"
+# entry of the `repos` option differs from the "@CRAN@" placeholder.
+CRANChoosen <- function()
+{
+    cran_entry <- getOption("repos")["CRAN"]
+    cran_entry != "@CRAN@"
+}
+
+# Make sure `package` is attached, installing it from CRAN when necessary.
+# Returns TRUE when the package ends up attached, FALSE when it still cannot
+# be loaded after the installation attempt.
+UsePackage <- function(package, defaultCRANmirror = "http://cran.at.r-project.org")
+{
+    if(InstalledPackage(package)) return(TRUE)
+
+    if(!CRANChoosen())
+    {
+        # chooseCRANmirror() can raise an error when R is not interactive
+        # (e.g. under Rscript); swallow it so that the default mirror
+        # below is used instead of aborting the whole report.
+        try(chooseCRANmirror(), silent = TRUE)
+        if(!CRANChoosen())
+        {
+            options(repos = c(CRAN = defaultCRANmirror))
+        }
+    }
+
+    suppressMessages(suppressWarnings(install.packages(package)))
+    InstalledPackage(package)
+}
+
+# Now install desired libraries
+libraries <- c("ggplot2", "plyr", "data.table", "RColorBrewer")
+for(libr in libraries) 
+{ 
+    if(!UsePackage(libr))
+    {
+	stop("Error!", libr)
+    }
+}
+```
+
+```{r Load_R_files}
+# Load ggplot and plyr just for the following cases
+   library(ggplot2)
+   library(plyr)
+   library(data.table)
+   library(RColorBrewer) 
+
+# Defining non-computation states:
+def_states<-c("Initializing","Deinitializing","Overhead","Nothing","Sleeping","Freeing","Allocating","WritingBack","FetchingInput","PushingOutput","Callback","Progressing","Unpartitioning","AllocatingReuse","Reclaiming","DriverCopy","DriverCopyAsync","Scheduling","Executing")
+
+# Function for reading .csv file
+# Read one .csv trace file (as produced by pj_dump) and return it as a
+# data frame.
+#
+# file:   path of the .csv file; it has no header line and its eight
+#         columns are named Nature, ResourceId, Type, Start, End, Duration,
+#         Depth and Value below.
+# range1, range2: unused; kept so that existing callers stay valid.
+#
+# The result keeps ResourceId, Start, End, Duration and Value, and adds an
+# Origin column holding the file name.  Abbreviated state codes are expanded
+# to their full names, and times are shifted so the trace starts at zero.
+read_df <- function(file, range1 = NULL, range2 = NULL) {
+  df <- read.csv(file, header=FALSE, strip.white=TRUE)
+  names(df) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value")
+  df <- df[!(names(df) %in% c("Nature","Type", "Depth"))]
+  df$Origin <- as.factor(as.character(file))
+
+  # Short state codes and the full names they stand for
+  state_names <- c(F="Freeing", A="Allocating", W="WritingBack", No="Nothing",
+                   I="Initializing", D="Deinitializing", Fi="FetchingInput",
+                   Po="PushingOutput", C="Callback", B="Overhead",
+                   Sc="Scheduling", E="Executing", Sl="Sleeping",
+                   P="Progressing", U="Unpartitioning", Ar="AllocatingReuse",
+                   R="Reclaiming", Co="DriverCopy", CoA="DriverCopyAsync")
+
+  # Expand the codes in one pass; any other value (kernel names) is kept as is
+  df$Value <- as.character(df$Value)
+  is_code <- df$Value %in% names(state_names)
+  df$Value[is_code] <- unname(state_names[df$Value[is_code]])
+
+  # Small cleanup
+  df$Start <- round(df$Start, digits=1)
+  df$ResourceId <- as.factor(df$ResourceId)
+  df$Value <- as.factor(df$Value)
+
+  # Start from zero; End is recomputed from the shifted Start
+  df$Start <- df$Start - min(df$Start)
+  df$End <- df$Start + df$Duration
+
+  # Return data frame
+  df
+}
+```
+
+```{r Load_traces}
+# Read every trace listed in input_traces (falling back to the two example
+# files when input_traces is not defined) and stack them into one table.
+df<-data.frame()
+if( !exists("input_traces") )
+  input_traces<-c("example.native.trace.csv", "example.simgrid.trace.csv")
+
+# seq_along() yields no iteration for an empty vector, unlike 1:length()
+for (i in seq_along(input_traces)){
+  dfs<-read_df(input_traces[i])
+  df<-rbindlist(list(df,dfs))
+}
+
+# Color palettes: one color per distinct state, interpolated from "Set1"
+colourCount = length(unique(df$Value))
+getPalette = colorRampPalette(brewer.pal(9, "Set1"))
+
+# Order of Value so we can have good colors:
+# states not listed in def_states come first (sorted), then def_states
+ker_states<-as.character(unique(df[!(df$Value %in% def_states),Value]))
+ordered_states<-append(sort(ker_states), def_states)
+df$Value <- factor(df$Value, levels=ordered_states)
+
+# Order of ResourceId so we can have y-axis
+df$ResourceId <- factor(df$ResourceId, levels=sort(as.character(unique(df$ResourceId))))
+```
+
+# Introduction
+
+This document presents a basic analysis of multiple StarPU
+traces. First, paje *traces* are converted into *.csv* files and
+then we analyze them with **R**. This summary is a first step that
+should help researchers verify their hypotheses or find problematic
+areas that require more exhaustive investigation.
+
+Be cautious, as the following results are only a brief analysis of
+the traces and many important phenomena could still be hidden. Also,
+be very careful when comparing different states or traces. Even
+though some large discrepancies can be irrelevant, in other cases
+even the smallest differences can be essential in understanding what
+exactly happened during the StarPU execution.
+
+### How to compile
+
+    starpu_paje_summary example.native.trace example.simgrid.trace
+
+### Software dependencies
+
+In order to run this analysis you need to have R installed:
+
+    sudo apt-get install r-base 
+
+The easiest way to transform *paje* traces generated by StarPU into *.csv* is to use the *pj_dump* program (<https://github.com/schnorr/pajeng>), so we encourage users to install it.
+
+Once R is installed, one needs to start R (e.g., from a terminal) and install the *knitr* package:
+
+    R> install.packages("knitr")
+
+Additional R packages used in this analysis (*ggplot2, plyr, data.table, RColorBrewer*) will be installed automatically when the document is compiled for the first time. If there is any trouble, install them by hand directly from R (the same way as *knitr*)
+
+# Gantt Charts of the whole Trace
+
+First, we show a simple Gantt chart of every trace. The x-axis is a
+simple timeline of the execution, *Resources* on the y-axis correspond
+to the different CPUs/GPUs that were used, and finally the different
+colors represent the different *States* of the application.
+
+This kind of figure can often point to idle time or
+synchronization problems. A small disadvantage is that in most cases
+there are too many states, thus it is impossible to display them all
+on a single plot without aggregation. Therefore, for any strange
+behavior at a certain part of the trace, we strongly suggest zooming
+in on the interval in which it occurred.
+
+```{r Gantt1}
+ggplot(df,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) + 
+ theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + 
+ geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") + 
+ facet_wrap(~Origin,ncol=1,scale="free_y")
+```
+
+Second, we will concentrate only on computation kernel states, to
+get rid of visualization artifacts that can be introduced by other
+(sometimes irrelevant) states. Normally, this plot should not be too
+different from the previous one.
+
+```{r Gantt2}
+# Select only computation kernels, i.e. drop every state listed in def_states
+ df1 <- df[!(df$Value %in% def_states),]
+
+# Start from zero
+  m <- min(df1$Start)
+  df1$Start <- df1$Start - m
+  df1$End <- df1$Start+df1$Duration
+
+# Plot: one segment per state occurrence, one panel per trace file
+ ggplot(df1,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) + 
+  theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + 
+  geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") + 
+  facet_wrap(~Origin,ncol=1,scale="free_y")
+```
+
+# Table Summary
+
+Here we present how much time the application spent in each state
+(OverallDuration), how many times it was in that state (Count),
+the mean and median values of the duration (Mean and Median), and
+finally the standard deviation (StandDev).
+
+The general information provided by this table can sometimes give
+application experts an idea of which parts of the code are not working
+as desired. Be aware that this kind of table hides many important
+things, such as outliers, multiple modes, etc.
+
+```{r Table}
+options(width=120)
+ddply(df,.(Value,Origin), summarize, OverallDuration=sum(Duration), Count=length(Duration), Mean=mean(Duration), Median=median(Duration), StandDev=sd(Duration))
+```
+
+# State Duration during the Execution Time
+
+Now, we show how the duration of each state changed during the
+execution. This can display the general behavior of a state; show
+whether there are outliers or multiple modes; whether some events
+occur in groups, etc. It can also suggest a strange behavior of a
+state during a certain time interval, which should be investigated
+more carefully later.
+
+  However, since each event is represented by a single point (and
+there is no "alpha" factor), those events that happen almost
+simultaneously are overplotted. Therefore density of events along
+execution time may not be easy to read.
+
+```{r Dur}
+ggplot(df,aes(x=Start,y=Duration)) + geom_point(aes(color=Value)) + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + ggtitle("State Duration during the Execution Time") + theme(legend.position="none") + ylab("Duration [ms]") + xlab("Time [ms]") + facet_grid(Value~Origin, scale="free_y")
+```
+
+# Distribution Histograms
+
+Finally, we show the distribution of *Duration* for each state in the
+form of histograms. The x-axis is partitioned into bins with equidistant
+time intervals in milliseconds, while the y-axis represents the number of
+occurrences inside such intervals for a certain state. Note that for
+the first plot the y-axis is not fixed, meaning that the scale changes
+from one row to another. This plot allows one not only to see what
+the most frequent duration of a state was, but also to compare
+durations between different states.
+
+```{r Hist1}
+ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Value~Origin,scales = "free_y")
+```
+
+Similar to the previous figure, only now traces are shown vertically
+instead of horizontally. Note that for this plot the x-axis is not fixed,
+meaning that the scale changes from one column to another. This plot
+makes it possible to compare the frequency of different states and, in
+the case of multiple traces, to easily compare the duration distribution
+for each state.
+
+```{r Hist2}
+ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Origin~Value,scales = "free_x")
+```

+ 109 - 0
tools/starpu_paje_summary.in

@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+# 
+# Copyright (C) 2014  Université Joseph Fourier
+# Copyright (C) 2014  Université Bordeaux
+# 
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+# 
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+# Script for giving statistical analysis of the paje trace
+
+set -e # fail fast
+
+# File names
+basename="$PWD"
+outputfile="summary.html"
+# The R Markdown template is looked up next to this script; every expansion
+# is quoted so that an installation path containing spaces still works.
+analysis_script="$(dirname "$(which "$0")")/starpu_paje_summary.Rmd"
+analysis_input=""
+
+# Command line arguments
+inputfiles=""
+
+# Print the usage message (purpose, options, examples and the bug-report
+# address) on standard output.
+help_script()
+{
+cat << EOF
+Give statistical analysis of the paje trace
+
+Options:
+   -h      Show this message
+
+Examples:
+$0 example.native.trace
+$0 example.native.trace example.simgrid.trace
+
+Report bugs to <@PACKAGE_BUGREPORT@>
+EOF
+}
+
+# --version: print the program name and package version, then stop.
+# $0 is used for the program name, as in the help message.
+if [ "$1" = "--version" ] ; then
+    echo "$0 (@PACKAGE_NAME@) @PACKAGE_VERSION@"
+    exit 0
+fi
+
+# -h, --help or no argument at all: print the usage message, then stop.
+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
+    help_script
+    exit 0
+fi
+
+while getopts "h" opt; do
+  case $opt in
+    \?)
+      echo "Invalid option: -$OPTARG"
+      help_script
+      exit 3
+      ;;
+  esac
+done
+
+# Reading files that need to be analyzed
+shift $((OPTIND - 1))
+inputfiles=$@
+# Error if there is no input files specified
+if [[ $# < 1 ]]; then
+    echo "Error!"
+    help_script
+    exit 2
+fi
+
+#####################################
+# Transforming input files into .csv
+for file in $inputfiles; do
+    # -s is true only for a file that exists and is not empty
+    if [ ! -s "$file" ]
+	then
+	echo "Error: file $file does not exist or is empty!"
+	exit 5
+    fi
+# Sorting traces
+    # start.trace: lines starting with '%' or with the word 1-7 or 9
+    grep -e '^\(\(%\)\|\(\(1\|2\|3\|4\|5\|6\|7\|9\)\>\)\)' "$file" > start.trace
+    # end.trace: every line that does NOT start with '%' or with the word
+    # 1-7, 9, 18 or 19; these are then sorted on their second field
+    grep -e '^\(\(%\)\|\(\(1\|2\|3\|4\|5\|6\|7\|9\|18\|19\)\>\)\)' -v  "$file" > end.trace
+    sort -s -V --key=2,2 end.trace > endSorted.trace
+    cat start.trace endSorted.trace > outputSorted.trace
+
+# Converting to .csv, keeping only the lines that start with "State"
+    pj_dump -n outputSorted.trace > "$file.csv"
+    perl -i -ne 'print if /^State/' "$file.csv"
+done
+
+analysis_input=`echo \"$inputfiles".csv\"" | sed 's/  */.csv", "/g'`
+
+#####################################
+# Running analysis file to get actual results
+Rscript -e "library(knitr); input_traces = c($analysis_input) ; outputhtml='$outputfile';\
+            outputRmd = gsub('.html\$','.Rmd',outputhtml);\
+            knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)"
+
+# Cleanup: delete temporary files
+rm -f outputSorted.trace
+rm -f start.trace
+rm -f end.trace
+rm -f endSorted.trace