Parcourir la source

Merge remote-tracking branch 'svn/trunk' into branches/multiple_regression

Luka Stanisic il y a 8 ans
Parent
commit
131d7f3582
100 fichiers modifiés avec 1451 ajouts et 933 suppressions
  1. 9 1
      ChangeLog
  2. 3 3
      Makefile.am
  3. 30 2
      README.dev
  4. 82 23
      configure.ac
  5. 10 10
      doc/Makefile.am
  6. 17 0
      doc/devel/handle_refcnt
  7. 18 0
      doc/devel/replicate_refcnt
  8. 17 7
      doc/doxygen/Makefile.am
  9. 1 1
      doc/doxygen/chapters/320_scheduling.doxy
  10. 8 2
      doc/doxygen/chapters/360_debugging_tools.doxy
  11. 22 2
      doc/doxygen/chapters/430_mic_scc_support.doxy
  12. 4 3
      doc/doxygen/chapters/470_simgrid.doxy
  13. 15 6
      doc/doxygen/chapters/501_environment_variables.doxy
  14. 19 0
      doc/doxygen/chapters/api/scheduling_contexts.doxy
  15. 15 0
      doc/doxygen/dev/checkDoc.sh
  16. 15 0
      doc/doxygen/dev/starpu_check_documented.py
  17. 15 0
      doc/doxygen/doxygen_filter.sh.in
  18. 9 22
      doc/tutorial/README
  19. 16 0
      examples/README.txt
  20. 16 16
      examples/audio/starpu_audio_processing.c
  21. 0 1
      examples/axpy/axpy_opencl.c
  22. 3 2
      examples/basic_examples/multiformat.c
  23. 0 1
      examples/basic_examples/multiformat_conversion_codelets_opencl.c
  24. 0 1
      examples/basic_examples/vector_scal_opencl.c
  25. 2 2
      examples/callback/prologue.c
  26. 13 12
      examples/cg/cg.c
  27. 4 4
      examples/cg/cg_kernels.c
  28. 27 9
      examples/cholesky/cholesky.h
  29. 7 5
      examples/cholesky/cholesky_grain_tag.c
  30. 10 8
      examples/cholesky/cholesky_implicit.c
  31. 7 5
      examples/cholesky/cholesky_tag.c
  32. 8 6
      examples/cholesky/cholesky_tile_tag.c
  33. 0 1
      examples/filters/custom_mf/conversion_opencl.c
  34. 1 1
      examples/filters/custom_mf/custom_interface.c
  35. 0 1
      examples/filters/custom_mf/custom_opencl.c
  36. 0 1
      examples/filters/fblock_opencl.c
  37. 3 2
      examples/filters/fmatrix.c
  38. 54 49
      examples/heat/dw_factolu.c
  39. 2 2
      examples/heat/dw_factolu.h
  40. 1 4
      examples/heat/heat.c
  41. 0 1
      examples/interface/complex_kernels_opencl.c
  42. 22 16
      examples/lu/lu_example.c
  43. 3 2
      examples/lu/xlu_implicit.c
  44. 5 4
      examples/lu/xlu_implicit_pivot.c
  45. 10 10
      examples/mandelbrot/mandelbrot.c
  46. 7 1
      examples/matvecmult/matvecmult.c
  47. 3 1
      examples/mult/xgemm.c
  48. 2 2
      examples/pi/pi.c
  49. 1 1
      examples/pi/pi_redux.c
  50. 9 6
      examples/ppm_downscaler/ppm_downscaler.c
  51. 0 2
      examples/reductions/dot_product.c
  52. 1 1
      examples/sched_ctx/dummy_sched_with_ctx.c
  53. 3 3
      examples/sched_ctx/nested_sched_ctxs.c
  54. 8 3
      examples/sched_ctx/parallel_code.c
  55. 1 2
      examples/sched_ctx/parallel_tasks_reuse_handle.c
  56. 3 3
      examples/sched_ctx/sched_ctx_without_sched_policy.c
  57. 2 2
      examples/sched_ctx/sched_ctx_without_sched_policy_awake.c
  58. 3 0
      examples/sched_ctx/two_cpu_contexts.c
  59. 8 8
      examples/sched_ctx_utils/sched_ctx_utils.c
  60. 1 1
      examples/scheduler/dummy_sched.c
  61. 8 10
      examples/spmd/vector_scal_spmd.c
  62. 3 3
      examples/spmv/dw_block_spmv.c
  63. 6 6
      examples/spmv/matrix_market/mm_to_bcsr.c
  64. 385 427
      examples/spmv/matrix_market/mmio.c
  65. 0 1
      examples/spmv/spmv_kernels.c
  66. 0 1
      examples/stencil/life_opencl.c
  67. 0 1
      examples/stencil/shadow_opencl.c
  68. 1 1
      examples/stencil/stencil-blocks.c
  69. 6 8
      examples/stencil/stencil-kernels.c
  70. 6 6
      examples/stencil/stencil.c
  71. 3 2
      examples/tag_example/tag_example2.c
  72. 3 2
      examples/tag_example/tag_example3.c
  73. 2 2
      examples/tag_example/tag_restartable.c
  74. 5 5
      examples/worker_collections/worker_list_example.c
  75. 27 0
      include/fstarpu_mod.f90
  76. 6 5
      include/pthread_win32/pthread.h
  77. 6 5
      include/pthread_win32/semaphore.h
  78. 3 1
      include/starpu_config.h.in
  79. 4 0
      include/starpu_sched_ctx.h
  80. 1 1
      include/starpu_task.h
  81. 2 1
      include/starpu_tree.h
  82. 2 1
      include/starpu_worker.h
  83. 7 2
      mic-configure
  84. 5 1
      mpi/src/starpu_mpi.c
  85. 1 2
      mpi/src/starpu_mpi_comm.c
  86. 1 1
      src/common/fxt.c
  87. 16 16
      src/common/fxt.h
  88. 3 2
      src/core/debug.c
  89. 265 2
      src/core/debug.h
  90. 4 3
      src/core/dependencies/data_arbiter_concurrency.c
  91. 3 10
      src/core/dependencies/implicit_data_deps.c
  92. 19 46
      src/core/dependencies/tags.c
  93. 3 6
      src/core/dependencies/task_deps.c
  94. 6 27
      src/core/jobs.c
  95. 1 1
      src/core/perfmodel/perfmodel_bus.c
  96. 13 8
      src/core/perfmodel/perfmodel_print.c
  97. 44 23
      src/core/sched_ctx.c
  98. 4 1
      src/core/sched_ctx.h
  99. 2 9
      src/core/sched_policy.c
  100. 0 0
      src/core/simgrid.h

+ 9 - 1
ChangeLog

@@ -2,7 +2,7 @@
 #
 # Copyright (C) 2009-2016  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
-# Copyright (C) 2014 INRIA
+# Copyright (C) 2014, 2016 INRIA
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -21,6 +21,14 @@ StarPU 1.3.0 (svn revision xxxx)
 New features:
   * New scheduler with heterogeneous priorities
   * Support priorities for data transfers.
+  * Add support for Ayudame version 2.x debugging library.
+
+Small features:
+  * Scheduling contexts may now be associated with a user data pointer at creation
+    time, that can later be recalled through starpu_sched_ctx_get_user_data().
+  * Add STARPU_SIMGRID_TASK_SUBMIT_COST to simulate the cost of task submission
+    in simgrid mode. This provides more accurate simgrid predictions, especially
+    for the beginning of the execution.
 
 Changes:
   * Vastly improve simgrid simulation time.

+ 3 - 3
Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2009-2016  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+# Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
 # Copyright (C) 2014  INRIA
 # Copyright (C) 2016  Inria
 #
@@ -165,8 +165,8 @@ txtdir = ${prefix}
 else
 txtdir = ${docdir}
 endif
-txt_DATA = AUTHORS COPYING.LGPL README STARPU-REVISION
-EXTRA_DIST = autogen.sh AUTHORS COPYING.LGPL README STARPU-VERSION STARPU-REVISION build-aux/svn2cl.xsl mic-configure
+txt_DATA = AUTHORS COPYING.LGPL README README.dev STARPU-REVISION
+EXTRA_DIST = autogen.sh AUTHORS COPYING.LGPL README README.dev STARPU-VERSION STARPU-REVISION build-aux/svn2cl.xsl mic-configure
 
 DISTCLEANFILES = STARPU-REVISION
 

+ 30 - 2
README.dev

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009, 2010, 2011  Université de Bordeaux
-# Copyright (C) 2010, 2011  CNRS
+# Copyright (C) 2009, 2010, 2011, 2016  Université de Bordeaux
+# Copyright (C) 2010, 2011, 2016  CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -17,11 +17,39 @@
 Contents
 ========
 
+- Directory structure
 - Developer Warnings
 - Naming Conventions
 - Coding Style
 - Error handling
 
+Directory structure
+-------------------
+
+The directory structure is as follows:
+- src        : internal source for StarPU
+- include    : public API
+- tests      : unit tests
+- examples   : examples using StarPU
+- doc        : documentation for StarPU
+- tools      : tools for StarPU
+
+StarPU extensions have their own directory (src/include/tests/examples) structure:
+
+- mpi           : The MPI support
+- socl          : the StarPU OpenCL-compatible interface
+- gcc-plugin    : the GCC plug-in that extends the C programming language with pragmas and attributes
+- sc_hypervisor : The Scheduling Context Hypervisor
+- starpufft     : The FFT support
+- starpu-top    : StarPU-Top Interface
+
+Some directories contain only build system details:
+- build-aux
+- m4
+- autom4te.cache
+
+
+
 Developer Warnings
 ------------------
 

+ 82 - 23
configure.ac

@@ -3,8 +3,7 @@
 # Copyright (C) 2009-2016  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
 # Copyright (C) 2011  Télécom-SudParis
-# Copyright (C) 2011, 2012, 2014  INRIA
-# Copyright (C) 2015, 2016  Inria
+# Copyright (C) 2011, 2012, 2014-2016  INRIA
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -60,6 +59,7 @@ AC_CANONICAL_SYSTEM
 
 dnl Automake 1.11 introduced `silent-rules' and `color-tests'.  Use them
 dnl when they're available.
+dnl do not use option subdir-objects, it causes fortran compilation to fail
 m4_ifdef([AM_SILENT_RULES],
   [AM_INIT_AUTOMAKE([1.11 -Wall foreign silent-rules color-tests parallel-tests])],
   [AM_INIT_AUTOMAKE([1.10 -Wall foreign])])
@@ -157,15 +157,15 @@ case "$target" in
 *-*-mingw*|*-*-cygwin*)
   starpu_windows=yes
   libext=a
-  AC_DEFINE(STARPU_HAVE_WINDOWS, [], [Define this on windows.])
+  AC_DEFINE(STARPU_HAVE_WINDOWS, [1], [Define this on windows.])
   ;;
 *-*-linux*)
   starpu_linux=yes
-  AC_DEFINE(STARPU_LINUX_SYS, 1, [Define to 1 on Linux])
+  AC_DEFINE(STARPU_LINUX_SYS, [1], [Define to 1 on Linux])
   ;;
 *-*darwin*)
   starpu_darwin=yes
-  AC_DEFINE(STARPU_HAVE_DARWIN, [], [Define this on darwin.])
+  AC_DEFINE(STARPU_HAVE_DARWIN, [1], [Define this on darwin.])
   ;;
 esac
 AM_CONDITIONAL([STARPU_HAVE_WINDOWS], [test "x$starpu_windows" = "xyes"])
@@ -212,7 +212,7 @@ then
 fi
 
 AC_CHECK_TYPE([struct timespec], 
-	       AC_DEFINE(STARPU_HAVE_STRUCT_TIMESPEC,[],[struct timespec is defined]),
+	       AC_DEFINE(STARPU_HAVE_STRUCT_TIMESPEC,[1],[struct timespec is defined]),
 	       [], [
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -234,7 +234,7 @@ then
 		#include <pthread.h>
 		]],
 		[[ pthread_t t; pthread_create(&t, NULL, NULL, NULL); ]])],
-		AC_DEFINE(STARPU_NATIVE_WINTHREADS,[],[Using native windows threads]),
+		AC_DEFINE(STARPU_NATIVE_WINTHREADS,[1],[Using native windows threads]),
 		AC_MSG_ERROR([pthread_create unavailable]))
 else
     AC_CHECK_LIB([pthread], [pthread_create])
@@ -246,13 +246,13 @@ AC_CHECK_FUNCS([sysconf])
 
 AC_CHECK_FUNC([pthread_spin_lock], have_pthread_spin_lock=yes, have_pthread_spin_lock=no)
 if test x$have_pthread_spin_lock = xyes; then
-	AC_DEFINE(HAVE_PTHREAD_SPIN_LOCK,[],[pthread_spin_lock is available])
-	AC_DEFINE(STARPU_HAVE_PTHREAD_SPIN_LOCK,[],[pthread_spin_lock is available])
+	AC_DEFINE(HAVE_PTHREAD_SPIN_LOCK,[1],[pthread_spin_lock is available])
+	AC_DEFINE(STARPU_HAVE_PTHREAD_SPIN_LOCK,[1],[pthread_spin_lock is available])
 fi
 
 AC_CHECK_FUNC([pthread_barrier_init], have_pthread_barrier=yes, have_pthread_barrier=no)
 if test x$have_pthread_barrier = xyes; then
-	AC_DEFINE(STARPU_HAVE_PTHREAD_BARRIER,[],[pthread_barrier is available])
+	AC_DEFINE(STARPU_HAVE_PTHREAD_BARRIER,[1],[pthread_barrier is available])
 fi
 
 # yes, that's non portable, but it's still better than sched_setaffinity
@@ -364,7 +364,7 @@ STARPU_SEARCH_LIBS([LIBNUMA],[set_mempolicy],[numa],[enable_libnuma=yes],[enable
 AC_MSG_CHECKING(whether libnuma is available)
 AC_MSG_RESULT($enable_libnuma)
 if test x$enable_libnuma = xyes; then
-	AC_DEFINE(STARPU_HAVE_LIBNUMA,[],[libnuma is available])
+	AC_DEFINE(STARPU_HAVE_LIBNUMA,[1],[libnuma is available])
 fi
 
 ###############################################################################
@@ -741,7 +741,7 @@ if test x$enable_cuda_memcpy_peer = xyes -a x$enable_cuda = xyes ; then
     LDFLAGS="${SAVED_LDFLAGS}"
 fi
 if test x$have_cuda_memcpy_peer = xyes; then
-    AC_DEFINE(HAVE_CUDA_MEMCPY_PEER,[],[Peer transfers are supported in CUDA])
+    AC_DEFINE(HAVE_CUDA_MEMCPY_PEER,[1],[Peer transfers are supported in CUDA])
 fi
 
 if test x$enable_cuda = xyes; then
@@ -1013,7 +1013,7 @@ AC_ARG_ENABLE(opencl-simulator, [AS_HELP_STRING([--enable-opencl-simulator],
 				enable_opencl_simulator=$enableval, enable_opencl_simulator=no)
 if test x$enable_opencl_simulator = xyes; then
 	enable_simgrid=yes
-	AC_DEFINE(STARPU_OPENCL_SIMULATOR, 1, [Define this to enable using an OpenCL simulator])
+	AC_DEFINE(STARPU_OPENCL_SIMULATOR, [1], [Define this to enable using an OpenCL simulator])
 fi
 
 AC_ARG_WITH(simgrid-dir,
@@ -1093,7 +1093,7 @@ if test x$enable_simgrid = xyes ; then
 	                 [
 			   AC_MSG_ERROR(StarPU needs a version of Simgrid which defines the type msg_host_t (should be any version >= 3.8.1))
 		         ])
-	AC_DEFINE(STARPU_SIMGRID, 1, [Define this to enable simgrid execution])
+	AC_DEFINE(STARPU_SIMGRID, [1], [Define this to enable simgrid execution])
 	# We won't bind or detect anything
 	with_hwloc=no
 
@@ -1523,6 +1523,7 @@ AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug], [enable debug mode])],
 AC_MSG_RESULT($enable_debug)
 
 AC_ARG_ENABLE(spinlock_check, [AS_HELP_STRING([--enable-spinlock-check], [enable spinlock check])], enable_spinlock_check=$enableval, enable_spinlock_check=no)
+AC_ARG_ENABLE(fstack-protector-all, [AS_HELP_STRING([--disable-fstack-protector-all], [disable GCC option -fstack-protector-all])], enable_fstack_protector_all=$enableval, enable_fstack_protector_all=yes)
 
 if test x$enable_debug = xyes; then
 	AC_DEFINE(STARPU_DEBUG, [1], [enable debugging statements])
@@ -1531,10 +1532,11 @@ if test x$enable_debug = xyes; then
 	enable_spinlock_check=yes
 	if test x$GCC = xyes; then
 		if test x$starpu_windows != xyes ; then
-			CFLAGS="$CFLAGS -fstack-protector-all"
-			CXXFLAGS="$CXXFLAGS -fstack-protector-all"
+			if test x$enable_fstack_protector_all = xyes ; then
+			   CFLAGS="$CFLAGS -fstack-protector-all"
+			   CXXFLAGS="$CXXFLAGS -fstack-protector-all"
+			fi
 		fi
-		CPPFLAGS="$CPPFLAGS -D_FORTIFY_SOURCE=2"
 	fi
 else
 	CFLAGS="-O3 $CFLAGS"
@@ -1560,6 +1562,15 @@ AC_ARG_ENABLE(fast, [AS_HELP_STRING([--enable-fast],
 AC_MSG_RESULT($enable_fast)
 if test x$enable_fast = xyes; then
 	AC_DEFINE(STARPU_NO_ASSERT, [1], [disable assertions])
+else
+        # fortify gets really enabled only with optimizations, avoid enabling it
+        # when optimizations are not enabled, because with some glibc it
+        # spews a lot of warnings.
+	if test x$enable_debug != xyes; then
+		if test x$GCC = xyes; then
+			CPPFLAGS="$CPPFLAGS -D_FORTIFY_SOURCE=1"
+		fi
+	fi
 fi
 
 AC_MSG_CHECKING(whether debug messages should be displayed)
@@ -1719,7 +1730,50 @@ fi
 
 AC_CHECK_HEADERS([glpk.h], [AC_DEFINE([STARPU_HAVE_GLPK_H], [1], [Define to 1 if you have the <glpk.h> header file.])])
 STARPU_HAVE_LIBRARY(GLPK, [glpk])
+
+AC_ARG_WITH(ayudame1-include-dir,
+	[AS_HELP_STRING([--with-ayudame1-include-dir=<path>],
+	[specify where Ayudame version 1 headers are installed])],
+	[
+		ayudame1_include_dir="$withval"
+		if test -n "$ayudame1_include_dir"; then
+			CPPFLAGS="-I$ayudame1_include_dir $CPPFLAGS"
+		fi
+	], [ayudame1_include_dir=no])
+AC_ARG_WITH(ayudame2-include-dir,
+	[AS_HELP_STRING([--with-ayudame2-include-dir=<path>],
+	[specify where Ayudame version 2 headers are installed])],
+	[
+		ayudame2_include_dir="$withval"
+		if test -n "$ayudame2_include_dir"; then
+			CPPFLAGS="-I$ayudame2_include_dir $CPPFLAGS"
+		fi
+	], [ayudame2_include_dir=no])
+
+# Ayudame 1 header is capitalized
 AC_CHECK_HEADERS([Ayudame.h])
+AC_ARG_ENABLE(ayudame1, [AS_HELP_STRING([--disable-ayudame1],
+				   [Do not use Ayudame lib version 1])],
+				   enable_ayudame1=$enableval, enable_ayudame1=yes)
+# Ayudame 2 header is lowercase
+AC_CHECK_HEADERS([ayudame.h])
+AC_ARG_ENABLE(ayudame2, [AS_HELP_STRING([--disable-ayudame2],
+				   [Do not use Ayudame lib version 2])],
+				   enable_ayudame2=$enableval, enable_ayudame2=yes)
+if test x$enable_ayudame1 = xyes -a x$ac_cv_header_Ayudame_h = xyes; then
+   AC_DEFINE([STARPU_USE_AYUDAME1], [1], [Define to 1 if Ayudame 1 is available and should be used])
+   ayu_msg="yes, use version 1"
+else
+   if test x$enable_ayudame2 = xyes -a x$ac_cv_header_ayudame_h = xyes; then
+      AC_DEFINE([STARPU_USE_AYUDAME2], [1], [Define to 1 if Ayudame 2 is available and should be used])
+      ayu_msg="yes, use version 2"
+   else
+      ayu_msg="no"
+   fi
+fi
+
+AM_CONDITIONAL([STARPU_USE_AYUDAME1], [test "x$enable_ayudame1" = "xyes"])
+AM_CONDITIONAL([STARPU_USE_AYUDAME2], [test "x$enable_ayudame2" = "xyes"])
 
 ###############################################################################
 #                                                                             #
@@ -2000,7 +2054,7 @@ AC_MSG_RESULT($use_mpi)
 AC_SUBST(USE_MPI, $use_mpi)
 AM_CONDITIONAL(USE_MPI, test x$use_mpi = xyes)
 if test x$use_mpi = xyes; then
-	AC_DEFINE(STARPU_USE_MPI,[],[whether the StarPU MPI library is available])
+	AC_DEFINE(STARPU_USE_MPI,[1],[whether the StarPU MPI library is available])
 else
 	running_mpi_check=no
 fi
@@ -2181,7 +2235,7 @@ if test "x$FC" != "x"; then
 		fi
 	fi
 	if test "x$enable_build_fortran" = "xyes" ; then
-		AC_DEFINE(STARPU_HAVE_FC, [], [Define this if a Fortran compiler is available])
+		AC_DEFINE(STARPU_HAVE_FC, [1], [Define this if a Fortran compiler is available])
 		if test x$use_mpi = xyes; then
 			AC_ARG_WITH(mpifort, [AS_HELP_STRING([--with-mpifort[=<path to mpifort>]],
 				    [Path of the mpifort compiler])],
@@ -2220,7 +2274,7 @@ if test "x$FC" != "x"; then
 						)
 				CC=$OLD_CC
 				if test "x$use_mpi_fort" = xyes; then
-					AC_DEFINE([HAVE_MPI_COMM_F2C], 1, [Function MPI_Comm_f2c is available])
+					AC_DEFINE([HAVE_MPI_COMM_F2C], [1], [Function MPI_Comm_f2c is available])
 					AC_MSG_CHECKING(mpifort path)
 					AC_MSG_RESULT($mpifort_path)
 					AC_SUBST(MPIFORT, $mpifort_path)
@@ -2323,7 +2377,7 @@ AC_ARG_ENABLE(openmp, [AS_HELP_STRING([--enable-openmp],
 AC_MSG_CHECKING(for OpenMP runtime support)
 
 if test x$enable_openmp = xyes; then
-	AC_DEFINE(STARPU_OPENMP, 1, [Define this to enable OpenMP runtime support])
+	AC_DEFINE(STARPU_OPENMP, [1], [Define this to enable OpenMP runtime support])
 fi
 
 AM_CONDITIONAL([STARPU_OPENMP], [test "x$enable_openmp" = "xyes"])
@@ -2717,7 +2771,7 @@ if test "x$ICC" != "x" -a "$starpu_windows" = "yes" ; then
     ICC=""
 fi
 if test "x$ICC" != "x"; then
-  AC_DEFINE(STARPU_HAVE_ICC, [], [Define this if icc is available])
+  AC_DEFINE(STARPU_HAVE_ICC, [1], [Define this if icc is available])
 fi
 AM_CONDITIONAL([STARPU_HAVE_ICC], [test "x$ICC" != "x"])
 
@@ -2765,6 +2819,10 @@ if test "$enable_build_doc" = "yes" ; then
    if test "$epstopdfcommand" = "" ; then
 	enable_build_doc="no"
    fi
+   if test -f "$srcdir/doc/doxygen/starpu.pdf"
+   then
+	enable_build_doc="no"
+   fi
 fi
 AC_MSG_CHECKING(whether documentation should be compiled)
 AC_MSG_RESULT($enable_build_doc)
@@ -2813,6 +2871,7 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   chmod +x doc/doxygen/doxygen_filter.sh
   mkdir -p tests/microbenchs
   test -e tests/microbenchs/tasks_size_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead.sh tests/microbenchs/
+  test -e tests/microbenchs/tasks_size_overhead_scheds.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_scheds.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead.gp || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead.gp tests/microbenchs/
   test -e tests/microbenchs/microbench.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/microbench.sh tests/microbenchs/
   test -e tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh tests/microbenchs/
@@ -2964,7 +3023,7 @@ AC_MSG_NOTICE([
                SOCL test suite:                             $run_socl_check
                Scheduler Hypervisor:                        $build_sc_hypervisor
                simgrid enabled:                             $enable_simgrid
-               ayudame enabled:                             $ac_cv_header_Ayudame_h
+               ayudame enabled:                             $ayu_msg
 	       Native fortran support:                      $enable_build_fortran
 	       Native MPI fortran support:                  $use_mpi_fort
 ])

+ 10 - 10
doc/Makefile.am

@@ -1,20 +1,20 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013, 2014  CNRS
+# Copyright (C) 2013, 2014, 2016  CNRS
 #
-# Permission is granted to copy, distribute and/or modify this document
-# under the terms of the GNU Free Documentation License, Version 1.3
-# or any later version published by the Free Software Foundation;
-# with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
 #
-# See the GNU Free Documentation License in COPYING.GFDL for more details.
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-if BUILD_DOC
 SUBDIRS = doxygen
 DIST_SUBDIRS = doxygen
-else
-DIST_SUBDIRS =
-endif
 
 EXTRA_DIST =    tutorial/hello_world.c \
 		tutorial/hello_world_plugin.c \

+ 17 - 0
doc/devel/handle_refcnt

@@ -1,3 +1,20 @@
+#!/bin/bash
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2012 Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
 handle.refcnt usage
 ===================
 

+ 18 - 0
doc/devel/replicate_refcnt

@@ -1,3 +1,21 @@
+#!/bin/bash
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2015 Université de Bordeaux
+# Copyright (C) 2012 Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
 replicate.refcnt usage
 ======================
 

+ 17 - 7
doc/doxygen/Makefile.am

@@ -4,24 +4,32 @@
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
 # Copyright (C) 2014  INRIA
 #
-# Permission is granted to copy, distribute and/or modify this document
-# under the terms of the GNU Free Documentation License, Version 1.3
-# or any later version published by the Free Software Foundation;
-# with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
 #
-# See the GNU Free Documentation License in COPYING.GFDL for more details.
-
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 DOXYGEN = doxygen
 PDFLATEX = pdflatex
 MAKEINDEX = makeindex
 
+if BUILD_DOC
 DOX_DIR = $(top_builddir)/doc/doxygen
+else
+DOX_DIR = $(top_srcdir)/doc/doxygen
+endif
+
 DOX_CONFIG = $(top_srcdir)/doc/doxygen/doxygen.cfg
 
 DOX_HTML_DIR = html
 DOX_LATEX_DIR = latex
-DOX_PDF = starpu.pdf
+DOX_PDF = $(DOX_DIR)/starpu.pdf
 DOX_TAG = starpu.tag
 
 chapters =	\
@@ -268,6 +276,7 @@ $(DOX_TAG): $(dox_inputs)
 	@$(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex
 	@cat $(top_srcdir)/doc/doxygen/refman.tex >> $(DOX_LATEX_DIR)/refman.tex
 
+if BUILD_DOC
 dist_pdf_DATA = $(DOX_PDF)
 
 $(DOX_PDF): $(DOX_TAG) refman.tex
@@ -299,6 +308,7 @@ CLEANFILES = $(DOX_TAG) starpu_config.h \
     $(DOX_HTML_DIR) \
     $(DOX_LATEX_DIR) \
     $(DOX_PDF)
+endif
 
 # Rule to update documentation on web server. Should only be used locally.
 PUBLISHHOST	?= gforge

+ 1 - 1
doc/doxygen/chapters/320_scheduling.doxy

@@ -145,7 +145,7 @@ instance, to force execution of a task on CUDA0:
 
 \code{.c}
 task->execute_on_a_specific_worker = 1;
-task->worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0);
+task->workerid = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0);
 \endcode
 
 One can also specify the order in which tasks must be executed by setting the

+ 8 - 2
doc/doxygen/chapters/360_debugging_tools.doxy

@@ -16,8 +16,14 @@ can be generated and displayed graphically, see \ref GeneratingTracesWithFxT.
 Generally-speaking, if you have troubles, pass \ref enable-debug "--enable-debug" to
 <c>./configure</c> to enable some checks which impact performance, but will
 catch common issues, possibly earlier than the actual problem you are observing,
-which may just be a consequence of a bug that happened earlier. If your program
-is valgrind-safe, you can use it, see \ref UsingOtherDebugger.
+which may just be a consequence of a bug that happened earlier. Also, make sure
+not to have the \ref enable-fast "--enable-fast" option which drops very useful
+catchup assertions. If your program is valgrind-safe, you can use it, see \ref
+UsingOtherDebugger.
+
+Depending on your toolchain, it might happen that you get
+<c>undefined reference to `__stack_chk_guard'</c> errors. In that case, use the
+<c>--disable-fstack-protector-all</c> option to avoid the issue.
 
 Then, if your program crashes with an assertion error, a segfault, etc. you can send us the result of
 

+ 22 - 2
doc/doxygen/chapters/430_mic_scc_support.doxy

@@ -32,7 +32,10 @@ directly on the Phi without any exchange with the host CPU. The binaries in
 For MPI support, you will probably have to specify different MPI compiler path
 or option for the host and the device builds, for instance:
 
-<c>./mic-configure --with-mic-param=--with-mpicc="/.../mpiicc -mmic" --with-mic-param=--with-mpicc=/.../mpiicc</c>
+\verbatim
+./mic-configure --with-mic-param=--with-mpicc="/.../mpiicc -mmic" \
+    --with-host-param=--with-mpicc=/.../mpiicc
+\endverbatim
 
 In case you have troubles with the coi or scif libraries (the Intel paths are
 really not standard, it seems...), you can still make a build in native mode
@@ -43,12 +46,29 @@ only, by using <c>mic-configure --enable-native-mic</c> (and notably without
 
 The simplest way to port an application to MIC Xeon Phi or SCC is to set the field
 starpu_codelet::cpu_funcs_name, to provide StarPU with the function
-name of the CPU implementation. StarPU will thus simply use the
+name of the CPU implementation, so for instance:
+
+\verbatim
+struct starpu_codelet cl = {
+    .cpu_funcs = {myfunc},
+    .cpu_funcs_name = {"myfunc"},
+    .nbuffers = 1,
+}
+\endverbatim
+
+StarPU will thus simply use the
 existing CPU implementation (cross-rebuilt in the MIC Xeon Phi case). The
 functions have to be globally-visible (i.e. not <c>static</c>) for
 StarPU to be able to look them up, and -rdynamic must be passed to gcc (or
 -export-dynamic to ld) so that symbols of the main program are visible.
 
+If you have used the <c>.where</c> field, you additionally need to add
+<c>STARPU_MIC</c> to it for the Xeon Phi, and/or <c>STARPU_SCC</c> for the SCC.
+
+For non-native MIC Xeon Phi execution, the 'main' function of the application, on the sink, should call starpu_init() immediately upon start-up; the starpu_init() function never returns. On the host, the 'main' function may freely perform application related initialization calls as usual, before calling starpu_init().
+
+For MIC Xeon Phi, the application may programmatically detect whether it is executing on the sink or on the host, by checking whether the STARPU_SINK environment variable is defined (on the sink) or not (on the host).
+
 For SCC execution, the function starpu_initialize() also has to be
 used instead of starpu_init(), so as to pass <c>argc</c> and
 <c>argv</c>.

+ 4 - 3
doc/doxygen/chapters/470_simgrid.doxy

@@ -2,7 +2,7 @@
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
- * Copyright (C) 2011, 2012 INRIA
+ * Copyright (C) 2011, 2012, 2016 INRIA
  * See the file version.doxy for copying conditions.
  */
 
@@ -139,8 +139,9 @@ be extended as well), change the available GPU memory size, PCI memory bandwidth
 
 The simulation can be tweaked, to be able to tune it between a very accurate
 simulation and a very simple simulation (which is thus close to scheduling
-theory results), see the \ref STARPU_SIMGRID_CUDA_MALLOC_COST and
-\ref STARPU_SIMGRID_CUDA_QUEUE_COST environment variables.
+theory results), see the \ref STARPU_SIMGRID_CUDA_MALLOC_COST,
+\ref STARPU_SIMGRID_CUDA_QUEUE_COST and \ref STARPU_SIMGRID_TASK_SUBMIT_COST
+environment variables.
 
 \section SimulationMPIApplications MPI Applications
 

+ 15 - 6
doc/doxygen/chapters/501_environment_variables.doxy

@@ -2,7 +2,7 @@
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
- * Copyright (C) 2011, 2012 INRIA
+ * Copyright (C) 2011, 2012, 2016 INRIA
  * See the file version.doxy for copying conditions.
  */
 
@@ -106,11 +106,11 @@ MIC equivalent of the environment variable \ref STARPU_NCUDA, i.e. the number of
 MIC devices to use.
 </dd>
 
-<dt>STARPU_NMICCORES</dt>
+<dt>STARPU_NMICTHREADS</dt>
 <dd>
-\anchor STARPU_NMICCORES
-\addindex __env__STARPU_NMICCORES
-Number of cores to use on the MIC devices.
+\anchor STARPU_NMICTHREADS
+\addindex __env__STARPU_NMICTHREADS
+Number of threads to use on the MIC devices.
 </dd>
 
 <dt>STARPU_NSCC</dt>
@@ -531,7 +531,7 @@ taken into account in simgrid mode.
 <dd>
 \anchor STARPU_PCI_FLAT
 \addindex __env__STARPU_PCI_FLAT
-When unset or set to to 0, the platform file created for simgrid will
+When unset or set to 0, the platform file created for simgrid will
 contain PCI bandwidths and routes.
 </dd>
 
@@ -551,6 +551,15 @@ MiB. The default is 1, thus allowing 64GiB virtual memory when Linux's
 <c>sysctl vm.max_map_count</c> value is the default 65535.
 </dd>
 
+<dt>STARPU_SIMGRID_TASK_SUBMIT_COST</dt>
+<dd>
+\anchor STARPU_SIMGRID_TASK_SUBMIT_COST
+\addindex __env__STARPU_SIMGRID_TASK_SUBMIT_COST
+When set to 1 (which is the default), task submission costs are taken into
+account in simgrid mode. This provides more accurate simgrid predictions,
+especially for the beginning of the execution.
+</dd>
+
 </dl>
 
 \section MiscellaneousAndDebug Miscellaneous And Debug

+ 19 - 0
doc/doxygen/chapters/api/scheduling_contexts.doxy

@@ -71,6 +71,9 @@ scheduling policy.
 <li> ::STARPU_SCHED_CTX_POLICY_INIT, followed by a function pointer
 (i.e. void init_sched(void)) used to initialize the scheduling policy.
 </li>
+<li> ::STARPU_SCHED_CTX_USER_DATA, followed by a pointer
+to a custom user data structure, to be retrieved by \ref starpu_sched_ctx_get_user_data().
+</li>
 </ul>
 
 \def STARPU_SCHED_CTX_POLICY_NAME
@@ -98,6 +101,11 @@ maximum scheduler priority value.
 This macro is used when calling starpu_sched_ctx_create() to specify a
 function pointer allowing to initialize the scheduling policy.
 
+\def STARPU_SCHED_CTX_USER_DATA
+\ingroup API_Scheduling_Contexts
+This macro is used when calling starpu_sched_ctx_create() to specify a
+pointer to some user data related to the context being created.
+
 \fn unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap)
 \ingroup API_Scheduling_Contexts
 Create a context indicating an approximate interval of resources
@@ -160,6 +168,13 @@ Returns the list of workers in the array \p workerids, the returned value is the
 number of workers. The user should free the \p workerids table after finishing
 using it (it is allocated inside the function with the proper size)
 
+\fn unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids)
+\ingroup API_Scheduling_Contexts
+Returns the list of workers in the array \p workerids; the returned value is the
+number of workers. This list is provided in raw order, i.e. not sorted by tree or list order,
+and the user should not free the \p workerids table.
+This function is thus much less costly than starpu_sched_ctx_get_workers_list().
+
 \fn unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id)
 \ingroup API_Scheduling_Contexts
 Return the number of workers managed by the specified context.
@@ -237,6 +252,10 @@ todo
 \ingroup API_Scheduling_Contexts
 todo
 
+\fn void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id)
+\ingroup API_Scheduling_Contexts
+Return the user data pointer associated to the scheduling context.
+
 @name Scheduling Context Worker Collection
 \ingroup API_Scheduling_Contexts
 

+ 15 - 0
doc/doxygen/dev/checkDoc.sh

@@ -1,4 +1,19 @@
 #!/bin/bash
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2016 CNRS
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 dirname=$(dirname $0)
 

+ 15 - 0
doc/doxygen/dev/starpu_check_documented.py

@@ -1,4 +1,19 @@
 #!/usr/bin/python3
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2016 CNRS
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 import os
 import sys

+ 15 - 0
doc/doxygen/doxygen_filter.sh.in

@@ -1,4 +1,19 @@
 #!/bin/bash
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2014 CNRS
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 if [ "$(basename $1)" == "starpufft.h" ] ; then
     gcc -E $1 -I @top_srcdir@/include/ -I @top_builddir@/include/ |grep starpufft

+ 9 - 22
doc/tutorial/README

@@ -1,32 +1,19 @@
+#
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2009-2011  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2013  CNRS
 #
-# Redistribution  and  use  in  source and binary forms, with or without
-# modification,  are  permitted  provided  that the following conditions
-# are met:
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
 #
-# * Redistributions  of  source  code  must  retain  the above copyright
-#   notice,  this  list  of  conditions  and  the  following  disclaimer.
-# * Redistributions  in  binary  form must reproduce the above copyright
-#   notice,  this list of conditions and the following disclaimer in the
-#   documentation  and/or other materials provided with the distribution.
-# * The name of the author may not be used to endorse or promote products
-#   derived from this software without specific prior written permission.
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
-# THIS  SOFTWARE  IS  PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# ``AS IS''  AND  ANY  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A  PARTICULAR  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL
-# SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL  DAMAGES  (INCLUDING,  BUT NOT
-# LIMITED  TO,  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE
-# DATA,  OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY  OF  LIABILITY,  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF  THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 Instructions on how to compile and run StarPU examples
 ------------------------------------------------------

+ 16 - 0
examples/README.txt

@@ -1,3 +1,19 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2016 Université de Bordeaux
+# Copyright (C) 2016 CNRS
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
 audio
 	This applies a simple band filter over audio files
 

+ 16 - 16
examples/audio/starpu_audio_processing.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2012, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -68,8 +68,8 @@ static double start;
 static double end;
 static unsigned task_per_worker[STARPU_NMAXWORKERS] = {0};
 
-/* 
- *	Functions to Manipulate WAV files 
+/*
+ *	Functions to Manipulate WAV files
  */
 
 unsigned get_wav_data_bytes_length(FILE *file)
@@ -91,8 +91,8 @@ void copy_wav_header(FILE *srcfile, FILE *dstfile)
 	fseek(srcfile, 0, SEEK_SET);
 	fseek(dstfile, 0, SEEK_SET);
 
-	fread(buffer, 1, headersize, infile);	
-	fwrite(buffer, 1, headersize, outfile);	
+	fread(buffer, 1, headersize, infile);
+	fwrite(buffer, 1, headersize, outfile);
 }
 
 void read_16bit_wav(FILE *infile, unsigned size, float *arrayout, FILE *save_file)
@@ -104,7 +104,7 @@ void read_16bit_wav(FILE *infile, unsigned size, float *arrayout, FILE *save_fil
 
 	/* we skip the header to only keep the data */
 	fseek(infile, headersize, SEEK_SET);
-	
+
 	for (v=0;v<size;v++)
 	{
 		signed char val = (signed char)fgetc(infile);
@@ -113,7 +113,7 @@ void read_16bit_wav(FILE *infile, unsigned size, float *arrayout, FILE *save_fil
 		arrayout[v] = 256*val2 + val;
 
 #if SAVE_RAW
-		fprintf(save_file, "%d %f\n", currentpos++, arrayout[v]);
+		fprintf(save_file, "%u %f\n", currentpos++, arrayout[v]);
 #endif
 	}
 }
@@ -128,10 +128,10 @@ void write_16bit_wav(FILE *outfile, unsigned size, float *arrayin, FILE *save_fi
 
 	/* we assume that the header is copied using copy_wav_header */
 	fseek(outfile, headersize, SEEK_SET);
-	
+
 	for (v=0;v<size;v++)
 	{
-		signed char val = ((int)arrayin[v]) % 256; 
+		signed char val = ((int)arrayin[v]) % 256;
 		signed char val2  = ((int)arrayin[v]) / 256;
 
 		fputc(val, outfile);
@@ -139,7 +139,7 @@ void write_16bit_wav(FILE *outfile, unsigned size, float *arrayin, FILE *save_fi
 
 #if SAVE_RAW
 		if (save_file)
-	                fprintf(save_file, "%d %f\n", currentpos++, arrayin[v]);
+	                fprintf(save_file, "%u %f\n", currentpos++, arrayin[v]);
 #endif
 	}
 }
@@ -177,7 +177,7 @@ static void band_filter_kernel_gpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *
 	cufftComplex *localout;
 
 	int workerid = starpu_worker_get_id();
-	
+
 	/* initialize the plan only during the first iteration */
 	if (!plans[workerid].is_initialized)
 	{
@@ -201,7 +201,7 @@ static void band_filter_kernel_gpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *
 	/* FFT */
 	cures = cufftExecR2C(plans[workerid].plan, localA, localout);
 	STARPU_ASSERT(cures == CUFFT_SUCCESS);
-	
+
 	/* filter low freqs */
 	unsigned lowfreq_index = (LOWFREQ*nsamples)/SAMPLERATE;
 	cudaMemsetAsync(&localout[0], 0, lowfreq_index*sizeof(fftwf_complex), starpu_cuda_get_local_stream());
@@ -226,11 +226,11 @@ static void band_filter_kernel_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *
 	float *localA = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 
 	int workerid = starpu_worker_get_id();
-	
+
 	/* initialize the plan only during the first iteration */
 	if (!plans[workerid].is_initialized)
 	{
-		plans[workerid].localout_cpu = malloc(nsamples*sizeof(fftwf_complex)); 
+		plans[workerid].localout_cpu = malloc(nsamples*sizeof(fftwf_complex));
 		plans[workerid].Acopy = malloc(nsamples*sizeof(float));
 
 		/* create plans, only "fftwf_execute" is thread safe in FFTW ... */
@@ -255,7 +255,7 @@ static void band_filter_kernel_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *
 
 	/* FFT */
 	fftwf_execute(plans[workerid].plan_cpu);
-	
+
 	/* filter low freqs */
 	unsigned lowfreq_index = (LOWFREQ*nsamples)/SAMPLERATE;
 	memset(&localout[0], 0, lowfreq_index*sizeof(fftwf_complex));
@@ -408,7 +408,7 @@ int main(int argc, char **argv)
 
 	unsigned niter = length_data/nsamples;
 
-	fprintf(stderr, "input: %s\noutput: %s\n#chunks %d\n", inputfilename, outputfilename, niter);
+	fprintf(stderr, "input: %s\noutput: %s\n#chunks %u\n", inputfilename, outputfilename, niter);
 
 	/* launch StarPU */
 	ret = starpu_init(NULL);

+ 0 - 1
examples/axpy/axpy_opencl.c

@@ -29,7 +29,6 @@ void axpy_opencl(void *buffers[], void *_args)
         cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
 	cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);

+ 3 - 2
examples/basic_examples/multiformat.c

@@ -167,11 +167,10 @@ create_and_submit_task(unsigned int dev)
 static void
 create_and_submit_tasks(void)
 {
-	int err;
-
 #ifdef STARPU_USE_CUDA
 	if (ncuda > 0)
 	{
+		int err;
 		err = create_and_submit_task(STARPU_CUDA);
 		if (err != 0)
 		{
@@ -184,6 +183,7 @@ create_and_submit_tasks(void)
 #ifdef STARPU_USE_CPU
 	if (ncpu > 0)
 	{
+		int err;
 		err = create_and_submit_task(STARPU_CPU);
 		if (err != 0)
 		{
@@ -196,6 +196,7 @@ create_and_submit_tasks(void)
 #ifdef STARPU_USE_OPENCL
 	if (nopencl > 0)
 	{
+		int err;
 		err = create_and_submit_task(STARPU_OPENCL);
 		if (err != 0)
 		{

+ 0 - 1
examples/basic_examples/multiformat_conversion_codelets_opencl.c

@@ -25,7 +25,6 @@ void cpu_to_opencl_opencl_func(void *buffers[], void *args)
         cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]);

+ 0 - 1
examples/basic_examples/vector_scal_opencl.c

@@ -31,7 +31,6 @@ void scal_opencl_func(void *buffers[], void *_args)
         cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	/* length of the vector */
 	unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]);

+ 2 - 2
examples/callback/prologue.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2013-2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -55,7 +55,7 @@ void prologue_callback_func(void *callback_arg)
 void pop_prologue_callback_func(void *args)
 {
 	unsigned val = (uintptr_t) args;
-	printf("pop_prologue_callback val %d \n", val);
+	printf("pop_prologue_callback val %u \n", val);
 	STARPU_ASSERT(val == 5);
 }
 

+ 13 - 12
examples/cg/cg.c

@@ -44,18 +44,18 @@
  *		d <- r
  *		delta_new <- dot(r,r)
  *		delta_0 <- delta_new
- *	
+ *
  *		while (i < i_max && delta_new > eps^2 delta_0)
  *		{
  *			q <- Ad
  *			alpha <- delta_new/dot(d, q)
  *			x <- x + alpha d
- *	
+ *
  *			If (i is divisible by 50)
  *				r <- b - Ax
  *			else
  *				r <- r - alpha q
- *			
+ *
  *			delta_old <- delta_new
  *			delta_new <- dot(r,r)
  *			beta <- delta_new/delta_old
@@ -159,7 +159,7 @@ static void register_data(void)
 	{
 		starpu_data_set_reduction_methods(q_handle, &accumulate_vector_cl, &bzero_vector_cl);
 		starpu_data_set_reduction_methods(r_handle, &accumulate_vector_cl, &bzero_vector_cl);
-	
+
 		starpu_data_set_reduction_methods(dtq_handle, &accumulate_variable_cl, &bzero_variable_cl);
 		starpu_data_set_reduction_methods(rtr_handle, &accumulate_variable_cl, &bzero_variable_cl);
 	}
@@ -271,8 +271,7 @@ static void display_matrix(void)
 
 static int cg(void)
 {
-	double delta_new, delta_old, delta_0;
-	double alpha, beta;
+	double delta_new, delta_0;
 
 	int i = 0;
 	int ret;
@@ -282,7 +281,7 @@ static int cg(void)
 	if (ret == -ENODEV) return ret;
 
 	/* r <- r - A x */
-	ret = gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks, use_reduction); 
+	ret = gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks, use_reduction);
 	if (ret == -ENODEV) return ret;
 
 	/* d <- r */
@@ -307,6 +306,9 @@ static int cg(void)
 
 	while ((i < i_max) && ((double)delta_new > (double)(eps*eps*delta_0)))
 	{
+		double delta_old;
+		double alpha, beta;
+
 		/* q <- A d */
 		gemv_kernel(q_handle, A_handle, d_handle, 0.0, 1.0, nblocks, use_reduction);
 
@@ -317,7 +319,7 @@ static int cg(void)
 		starpu_data_acquire(dtq_handle, STARPU_R);
 		alpha = delta_new/dtq;
 		starpu_data_release(dtq_handle);
-		
+
 		/* x <- x + alpha d */
 		axpy_kernel(x_handle, d_handle, alpha, nblocks);
 
@@ -325,9 +327,9 @@ static int cg(void)
 		{
 			/* r <- b */
 			copy_handle(r_handle, b_handle, nblocks);
-		
+
 			/* r <- r - A x */
-			gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks, use_reduction); 
+			gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks, use_reduction);
 		}
 		else
 		{
@@ -404,7 +406,6 @@ static void parse_args(int argc, char **argv)
 		{
 			FPRINTF(stderr, "usage: %s [-h] [-nblocks #blocks] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]);
 			exit(-1);
-			continue;
 		}
         }
 }
@@ -435,7 +436,7 @@ int main(int argc, char **argv)
 	partition_data();
 
 	ret = cg();
-	if (ret == -ENODEV) 
+	if (ret == -ENODEV)
 	{
 		ret = 77;
 		goto enodev;

+ 4 - 4
examples/cg/cg_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2012-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2012-2016  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -262,7 +262,7 @@ void dot_kernel_cpu(void *descr[], void *cl_arg)
 
 	unsigned n = STARPU_VECTOR_GET_NX(descr[1]);
 
-	TYPE local_dot = 0.0;
+	TYPE local_dot;
 	/* Note that we explicitly cast the result of the DOT kernel because
 	 * some BLAS libraries return a double for sdot, for instance. */
 	local_dot = (TYPE)DOT(n, v1, 1, v2, 1);
@@ -550,10 +550,10 @@ int scal_axpy_kernel(starpu_data_handle_t v1, TYPE p1,
 		     starpu_data_handle_t v2, TYPE p2,
 		     unsigned nblocks)
 {
-	int ret;
 	unsigned b;
 	for (b = 0; b < nblocks; b++)
 	{
+		int ret;
 		ret = starpu_task_insert(&scal_axpy_kernel_cl,
 					 STARPU_RW, starpu_data_get_sub_data(v1, 1, b),
 					 STARPU_R,  starpu_data_get_sub_data(v2, 1, b),
@@ -626,10 +626,10 @@ int axpy_kernel(starpu_data_handle_t v1,
 		starpu_data_handle_t v2, TYPE p1,
 		unsigned nblocks)
 {
-	int ret;
 	unsigned b;
 	for (b = 0; b < nblocks; b++)
 	{
+		int ret;
 		ret = starpu_task_insert(&axpy_kernel_cl,
 					 STARPU_RW, starpu_data_get_sub_data(v1, 1, b),
 					 STARPU_R,  starpu_data_get_sub_data(v2, 1, b),

+ 27 - 9
examples/cholesky/cholesky.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+ * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,7 +32,7 @@
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 #define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0)
-#define NMAXBLOCKS	32
+#define NMAXBLOCKS	128
 
 #define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))
 #define TAG21(k,j)	((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32)	\
@@ -113,15 +113,33 @@
 
 /* End of magma code */
 
+static unsigned size;
+static unsigned nblocks;
+static unsigned nbigblocks;
+
+static inline void init_sizes(void) {
+	int power = starpu_cpu_worker_get_count() + 32 * starpu_cuda_worker_get_count();
+	int power_sqrt = sqrt(power)/2;
+	if (power_sqrt < 1)
+		power_sqrt = 1;
+
 #ifdef STARPU_QUICK_CHECK
-static unsigned size = 320*4;
-static unsigned nblocks = 4;
-static unsigned nbigblocks = 2;
+	if (!size)
+		size = 320*2*power_sqrt;
+	if (!nblocks)
+		nblocks = 2*power_sqrt;
+	if (!nbigblocks)
+		nbigblocks = power_sqrt;
 #else
-static unsigned size = 960*16;
-static unsigned nblocks = 16;
-static unsigned nbigblocks = 8;
+	if (!size)
+		size = 960*8*power_sqrt;
+	if (!nblocks)
+		nblocks = 8*power_sqrt;
+	if (!nbigblocks)
+		nbigblocks = 4*power_sqrt;
 #endif
+}
+
 static unsigned pinned = 1;
 static unsigned noprio = 0;
 static unsigned check = 0;

+ 7 - 5
examples/cholesky/cholesky_grain_tag.c

@@ -264,7 +264,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 	}
 }
 
-static void initialize_system(float **A, unsigned dim, unsigned pinned)
+static void initialize_system(int argc, char **argv, float **A, unsigned pinned)
 {
 	int ret;
 	int flags = STARPU_MALLOC_SIMULATION_FOLDED;
@@ -278,6 +278,10 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 		exit(77);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	init_sizes();
+
+	parse_args(argc, argv);
+
 #ifdef STARPU_USE_CUDA
 	initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost);
 	initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost);
@@ -292,7 +296,7 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 
 	if (pinned)
 		flags |= STARPU_MALLOC_PINNED;
-	starpu_malloc_flags((void **)A, dim*dim*sizeof(float), flags);
+	starpu_malloc_flags((void **)A, size*size*sizeof(float), flags);
 }
 
 int cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned pinned)
@@ -338,10 +342,8 @@ int main(int argc, char **argv)
 
      	int ret;
 
-	parse_args(argc, argv);
-
 	float *mat = NULL;
-	initialize_system(&mat, size, pinned);
+	initialize_system(argc, argv, &mat, pinned);
 
 #ifndef STARPU_SIMGRID
 	unsigned i,j;

+ 10 - 8
examples/cholesky/cholesky_implicit.c

@@ -40,7 +40,6 @@ static void callback_turn_spmd_on(void *arg STARPU_ATTRIBUTE_UNUSED)
 
 static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 {
-	int ret;
 	double start;
 	double end;
 
@@ -59,6 +58,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	/* create all the DAG nodes */
 	for (k = 0; k < nblocks; k++)
 	{
+		int ret;
                 starpu_data_handle_t sdatakk = starpu_data_get_sub_data(dataA, 2, k, k);
 
                 ret = starpu_task_insert(&cl11,
@@ -312,22 +312,24 @@ int main(int argc, char **argv)
 	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
 	 * */
 
-	parse_args(argc, argv);
-
-	if(with_ctxs || with_noctxs || chole1 || chole2)
-		parse_args_ctx(argc, argv);
-
 #ifdef STARPU_HAVE_MAGMA
 	magma_init();
 #endif
 
 	int ret;
 	ret = starpu_init(NULL);
-	//starpu_fxt_stop_profiling();
-
 	if (ret == -ENODEV) return 77;
         STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	//starpu_fxt_stop_profiling();
+
+	init_sizes();
+
+	parse_args(argc, argv);
+
+	if(with_ctxs || with_noctxs || chole1 || chole2)
+		parse_args_ctx(argc, argv);
+
 #ifdef STARPU_USE_CUDA
 	initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost);
 	initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost);

+ 7 - 5
examples/cholesky/cholesky_tag.c

@@ -227,7 +227,7 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, (flop/timing/1000.0f));
 }
 
-static int initialize_system(float **A, unsigned dim, unsigned pinned)
+static int initialize_system(int argc, char **argv, float **A, unsigned pinned)
 {
 	int ret;
 	int flags = STARPU_MALLOC_SIMULATION_FOLDED;
@@ -241,6 +241,10 @@ static int initialize_system(float **A, unsigned dim, unsigned pinned)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	init_sizes();
+
+	parse_args(argc, argv);
+
 #ifdef STARPU_USE_CUDA
 	initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost);
 	initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost);
@@ -255,7 +259,7 @@ static int initialize_system(float **A, unsigned dim, unsigned pinned)
 
 	if (pinned)
 		flags |= STARPU_MALLOC_PINNED;
-	starpu_malloc_flags((void **)A, dim*dim*sizeof(float), flags);
+	starpu_malloc_flags((void **)A, size*size*sizeof(float), flags);
 
 	return 0;
 }
@@ -308,10 +312,8 @@ int main(int argc, char **argv)
 	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
 	 * */
 
-	parse_args(argc, argv);
-
 	float *mat = NULL;
-	int ret = initialize_system(&mat, size, pinned);
+	int ret = initialize_system(argc, argv, &mat, pinned);
 	if (ret) return ret;
 
 #ifndef STARPU_SIMGRID

+ 8 - 6
examples/cholesky/cholesky_tile_tag.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -220,11 +220,6 @@ int main(int argc, char **argv)
 	unsigned x, y;
 	int ret;
 
-	parse_args(argc, argv);
-	assert(nblocks <= NMAXBLOCKS);
-
-	FPRINTF(stderr, "BLOCK SIZE = %d\n", size / nblocks);
-
 #ifdef STARPU_HAVE_MAGMA
 	magma_init();
 #endif
@@ -234,6 +229,13 @@ int main(int argc, char **argv)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	init_sizes();
+
+	parse_args(argc, argv);
+	assert(nblocks <= NMAXBLOCKS);
+
+	FPRINTF(stderr, "BLOCK SIZE = %u\n", size / nblocks);
+
 #ifdef STARPU_USE_CUDA
 	initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost);
 	initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost);

+ 0 - 1
examples/filters/custom_mf/conversion_opencl.c

@@ -28,7 +28,6 @@ void cpu_to_opencl_opencl_func(void *buffers[], void *args)
         cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	unsigned n = CUSTOM_GET_NX(buffers[0]);
 	n*=2;

+ 1 - 1
examples/filters/custom_mf/custom_interface.c

@@ -245,7 +245,7 @@ static void display_custom_interface(starpu_data_handle_t handle, FILE *f)
 {
 	struct custom_data_interface *ci = (struct custom_data_interface *)
 		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
-	fprintf(f, "Custom interface of size %d", ci->nx);
+	fprintf(f, "Custom interface of size %u", ci->nx);
 }
 
 static uint32_t

+ 0 - 1
examples/filters/custom_mf/custom_opencl.c

@@ -28,7 +28,6 @@ void custom_scal_opencl_func(void *buffers[], void *args)
         cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	unsigned n = CUSTOM_GET_NX(buffers[0]);
 	struct point *aop;

+ 0 - 1
examples/filters/fblock_opencl.c

@@ -35,7 +35,6 @@ void opencl_func(void *buffers[], void *cl_arg)
 	int id, devid, err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
         int *factor = cl_arg;
 	cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(buffers[0]);

+ 3 - 2
examples/filters/fmatrix.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -48,7 +48,8 @@ void cpu_func(void *buffers[], void *cl_arg)
 
 int main(int argc, char **argv)
 {
-	unsigned j, n=1;
+	unsigned j;
+	int n=1;
         int matrix[NX*NY];
 	int ret, i;
 	int factor = 12;

+ 54 - 49
examples/heat/dw_factolu.c

@@ -90,7 +90,7 @@ static struct starpu_codelet cl21 =
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_RW},
 	.model = &model_21
-}; 
+};
 
 static struct starpu_codelet cl22 =
 {
@@ -103,7 +103,7 @@ static struct starpu_codelet cl22 =
 	.nbuffers = 3,
 	.modes = {STARPU_R, STARPU_R, STARPU_RW},
 	.model = &model_22
-}; 
+};
 
 
 
@@ -117,7 +117,7 @@ static struct starpu_codelet cl22 =
 void dw_callback_v2_codelet_update_u22(void *argcb)
 {
 	int ret;
-	cl_args *args = argcb;	
+	cl_args *args = argcb;
 
 	unsigned k = args->k;
 	unsigned i = args->i;
@@ -128,7 +128,7 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 
 	/* we did task 22k,i,j */
 	advance_22[k*nblocks*nblocks + i + j*nblocks] = DONE;
-	
+
 	if ( (i == j) && (i == k+1))
 	{
 		/* we now reduce the LU22 part (recursion appears there) */
@@ -142,7 +142,7 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 		task->cl_arg_size = sizeof(*u11arg);
 
 		task->handles[0] = starpu_data_get_sub_data(args->dataA, 2, k+1, k+1);
-	
+
 		u11arg->dataA = args->dataA;
 		u11arg->i = k + 1;
 		u11arg->nblocks = args->nblocks;
@@ -236,7 +236,7 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 void dw_callback_v2_codelet_update_u12(void *argcb)
 {
 	int ret;
-	cl_args *args = argcb;	
+	cl_args *args = argcb;
 
 	/* now launch the update of LU22 */
 	unsigned i = args->i;
@@ -297,7 +297,7 @@ void dw_callback_v2_codelet_update_u12(void *argcb)
 void dw_callback_v2_codelet_update_u21(void *argcb)
 {
 	int ret;
-	cl_args *args = argcb;	
+	cl_args *args = argcb;
 
 	/* now launch the update of LU22 */
 	unsigned i = args->i;
@@ -357,7 +357,6 @@ void dw_callback_v2_codelet_update_u21(void *argcb)
 
 void dw_callback_v2_codelet_update_u11(void *argcb)
 {
-	int ret;
 	/* in case there remains work, go on */
 	cl_args *args = argcb;
 
@@ -369,13 +368,13 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 	/* we did task 11k */
 	advance_11[i] = DONE;
 
-	if (i == nblocks - 1) 
+	if (i == nblocks - 1)
 	{
 		/* we are done */
 		free(argcb);
 		return;
 	}
-	else 
+	else
 	{
 		/* put new tasks */
 		unsigned slice;
@@ -390,23 +389,25 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 			}
 			else
 			{
-				deps12 = advance_22[(i-1)*nblocks*nblocks + slice + i*nblocks];		
+				deps12 = advance_22[(i-1)*nblocks*nblocks + slice + i*nblocks];
 			}
 			if (deps12 & DONE)
 			{
 				/* we may perhaps launch the task 12i,slice */
-				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i*nblocks + slice], STARTED);
-				 if ((u & STARTED) == 0)
-				 {
+				uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i*nblocks + slice], STARTED);
+				if ((u & STARTED) == 0)
+				{
+					int ret;
+
 					/* we are the only one that should launch that task */
 					cl_args *u12a = malloc(sizeof(cl_args));
 
 					struct starpu_task *task12 = starpu_task_create();
-						task12->callback_func = dw_callback_v2_codelet_update_u12;
-						task12->callback_arg = u12a;
-						task12->cl = &cl12;
-						task12->cl_arg = u12a;
-						task12->cl_arg_size = sizeof(*u12a);
+					task12->callback_func = dw_callback_v2_codelet_update_u12;
+					task12->callback_arg = u12a;
+					task12->cl = &cl12;
+					task12->cl_arg = u12a;
+					task12->cl_arg_size = sizeof(*u12a);
 
 					u12a->i = i;
 					u12a->k = slice;
@@ -432,14 +433,16 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 			}
 			else
 			{
-				deps12 = advance_22[(i-1)*nblocks*nblocks + slice*nblocks + i];		
+				deps12 = advance_22[(i-1)*nblocks*nblocks + slice*nblocks + i];
 			}
 			if (deps12 & DONE)
 			{
 				/* we may perhaps launch the task 12i,slice */
-				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i + slice*nblocks], STARTED);
-				 if ((u & STARTED) == 0)
-				 {
+				uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i + slice*nblocks], STARTED);
+				if ((u & STARTED) == 0)
+				{
+					int ret;
+
 					/* we are the only one that should launch that task */
 					cl_args *u21a = malloc(sizeof(cl_args));
 
@@ -474,34 +477,34 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 
 
 /*
- *	Callbacks 
+ *	Callbacks
  */
 
 
 void dw_callback_codelet_update_u11(void *argcb)
 {
-	int ret;
 	/* in case there remains work, go on */
 	cl_args *args = argcb;
 
-	if (args->i == args->nblocks - 1) 
+	if (args->i == args->nblocks - 1)
 	{
 		/* we are done */
 		free(argcb);
 		return;
 	}
-	else 
+	else
 	{
 		/* put new tasks */
 		unsigned nslices;
 		nslices = args->nblocks - 1 - args->i;
 
 		unsigned *remaining = malloc(sizeof(unsigned));
-		*remaining = 2*nslices; 
+		*remaining = 2*nslices;
 
 		unsigned slice;
 		for (slice = args->i + 1; slice < args->nblocks; slice++)
 		{
+			int ret;
 
 			/* update slice from u12 */
 			cl_args *u12a = malloc(sizeof(cl_args));
@@ -554,29 +557,30 @@ void dw_callback_codelet_update_u11(void *argcb)
 
 void dw_callback_codelet_update_u22(void *argcb)
 {
-	int ret;
-	cl_args *args = argcb;	
+	cl_args *args = argcb;
 	unsigned remaining = STARPU_ATOMIC_ADD(args->remaining, (-1));
 	ANNOTATE_HAPPENS_BEFORE(args->remaining);
 
 	if (remaining == 0)
 	{
+		int ret;
+
 		ANNOTATE_HAPPENS_AFTER(args->remaining);
 		/* all worker already used the counter */
 		free(args->remaining);
 
 		/* we now reduce the LU22 part (recursion appears there) */
 		cl_args *u11arg = malloc(sizeof(cl_args));
-	
+
 		struct starpu_task *task = starpu_task_create();
-			task->callback_func = dw_callback_codelet_update_u11;
-			task->callback_arg = u11arg;
-			task->cl = &cl11;
-			task->cl_arg = u11arg;
-			task->cl_arg_size = sizeof(*u11arg);
-
-			task->handles[0] = starpu_data_get_sub_data(args->dataA, 2, args->k + 1, args->k + 1);
-	
+		task->callback_func = dw_callback_codelet_update_u11;
+		task->callback_arg = u11arg;
+		task->cl = &cl11;
+		task->cl_arg = u11arg;
+		task->cl_arg_size = sizeof(*u11arg);
+
+		task->handles[0] = starpu_data_get_sub_data(args->dataA, 2, args->k + 1, args->k + 1);
+
 		u11arg->dataA = args->dataA;
 		u11arg->i = args->k + 1;
 		u11arg->nblocks = args->nblocks;
@@ -591,8 +595,7 @@ void dw_callback_codelet_update_u22(void *argcb)
 
 void dw_callback_codelet_update_u12_21(void *argcb)
 {
-	int ret;
-	cl_args *args = argcb;	
+	cl_args *args = argcb;
 	unsigned remaining = STARPU_ATOMIC_ADD(args->remaining, -1);
 	ANNOTATE_HAPPENS_BEFORE(args->remaining);
 
@@ -612,6 +615,8 @@ void dw_callback_codelet_update_u12_21(void *argcb)
 		{
 			for (slicex = i+1; slicex < nblocks; slicex++)
 			{
+				int ret;
+
 				/* update that square matrix */
 				cl_args *u22a = malloc(sizeof(cl_args));
 
@@ -646,7 +651,7 @@ void dw_callback_codelet_update_u12_21(void *argcb)
 
 
 /*
- *	code to bootstrap the factorization 
+ *	code to bootstrap the factorization
  */
 
 void dw_codelet_facto(starpu_data_handle_t dataA, unsigned nblocks)
@@ -660,7 +665,7 @@ void dw_codelet_facto(starpu_data_handle_t dataA, unsigned nblocks)
 
 	start = starpu_timing_now();
 
-	/* inject a new task with this codelet into the system */ 
+	/* inject a new task with this codelet into the system */
 	struct starpu_task *task = starpu_task_create();
 	task->callback_func = dw_callback_codelet_update_u11;
 	task->callback_arg = args;
@@ -706,7 +711,7 @@ void dw_codelet_facto_v2(starpu_data_handle_t dataA, unsigned nblocks)
 
 	start = starpu_timing_now();
 
-	/* inject a new task with this codelet into the system */ 
+	/* inject a new task with this codelet into the system */
 	struct starpu_task *task = starpu_task_create();
 	task->callback_func = dw_callback_v2_codelet_update_u11;
 	task->callback_arg = args;
@@ -714,7 +719,7 @@ void dw_codelet_facto_v2(starpu_data_handle_t dataA, unsigned nblocks)
 	task->cl_arg = args;
 	task->cl_arg_size = sizeof(*args);
 
-	task->handles[0] = starpu_data_get_sub_data(dataA, 2, 0, 0); 
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, 0, 0);
 
 	/* schedule the codelet */
 	int ret = starpu_task_submit(task);
@@ -777,7 +782,7 @@ void initialize_system(float **A, float **B, unsigned dim, unsigned pinned)
 	{
 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
 		starpu_malloc((void **)B, (size_t)dim*sizeof(float));
-	} 
+	}
 	else
 	{
 		*A = malloc((size_t)dim*dim*sizeof(float));
@@ -801,8 +806,8 @@ void free_system(float *A, float *B, unsigned dim, unsigned pinned)
 	}
 }
 
-void dw_factoLU(float *matA, unsigned size, 
-		unsigned ld, unsigned nblocks, 
+void dw_factoLU(float *matA, unsigned size,
+		unsigned ld, unsigned nblocks,
 		unsigned version, unsigned _no_prio)
 {
 
@@ -820,7 +825,7 @@ void dw_factoLU(float *matA, unsigned size,
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, 
+	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld,
 			size, size, sizeof(float));
 
 	struct starpu_data_filter f =

+ 2 - 2
examples/heat/dw_factolu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2012, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -33,7 +33,7 @@
 #include "lu_kernels_model.h"
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-#define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {(fmt, ## __VA_ARGS__); }} while(0)
+#define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0)
 
 #define BLAS3_FLOP(n1,n2,n3)    \
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))

+ 1 - 4
examples/heat/heat.c

@@ -634,12 +634,11 @@ static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uin
 
 		for (neighbour = 0; neighbour < nneighbours; neighbour++)
 		{
-			float val;
 			unsigned nodeneighbour =  neighbours[neighbour];
 
 			if (nodeneighbour < newsize)
 			{
-
+				float val;
 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
 
 				if (val != 0.0f)
@@ -658,8 +657,6 @@ static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uin
 
 	rowptr[newsize] = pos;
 
-
-
 	return pos;
 }
 

+ 0 - 1
examples/interface/complex_kernels_opencl.c

@@ -28,7 +28,6 @@ void copy_complex_codelet_opencl(void *buffers[], void *_args)
         cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	/* length of the vector */
 	unsigned n = STARPU_COMPLEX_GET_NX(buffers[0]);

+ 22 - 16
examples/lu/lu_example.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,13 +27,8 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-#ifdef STARPU_QUICK_CHECK
-static unsigned long size = 320*4;
-static unsigned nblocks = 4;
-#else
-static unsigned long size = 960*16;
-static unsigned nblocks = 16;
-#endif
+static unsigned long size = 0;
+static unsigned nblocks = 0;
 static unsigned check = 0;
 static unsigned pivot = 0;
 static unsigned no_stride = 0;
@@ -313,18 +308,30 @@ int main(int argc, char **argv)
 {
 	int ret;
 
-#ifdef STARPU_QUICK_CHECK
-	size /= 4;
-	nblocks /= 4;
-#endif
-
-	parse_args(argc, argv);
-
 	ret = starpu_init(NULL);
 	if (ret == -ENODEV)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	int power = starpu_cpu_worker_get_count() + 32 * starpu_cuda_worker_get_count();
+	int power_sqrt = sqrt(power)/2;
+	if (power_sqrt < 1)
+		power_sqrt = 1;
+
+#ifdef STARPU_QUICK_CHECK
+	if (!size)
+		size = 320*2*power_sqrt;
+	if (!nblocks)
+		nblocks = 2*power_sqrt;
+#else
+	if (!size)
+		size = 960*8*power_sqrt;
+	if (!nblocks)
+		nblocks = 8*power_sqrt;
+#endif
+
+	parse_args(argc, argv);
+
 	starpu_cublas_init();
 
 	init_matrix();
@@ -388,7 +395,6 @@ int main(int argc, char **argv)
 
 	if (bound)
 	{
-		double min;
 		if (bounddeps)
 		{
 			FILE *f = fopen("lu.pl", "w");

+ 3 - 2
examples/lu/xlu_implicit.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2011, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -114,7 +114,6 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 {
 	double start;
 	double end;
-	int ret;
 
 	/* create all the DAG nodes */
 	unsigned i,j,k;
@@ -126,6 +125,8 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 	for (k = 0; k < nblocks; k++)
 	{
+		int ret;
+
 		ret = create_task_11(dataA, k);
 		if (ret == -ENODEV) return ret;
 

+ 5 - 4
examples/lu/xlu_implicit_pivot.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2012, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -160,7 +160,6 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 {
 	double start;
 	double end;
-	int ret;
 
 	/* create all the DAG nodes */
 	unsigned i,j,k;
@@ -172,8 +171,10 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 
 	for (k = 0; k < nblocks; k++)
 	{
-	     ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
-	     if (ret == -ENODEV) return ret;
+		int ret;
+
+		ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
+		if (ret == -ENODEV) return ret;
 
 		for (i = 0; i < nblocks; i++)
 		{

+ 10 - 10
examples/mandelbrot/mandelbrot.c

@@ -135,13 +135,13 @@ static void init_x11(int width, int height, unsigned *buffer)
 static int handle_events(void)
 {
 	XEvent event;
-	XNextEvent(dpy, &event);
-
-	KeySym key;
-	char text[255];
 
+	XNextEvent(dpy, &event);
 	if (event.type == KeyPress)
 	{
+		KeySym key;
+		char text[255];
+
 		XLookupString(&event.xkey,text,255,&key,0);
 		if (key == Left)
 		{
@@ -256,7 +256,6 @@ static void compute_block_opencl(void *descr[], void *cl_arg)
 
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 	cl_int err;
 
 	int id = starpu_worker_get_id_check();
@@ -290,11 +289,10 @@ static void compute_block_opencl(void *descr[], void *cl_arg)
 
 static void compute_block(void *descr[], void *cl_arg)
 {
-	int ix, iy;
-
 	int iby, block_size;
 	double stepX, stepY;
 	int *pcnt; /* unused for sequential tasks */
+
 	starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
 
 	unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
@@ -302,6 +300,8 @@ static void compute_block(void *descr[], void *cl_arg)
 	int local_iy;
 	for (local_iy = 0; local_iy < block_size; local_iy++)
 	{
+		int ix, iy;
+
 		iy = iby*block_size + local_iy;
 		for (ix = 0; ix < width; ix++)
 		{
@@ -343,11 +343,11 @@ static void compute_block_spmd(void *descr[], void *cl_arg)
 
 	unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
 
-	int ix, iy; /* global coordinates */
-	int local_iy; /* current line */
-
 	while (1)
 	{
+		int ix, iy; /* global coordinates */
+		int local_iy; /* current line */
+
 		local_iy = STARPU_ATOMIC_ADD((unsigned int *)pcnt, 1) - 1;
 		ANNOTATE_HAPPENS_BEFORE(pcnt);
 		if (local_iy >= block_size)

+ 7 - 1
examples/matvecmult/matvecmult.c

@@ -32,7 +32,6 @@ void opencl_codelet(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	cl_mem mult = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]);
 	int nx = STARPU_MATRIX_GET_NX(descr[0]);
 	int ny = STARPU_MATRIX_GET_NY(descr[0]);
-	cl_event event;
 
         id = starpu_worker_get_id_check();
         devid = starpu_worker_get_devid(id);
@@ -67,6 +66,7 @@ void fillArray(float* pfData, int iSize)
     }
 }
 
+#if 0
 void printArray(float* pfData, int iSize)
 {
     int i;
@@ -76,6 +76,7 @@ void printArray(float* pfData, int iSize)
     }
     FPRINTF(stderr, "\n");
 }
+#endif
 
 void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult)
 {
@@ -224,6 +225,11 @@ int main(int argc, char **argv)
         printArray(vector, width);
         printArray(mult, height);
 #endif
+
+	free(matrix);
+	free(vector);
+	free(mult);
+	free(correctResult);
         starpu_shutdown();
 
 	return (submit == -ENODEV) ? 77 : 0;

+ 3 - 1
examples/mult/xgemm.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -85,7 +85,9 @@ static void check_output(void)
 
 static void init_problem_data(void)
 {
+#ifndef STARPU_SIMGRID
 	unsigned i,j;
+#endif
 
 	starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
 	starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);

+ 2 - 2
examples/pi/pi.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
 	unsigned long total_shot_cnt = ntasks * nshot_per_task;
 
 	/* Total surface : Pi * r^ 2 = Pi*1^2, total square surface : 2^2 = 4, probability to impact the disk: pi/4 */
-	FPRINTF(stderr, "Pi approximation : %f (%ld / %ld)\n", ((TYPE)total_cnt*4)/(total_shot_cnt), total_cnt, total_shot_cnt);
+	FPRINTF(stderr, "Pi approximation : %f (%lu / %lu)\n", ((TYPE)total_cnt*4)/(total_shot_cnt), total_cnt, total_shot_cnt);
 	FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0);
 	FPRINTF(stderr, "Speed : %f GShot/s\n", total_shot_cnt/(1e3*timing));
 

+ 1 - 1
examples/pi/pi_redux.c

@@ -400,7 +400,7 @@ int main(int argc, char **argv)
 	double pi_approx = ((double)shot_cnt*4.0)/total;
 
 	FPRINTF(stderr, "Reductions? %s\n", use_redux?"yes":"no");
-	FPRINTF(stderr, "Pi approximation : %f (%ld / %ld)\n", pi_approx, shot_cnt, total);
+	FPRINTF(stderr, "Pi approximation : %f (%lu / %lu)\n", pi_approx, shot_cnt, total);
 	FPRINTF(stderr, "Error %e \n", pi_approx - PI);
 	FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0);
 	FPRINTF(stderr, "Speed : %f GShot/s\n", total/(1e3*timing));

+ 9 - 6
examples/ppm_downscaler/ppm_downscaler.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -54,7 +54,7 @@ struct ppm_image *file_to_ppm(char *filename)
 
 	struct ppm_image *ppm = (struct ppm_image *) malloc(sizeof(struct ppm_image));
 	assert(ppm);
-	
+
 	FILE *file = fopen(filename, "r");
 	assert(file);
 
@@ -66,7 +66,7 @@ struct ppm_image *file_to_ppm(char *filename)
 		fprintf(stderr, "file %s is not valid\n", filename);
 		exit(-1);
 	}
-	
+
 	/* allocate a buffer for the image */
 #ifdef STARPU_HAVE_MEMALIGN
 	ppm->data = (struct ppm_color *) memalign(16384, ppm->ncols*ppm->nlines*sizeof(struct ppm_color));
@@ -98,7 +98,7 @@ void ppm_to_file(struct ppm_image *ppm, char *filename)
 	fprintf(file, "P6\n%d %d\n%d\n", ppm->ncols, ppm->nlines, ppm->coldepth);
 
 	fwrite(&ppm->data[0], sizeof(struct ppm_color), ppm->ncols*ppm->nlines, file);
-	
+
 	fclose(file);
 }
 
@@ -133,7 +133,7 @@ void dummy_downscale(struct ppm_image *input_ppm, struct ppm_image *output_ppm)
 
 			unsigned big_col = col*FACTOR;
 			unsigned big_line = line*FACTOR;
-			
+
 			/* compute the average value of all components */
 			unsigned i, j;
 			for (i = 0; i < FACTOR; i++)
@@ -155,7 +155,7 @@ void dummy_downscale(struct ppm_image *input_ppm, struct ppm_image *output_ppm)
 			out[col + line*output_ppm->ncols].b = (unsigned char)(sum_b/(FACTOR*FACTOR));
 
 /*			fprintf(stderr, "col %d line %d -> sum_r = %d out -> %d\n", col, line, sum_r, out[col + line*FACTOR].r); */
-	
+
 		}
 	}
 }
@@ -180,5 +180,8 @@ int main(int argc, char **argv)
 
 	ppm_to_file(output_ppm, filename_out);
 
+	free(input_ppm);
+	free(output_ppm);
+
 	return 0;
 }

+ 0 - 2
examples/reductions/dot_product.c

@@ -162,7 +162,6 @@ void redux_opencl_func(void *buffers[], void *args)
         cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	cl_mem dota = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]);
 	cl_mem dotb = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[1]);
@@ -274,7 +273,6 @@ void dot_opencl_func(void *buffers[], void *args)
         cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
 	cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]);

+ 1 - 1
examples/sched_ctx/dummy_sched_with_ctx.c

@@ -82,7 +82,6 @@ static int push_task_dummy(struct starpu_task *task)
 
         /*if there are no tasks block */
         /* wake people waiting for a task */
-        unsigned worker = 0;
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
         struct starpu_sched_ctx_iterator it;
@@ -90,6 +89,7 @@ static int push_task_dummy(struct starpu_task *task)
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
         {
+		unsigned worker;
                 worker = workers->get_next(workers, &it);
 		starpu_pthread_mutex_t *sched_mutex;
                 starpu_pthread_cond_t *sched_cond;

+ 3 - 3
examples/sched_ctx/nested_sched_ctxs.c

@@ -53,7 +53,7 @@ static void sched_ctx_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 	int w = starpu_worker_get_id();
 	unsigned sched_ctx = (uintptr_t)arg;
 	int n = parallel_code(sched_ctx);
-//	printf("w %d executed %d it \n", w, n);
+	//printf("w %d executed %d it \n", w, n);
 }
 
 
@@ -230,8 +230,8 @@ int main(int argc, char **argv)
 	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx2);
 
-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS);
-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS);
 
 #ifdef STARPU_USE_CPU
 	free(procs1);

+ 8 - 3
examples/sched_ctx/parallel_code.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2014, 2016  Université de Bordeaux
- * Copyright (C) 2010-2015  CNRS
+ * Copyright (C) 2010-2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -112,6 +112,8 @@ int main(int argc, char **argv)
 	if (nprocs1 < 4)
 	{
 		/* Not enough procs */
+		free(procs1);
+		free(procs2);
 		starpu_shutdown();
 		return 77;
 	}
@@ -214,9 +216,12 @@ enodev:
 
 	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx2);
-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS);
-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS);
 	starpu_shutdown();
 
+	free(procs1);
+	free(procs2);
+
 	return (ret == -ENODEV ? 77 : 0);
 }

+ 1 - 2
examples/sched_ctx/parallel_tasks_reuse_handle.c

@@ -39,7 +39,6 @@ struct context
 void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
 {
 	int sched_ctx = *(int *)sched_ctx_;
-	int i;
 	int *cpuids = NULL;
 	int ncpuids = 0;
 	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
@@ -79,7 +78,7 @@ void parallel_task_init_one_context(unsigned * context_id)
 	t->prologue_callback_pop_arg=context_id;
 	t->prologue_callback_pop_arg_free=0;
 
-	int ret=starpu_task_submit(t);
+	starpu_task_submit(t);
 }
 
 struct context main_context;

+ 3 - 3
examples/sched_ctx/sched_ctx_without_sched_policy.c

@@ -129,7 +129,7 @@ int main(int argc, char **argv)
 
 		task->cl = &sched_ctx_codelet;
 		task->cl_arg = (void*)(uintptr_t) sched_ctx1;
-		
+
 		/*submit tasks to context*/
 		ret = starpu_task_submit_to_ctx(task,sched_ctx1);
 
@@ -161,8 +161,8 @@ int main(int argc, char **argv)
 
 	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx2);
-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS*NTASKS);
-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS*NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS*NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS*NTASKS);
 
 enodev:
 #ifdef STARPU_USE_CPU

+ 2 - 2
examples/sched_ctx/sched_ctx_without_sched_policy_awake.c

@@ -153,8 +153,8 @@ int main(int argc, char **argv)
 		tasks_per_ctx[1] += tasks_executed[1][i];
 	}
 
-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_per_ctx[0]/nprocs1, NTASKS);
-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_per_ctx[1]/nprocs2, NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_per_ctx[0]/nprocs1, NTASKS);
+	printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_per_ctx[1]/nprocs2, NTASKS);
 
 enodev:
 #ifdef STARPU_USE_CPU

+ 3 - 0
examples/sched_ctx/two_cpu_contexts.c

@@ -112,5 +112,8 @@ int main(int argc, char **argv)
 	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx2);
 	starpu_shutdown();
+	free(procs);
+	free(procs1);
+	free(procs2);
 	return 0;
 }

+ 8 - 8
examples/sched_ctx_utils/sched_ctx_utils.c

@@ -120,12 +120,12 @@ void start_2benchs(void (*bench)(unsigned, unsigned))
 {
 	p1.bench = bench;
 	p1.size = size1;
-	printf("size %d\n", size1);
+	printf("size %u\n", size1);
 	p1.nblocks = nblocks1;
 
 	p2.bench = bench;
 	p2.size = size2;
-	printf("size %d\n", size2);
+	printf("size %u\n", size2);
 	p2.nblocks = nblocks2;
 
 	starpu_pthread_t tid[2];
@@ -216,20 +216,20 @@ void construct_contexts(void (*bench)(unsigned, unsigned))
 	for(i = 0; i < gpu; i++)
 	{
 		procs[k++] = i;
-		printf("%d ", i);
+		printf("%u ", i);
 	}
 
 	for(i = gpu; i < gpu + gpu1; i++)
 	{
 		procs[k++] = i;
-		printf("%d ", i);
+		printf("%u ", i);
 	}
 
 
 	for(i = n_all_gpus; i < n_all_gpus + cpu1; i++)
 	{
 		procs[k++] = i;
-		printf("%d ", i);
+		printf("%u ", i);
 	}
 	printf("\n ");
 
@@ -244,19 +244,19 @@ void construct_contexts(void (*bench)(unsigned, unsigned))
 	for(i = 0; i < gpu; i++)
 	{
 		procs2[k++] = i;
-		printf("%d ", i);
+		printf("%u ", i);
 	}
 
 	for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++)
 	{
 		procs2[k++] = i;
-		printf("%d ", i);
+		printf("%u ", i);
 	}
 
 	for(i = n_all_gpus  + cpu1; i < n_all_gpus + cpu1 + cpu2; i++)
 	{
 		procs2[k++] = i;
-		printf("%d ", i);
+		printf("%u ", i);
 	}
 	printf("\n");
 

+ 1 - 1
examples/scheduler/dummy_sched.c

@@ -88,7 +88,6 @@ static int push_task_dummy(struct starpu_task *task)
 
         /*if there are no tasks block */
         /* wake people waiting for a task */
-        unsigned worker = 0;
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
         struct starpu_sched_ctx_iterator it;
@@ -96,6 +95,7 @@ static int push_task_dummy(struct starpu_task *task)
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
         {
+		unsigned worker;
                 worker = workers->get_next(workers, &it);
 		starpu_pthread_mutex_t *sched_mutex;
                 starpu_pthread_cond_t *sched_cond;

+ 8 - 10
examples/spmd/vector_scal_spmd.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  * Copyright (C) 2010-2013, 2015  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -109,16 +109,7 @@ int main(int argc, char **argv)
 	unsigned i;
 	int ret;
 
-	vector = malloc(NX*sizeof(*vector));
-
-	for (i = 0; i < NX; i++)
-		vector[i] = (i+1.0f);
-
-	FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]);
-	FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]);
-
 	starpu_conf_init(&conf);
-
 	conf.single_combined_worker = 1;
 	conf.sched_policy_name = "pheft";
 
@@ -126,6 +117,13 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	vector = malloc(NX*sizeof(*vector));
+	for (i = 0; i < NX; i++)
+		vector[i] = (i+1.0f);
+
+	FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]);
+	FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]);
+
 	starpu_data_handle_t vector_handle;
 	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 

+ 3 - 3
examples/spmv/dw_block_spmv.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2012, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,7 +44,7 @@ static sem_t sem;
 static unsigned c = 256;
 static unsigned r = 256;
 
-static unsigned remainingtasks = -1;
+static int remainingtasks = -1;
 
 static starpu_data_handle_t sparse_matrix;
 static starpu_data_handle_t vector_in, vector_out;
@@ -271,7 +271,7 @@ void launch_spmv_codelets(void)
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
-	printf("end of task submission (there was %d chains for %d tasks : ratio %d tasks per chain) !\n", nchains, totaltasks, totaltasks/nchains);
+	printf("end of task submission (there was %u chains for %u tasks : ratio %u tasks per chain) !\n", nchains, totaltasks, totaltasks/nchains);
 }
 
 void init_problem(void)

+ 6 - 6
examples/spmv/matrix_market/mm_to_bcsr.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2014, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,7 +19,7 @@
 
 static void print_block(tmp_block_t *block, unsigned r, unsigned c)
 {
-	printf(" **** block %d %d **** \n", block->i, block->j);
+	printf(" **** block %u %u **** \n", block->i, block->j);
 
 	unsigned i, j;
 	for (j = 0; j < r; j++)
@@ -47,9 +47,9 @@ static void print_all_blocks(tmp_block_t *block_list, unsigned r, unsigned c)
 static void print_bcsr(bcsr_t *bcsr)
 {
 	fprintf(stderr, "** BSCR **\n");
-	fprintf(stderr, "non zero - blocks = %d\n", bcsr->nnz_blocks);
-	fprintf(stderr, "nrows - blocks = %d\n", bcsr->nrows_blocks);
-	fprintf(stderr, "block size : c %d r %d\n", bcsr->c, bcsr->r);
+	fprintf(stderr, "non zero - blocks = %u\n", bcsr->nnz_blocks);
+	fprintf(stderr, "nrows - blocks = %u\n", bcsr->nrows_blocks);
+	fprintf(stderr, "block size : c %u r %u\n", bcsr->c, bcsr->r);
 }
 
 static unsigned count_blocks(tmp_block_t *block_list)
@@ -358,7 +358,7 @@ bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r)
 
 	for (i=0; i<nz; i++)
 	{
-		fscanf(f, "%d %d %f\n", &I[i], &J[i], &val[i]);
+		fscanf(f, "%u %u %f\n", &I[i], &J[i], &val[i]);
 		I[i]--;  /* adjust from 1-based to 0-based */
 		J[i]--;
 	}

+ 385 - 427
examples/spmv/matrix_market/mmio.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2013, 2014, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,317 +29,280 @@
 
 #include "mmio.h"
 
-int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
-                double **val_, int **I_, int **J_)
+int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, double **val_, int **I_, int **J_)
 {
-    FILE *f;
-    MM_typecode matcode;
-    int M, N, nz;
-    int i;
-    double *val;
-    int *I, *J;
-
-    if ((f = fopen(fname, "r")) == NULL)
-    {
-	    fprintf(stderr, "File <%s> not found\n", fname);
-            return -1;
-    }
-
-    if (mm_read_banner(f, &matcode) != 0)
-    {
-	    fprintf(stderr, "mm_read_unsymetric: Could not process Matrix Market banner ");
-	    fprintf(stderr, " in file [%s]\n", fname);
-	    return -1;
-    }
-
-
-
-    if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
-            mm_is_sparse(matcode)))
-    {
-        fprintf(stderr, "Sorry, this application does not support ");
-        fprintf(stderr, "Market Market type: [%s]\n",
-                mm_typecode_to_str(matcode));
-        return -1;
-    }
-
-    /* find out size of sparse matrix: M, N, nz .... */
-
-    if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0)
-    {
-        fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
-        return -1;
-    }
-
-    *M_ = M;
-    *N_ = N;
-    *nz_ = nz;
-
-    /* reseve memory for matrices */
-
-    I = (int *) malloc(nz * sizeof(int));
-    J = (int *) malloc(nz * sizeof(int));
-    val = (double *) malloc(nz * sizeof(double));
-
-    *val_ = val;
-    *I_ = I;
-    *J_ = J;
-
-    /* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
-    /*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
-    /*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */
-
-    for (i=0; i<nz; i++)
-    {
-        fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
-        I[i]--;  /* adjust from 1-based to 0-based */
-        J[i]--;
-    }
-    fclose(f);
-
-    return 0;
+	/*
+	 * Read a real, sparse (coordinate) Matrix Market file into three freshly
+	 * allocated arrays of nz entries, converting indices to 0-based.
+	 * Returns 0 on success and -1 on any failure. On success the caller
+	 * owns *val_, *I_ and *J_; on failure they are left untouched and
+	 * nothing is leaked.
+	 */
+	FILE *f;
+	MM_typecode matcode;
+	int M, N, nz;
+	int i;
+	double *val = NULL;
+	int *I = NULL, *J = NULL;
+	char *typestr;
+
+	if ((f = fopen(fname, "r")) == NULL)
+	{
+		fprintf(stderr, "File <%s> not found\n", fname);
+		return -1;
+	}
+
+	if (mm_read_banner(f, &matcode) != 0)
+	{
+		fprintf(stderr, "mm_read_unsymetric: Could not process Matrix Market banner ");
+		fprintf(stderr, " in file [%s]\n", fname);
+		goto error;
+	}
+
+	if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode)))
+	{
+		/* mm_typecode_to_str() hands back an allocated copy, or NULL:
+		 * never give NULL to "%s", and release the copy afterwards */
+		typestr = mm_typecode_to_str(matcode);
+		fprintf(stderr, "Sorry, this application does not support ");
+		fprintf(stderr, "Market Market type: [%s]\n", typestr ? typestr : "unknown");
+		free(typestr);
+		goto error;
+	}
+
+	/* find out size of sparse matrix: M, N, nz .... */
+	if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0 || nz < 0)
+	{
+		fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
+		goto error;
+	}
+
+	*M_ = M;
+	*N_ = N;
+	*nz_ = nz;
+
+	/* reserve memory for matrices */
+	I = (int *) malloc(nz * sizeof(int));
+	J = (int *) malloc(nz * sizeof(int));
+	val = (double *) malloc(nz * sizeof(double));
+
+	/* malloc(0) may legitimately return NULL, hence the nz > 0 guard */
+	if (nz > 0 && (I == NULL || J == NULL || val == NULL))
+	{
+		fprintf(stderr, "read_unsymmetric_sparse(): out of memory.\n");
+		goto error;
+	}
+
+	/* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
+	/*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
+	/*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */
+	for (i=0; i<nz; i++)
+	{
+		/* a short count means a truncated or malformed entry */
+		if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]) != 3)
+		{
+			fprintf(stderr, "read_unsymmetric_sparse(): could not read entry %d.\n", i);
+			goto error;
+		}
+		I[i]--;  /* adjust from 1-based to 0-based */
+		J[i]--;
+	}
+	fclose(f);
+
+	*val_ = val;
+	*I_ = I;
+	*J_ = J;
+
+	return 0;
+
+error:
+	/* single exit for every failure after the file was opened */
+	free(I);
+	free(J);
+	free(val);
+	fclose(f);
+	return -1;
 }
 
 int mm_is_valid(MM_typecode matcode)
 {
-    if (!mm_is_matrix(matcode)) return 0;
-    if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0;
-    if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0;
-    if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) ||
-                mm_is_skew(matcode))) return 0;
-    return 1;
+	/* Valid means: a matrix, and none of the contradictory combinations
+	 * (dense pattern, real hermitian, hermitian or skew pattern). */
+	return mm_is_matrix(matcode)
+		&& !(mm_is_dense(matcode) && mm_is_pattern(matcode))
+		&& !(mm_is_real(matcode) && mm_is_hermitian(matcode))
+		&& !(mm_is_pattern(matcode) && (mm_is_hermitian(matcode) || mm_is_skew(matcode)));
 }
 
 int mm_read_banner(FILE *f, MM_typecode *matcode)
 {
-    char line[MM_MAX_LINE_LENGTH];
-    char banner[MM_MAX_TOKEN_LENGTH];
-    char mtx[MM_MAX_TOKEN_LENGTH];
-    char crd[MM_MAX_TOKEN_LENGTH];
-    char data_type[MM_MAX_TOKEN_LENGTH];
-    char storage_scheme[MM_MAX_TOKEN_LENGTH];
-    char *p;
-
-
-    mm_clear_typecode(matcode);
-
-    if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
-        return MM_PREMATURE_EOF;
-
-    if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
-        storage_scheme) != 5)
-        return MM_PREMATURE_EOF;
-
-    for (p=mtx; *p!='\0'; *p=tolower(*p),p++);  /* convert to lower case */
-    for (p=crd; *p!='\0'; *p=tolower(*p),p++);
-    for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
-    for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
-
-    /* check for banner */
-    if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
-        return MM_NO_HEADER;
-
-    /* first field should be "mtx" */
-    if (strcmp(mtx, MM_MTX_STR) != 0)
-        return  MM_UNSUPPORTED_TYPE;
-    mm_set_matrix(matcode);
-
-
-    /* second field describes whether this is a sparse matrix (in coordinate
-            storgae) or a dense array */
-
-
-    if (strcmp(crd, MM_SPARSE_STR) == 0)
-        mm_set_sparse(matcode);
-    else
-    if (strcmp(crd, MM_DENSE_STR) == 0)
-            mm_set_dense(matcode);
-    else
-        return MM_UNSUPPORTED_TYPE;
-
-
-    /* third field */
-
-    if (strcmp(data_type, MM_REAL_STR) == 0)
-        mm_set_real(matcode);
-    else
-    if (strcmp(data_type, MM_COMPLEX_STR) == 0)
-        mm_set_complex(matcode);
-    else
-    if (strcmp(data_type, MM_PATTERN_STR) == 0)
-        mm_set_pattern(matcode);
-    else
-    if (strcmp(data_type, MM_INT_STR) == 0)
-        mm_set_integer(matcode);
-    else
-        return MM_UNSUPPORTED_TYPE;
-
-
-    /* fourth field */
-
-    if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
-        mm_set_general(matcode);
-    else
-    if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
-        mm_set_symmetric(matcode);
-    else
-    if (strcmp(storage_scheme, MM_HERM_STR) == 0)
-        mm_set_hermitian(matcode);
-    else
-    if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
-        mm_set_skew(matcode);
-    else
-        return MM_UNSUPPORTED_TYPE;
-
-
-    return 0;
+	/*
+	 * Parse the first line of a Matrix Market stream (banner, object,
+	 * format, data type, symmetry) and record the result in *matcode.
+	 * Returns 0 on success, or MM_PREMATURE_EOF, MM_NO_HEADER or
+	 * MM_UNSUPPORTED_TYPE.
+	 */
+	char line[MM_MAX_LINE_LENGTH];
+	char banner[MM_MAX_TOKEN_LENGTH];
+	char mtx[MM_MAX_TOKEN_LENGTH];
+	char crd[MM_MAX_TOKEN_LENGTH];
+	char data_type[MM_MAX_TOKEN_LENGTH];
+	char storage_scheme[MM_MAX_TOKEN_LENGTH];
+	char fmt[128];
+	const int width = (int) MM_MAX_TOKEN_LENGTH - 1; /* keep room for the final '\0' */
+	char *p;
+
+	mm_clear_typecode(matcode);
+
+	if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
+		return MM_PREMATURE_EOF;
+
+	/* A macro name is not expanded inside a string literal, so the field
+	 * width that protects the token buffers is formatted in at run time. */
+	snprintf(fmt, sizeof(fmt), "%%%ds %%%ds %%%ds %%%ds %%%ds", width, width, width, width, width);
+	if (sscanf(line, fmt, banner, mtx, crd, data_type, storage_scheme) != 5)
+		return MM_PREMATURE_EOF;
+
+	/* convert to lower case; tolower() needs an unsigned char value */
+	for (p=mtx; *p!='\0'; p++) *p = (char) tolower((unsigned char) *p);
+	for (p=crd; *p!='\0'; p++) *p = (char) tolower((unsigned char) *p);
+	for (p=data_type; *p!='\0'; p++) *p = (char) tolower((unsigned char) *p);
+	for (p=storage_scheme; *p!='\0'; p++) *p = (char) tolower((unsigned char) *p);
+
+	/* check for banner */
+	if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
+		return MM_NO_HEADER;
+
+	/* first field should be "mtx" */
+	if (strcmp(mtx, MM_MTX_STR) != 0)
+		return  MM_UNSUPPORTED_TYPE;
+	mm_set_matrix(matcode);
+
+	/* second field describes whether this is a sparse matrix (in coordinate storage) or a dense array */
+	if (strcmp(crd, MM_SPARSE_STR) == 0)
+		mm_set_sparse(matcode);
+	else if (strcmp(crd, MM_DENSE_STR) == 0)
+		mm_set_dense(matcode);
+	else
+		return MM_UNSUPPORTED_TYPE;
+
+	/* third field */
+	if (strcmp(data_type, MM_REAL_STR) == 0)
+		mm_set_real(matcode);
+	else if (strcmp(data_type, MM_COMPLEX_STR) == 0)
+		mm_set_complex(matcode);
+	else if (strcmp(data_type, MM_PATTERN_STR) == 0)
+		mm_set_pattern(matcode);
+	else if (strcmp(data_type, MM_INT_STR) == 0)
+		mm_set_integer(matcode);
+	else
+		return MM_UNSUPPORTED_TYPE;
+
+	/* fourth field */
+	if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
+		mm_set_general(matcode);
+	else if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
+		mm_set_symmetric(matcode);
+	else if (strcmp(storage_scheme, MM_HERM_STR) == 0)
+		mm_set_hermitian(matcode);
+	else if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
+		mm_set_skew(matcode);
+	else
+		return MM_UNSUPPORTED_TYPE;
+
+	return 0;
 }
 
 int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz)
 {
-    if (fprintf(f, "%d %d %d\n", M, N, nz) != 3)
-        return MM_COULD_NOT_WRITE_FILE;
-    else
-        return 0;
+	/* Write the "M N nz" size line. fprintf() returns the number of
+	 * characters written (negative on error), not the number of items,
+	 * so only a negative result means the write failed. */
+	if (fprintf(f, "%d %d %d\n", M, N, nz) < 0)
+		return MM_COULD_NOT_WRITE_FILE;
+	return 0;
 }
 
 int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
 {
-    char line[MM_MAX_LINE_LENGTH];
-    int num_items_read;
-
-    /* set return null parameter values, in case we exit with errors */
-    *M = *N = *nz = 0;
-
-    /* now continue scanning until you reach the end-of-comments */
-    do
-    {
-        if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
-            return MM_PREMATURE_EOF;
-    }while (line[0] == '%');
-
-    /* line[] is either blank or has M,N, nz */
-    if (sscanf(line, "%d %d %d", M, N, nz) == 3)
-        return 0;
-
-    else
-    do
-    {
-        num_items_read = fscanf(f, "%d %d %d", M, N, nz);
-        if (num_items_read == EOF) return MM_PREMATURE_EOF;
-    }
-    while (num_items_read != 3);
-
-    return 0;
+	/*
+	 * Skip the comment lines and read the "M N nz" size line of a
+	 * coordinate matrix. Returns 0 on success, MM_PREMATURE_EOF otherwise
+	 * (in which case *M, *N and *nz may hold partial values).
+	 */
+	char line[MM_MAX_LINE_LENGTH];
+
+	/* set return null parameter values, in case we exit with errors */
+	*M = *N = *nz = 0;
+
+	/* now continue scanning until you reach the end-of-comments */
+	do
+	{
+		if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
+			return MM_PREMATURE_EOF;
+	} while (line[0] == '%');
+
+	/* line[] is either blank or has M, N, nz */
+	if (sscanf(line, "%d %d %d", M, N, nz) == 3)
+		return 0;
+
+	/* Otherwise the sizes follow in the stream. fscanf() leaves input it
+	 * cannot convert unread, so a second attempt after a short count can
+	 * never succeed: looping until 3 items are read would spin forever on
+	 * a malformed file. Anything but a full match is an error. */
+	if (fscanf(f, "%d %d %d", M, N, nz) != 3)
+		return MM_PREMATURE_EOF;
+
+	return 0;
 }
 
-
 int mm_read_mtx_array_size(FILE *f, int *M, int *N)
 {
-    char line[MM_MAX_LINE_LENGTH];
-    int num_items_read;
-    /* set return null parameter values, in case we exit with errors */
-    *M = *N = 0;
-
-    /* now continue scanning until you reach the end-of-comments */
-    do
-    {
-        if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
-            return MM_PREMATURE_EOF;
-    }while (line[0] == '%');
-
-    /* line[] is either blank or has M,N, nz */
-    if (sscanf(line, "%d %d", M, N) == 2)
-        return 0;
-
-    else /* we have a blank line */
-    do
-    {
-        num_items_read = fscanf(f, "%d %d", M, N);
-        if (num_items_read == EOF) return MM_PREMATURE_EOF;
-    }
-    while (num_items_read != 2);
-
-    return 0;
+	/*
+	 * Skip the comment lines and read the "M N" size line of a dense
+	 * (array) matrix. Returns 0 on success, MM_PREMATURE_EOF otherwise
+	 * (in which case *M and *N may hold partial values).
+	 */
+	char line[MM_MAX_LINE_LENGTH];
+	/* set return null parameter values, in case we exit with errors */
+	*M = *N = 0;
+
+	/* now continue scanning until you reach the end-of-comments */
+	do
+	{
+		if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
+			return MM_PREMATURE_EOF;
+	} while (line[0] == '%');
+
+	/* line[] is either blank or has M, N */
+	if (sscanf(line, "%d %d", M, N) == 2)
+		return 0;
+
+	/* Otherwise the sizes follow in the stream. fscanf() leaves input it
+	 * cannot convert unread, so a second attempt after a short count can
+	 * never succeed: looping until 2 items are read would spin forever on
+	 * a malformed file. Anything but a full match is an error. */
+	if (fscanf(f, "%d %d", M, N) != 2)
+		return MM_PREMATURE_EOF;
+
+	return 0;
 }
 
 int mm_write_mtx_array_size(FILE *f, int M, int N)
 {
-    if (fprintf(f, "%d %d\n", M, N) != 2)
-        return MM_COULD_NOT_WRITE_FILE;
-    else
-        return 0;
+	/* Write the "M N" size line. fprintf() returns the number of
+	 * characters written (negative on error), not the number of items,
+	 * so only a negative result means the write failed. */
+	if (fprintf(f, "%d %d\n", M, N) < 0)
+		return MM_COULD_NOT_WRITE_FILE;
+	return 0;
 }
 
-
-
 /*-------------------------------------------------------------------------*/
 
 /******************************************************************/
 /* use when I[], J[], and val[]J, and val[] are already allocated */
 /******************************************************************/
 
-int mm_read_mtx_crd_data(FILE *f, int M,
-			 int N, int nz, int I[], int J[],
-        double val[], MM_typecode matcode)
+int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[], double val[], MM_typecode matcode)
 {
-    int i;
-    if (mm_is_complex(matcode))
-    {
-        for (i=0; i<nz; i++)
-            if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2*i], &val[2*i+1])
-                != 4) return MM_PREMATURE_EOF;
-    }
-    else if (mm_is_real(matcode))
-    {
-        for (i=0; i<nz; i++)
-        {
-            if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i])
-                != 3) return MM_PREMATURE_EOF;
-
-        }
-    }
-
-    else if (mm_is_pattern(matcode))
-    {
-        for (i=0; i<nz; i++)
-            if (fscanf(f, "%d %d", &I[i], &J[i])
-                != 2) return MM_PREMATURE_EOF;
-    }
-    else
-        return MM_UNSUPPORTED_TYPE;
-
-    return 0;
-
+	/* Fill the caller-allocated I[], J[] (and val[] unless the matrix is a
+	 * pattern) with nz entries; M and N are not consulted. Returns 0,
+	 * MM_PREMATURE_EOF on a short read, or MM_UNSUPPORTED_TYPE. */
+	int k = 0;
+
+	if (mm_is_complex(matcode))
+	{
+		/* row, column, real part, imaginary part */
+		while (k < nz)
+		{
+			if (fscanf(f, "%d %d %lg %lg", &I[k], &J[k], &val[2*k], &val[2*k+1]) != 4)
+				return MM_PREMATURE_EOF;
+			k++;
+		}
+		return 0;
+	}
+
+	if (mm_is_real(matcode))
+	{
+		/* row, column, value */
+		while (k < nz)
+		{
+			if (fscanf(f, "%d %d %lg\n", &I[k], &J[k], &val[k]) != 3)
+				return MM_PREMATURE_EOF;
+			k++;
+		}
+		return 0;
+	}
+
+	if (mm_is_pattern(matcode))
+	{
+		/* row, column only */
+		while (k < nz)
+		{
+			if (fscanf(f, "%d %d", &I[k], &J[k]) != 2)
+				return MM_PREMATURE_EOF;
+			k++;
+		}
+		return 0;
+	}
+
+	return MM_UNSUPPORTED_TYPE;
 }
 
-int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
-        double *real, double *imag, MM_typecode matcode)
+int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *imag, MM_typecode matcode)
 {
-    if (mm_is_complex(matcode))
-    {
-            if (fscanf(f, "%d %d %lg %lg", I, J, real, imag)
-                != 4) return MM_PREMATURE_EOF;
-    }
-    else if (mm_is_real(matcode))
-    {
-            if (fscanf(f, "%d %d %lg\n", I, J, real)
-                != 3) return MM_PREMATURE_EOF;
-
-    }
-
-    else if (mm_is_pattern(matcode))
-    {
-            if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF;
-    }
-    else
-        return MM_UNSUPPORTED_TYPE;
-
-    return 0;
-
+	/* Read a single coordinate entry; the number of fields expected on
+	 * the line depends on the data type recorded in matcode. */
+	int wanted, got;
+
+	if (mm_is_complex(matcode))
+	{
+		wanted = 4;
+		got = fscanf(f, "%d %d %lg %lg", I, J, real, imag);
+	}
+	else if (mm_is_real(matcode))
+	{
+		wanted = 3;
+		got = fscanf(f, "%d %d %lg\n", I, J, real);
+	}
+	else if (mm_is_pattern(matcode))
+	{
+		wanted = 2;
+		got = fscanf(f, "%d %d", I, J);
+	}
+	else
+		return MM_UNSUPPORTED_TYPE;
+
+	return (got == wanted) ? 0 : MM_PREMATURE_EOF;
 }
 
-
 /************************************************************************
     mm_read_mtx_crd()  fills M, N, nz, array of values, and return
                         type code, e.g. 'MCRS'
@@ -348,116 +311,123 @@ int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
                             (nz pairs of real/imaginary values)
 ************************************************************************/
 
-int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
-        double **val, MM_typecode *matcode)
+int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J, double **val, MM_typecode *matcode)
 {
-    int ret_code;
-    FILE *f;
-
-    if (strcmp(fname, "stdin") == 0) f=stdin;
-    else
-    if ((f = fopen(fname, "r")) == NULL)
-        return MM_COULD_NOT_READ_FILE;
-
-
-    if ((ret_code = mm_read_banner(f, matcode)) != 0)
-        return ret_code;
-
-    if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) &&
-            mm_is_matrix(*matcode)))
-        return MM_UNSUPPORTED_TYPE;
-
-    if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0)
-        return ret_code;
-
-
-    *I = (int *)  malloc(*nz * sizeof(int));
-    *J = (int *)  malloc(*nz * sizeof(int));
-    *val = NULL;
-
-    if (mm_is_complex(*matcode))
-    {
-        *val = (double *) malloc(*nz * 2 * sizeof(double));
-        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
-                *matcode);
-        if (ret_code != 0) return ret_code;
-    }
-    else if (mm_is_real(*matcode))
-    {
-        *val = (double *) malloc(*nz * sizeof(double));
-        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
-                *matcode);
-        if (ret_code != 0) return ret_code;
-    }
-
-    else if (mm_is_pattern(*matcode))
-    {
-        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
-                *matcode);
-        if (ret_code != 0) return ret_code;
-    }
-
-    if (f != stdin) fclose(f);
-    return 0;
+	/*
+	 * Open fname ("stdin" selects the standard input), read the banner,
+	 * the sizes and the nz entries of a sparse matrix into newly
+	 * allocated arrays. Returns 0 on success or an MM_* error code.
+	 * Once the input is open, *I, *J and *val are reset to NULL; on
+	 * failure nothing stays allocated and they are NULL again, on success
+	 * the caller owns them (*val remains NULL for pattern matrices).
+	 */
+	int ret_code;
+	int has_values;
+	FILE *f;
+
+	if (strcmp(fname, "stdin") == 0)
+		f = stdin;
+	else if ((f = fopen(fname, "r")) == NULL)
+		return MM_COULD_NOT_READ_FILE;
+
+	/* lets the common exit below free them unconditionally */
+	*I = NULL;
+	*J = NULL;
+	*val = NULL;
+
+	if ((ret_code = mm_read_banner(f, matcode)) != 0)
+		goto out;
+
+	if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) && mm_is_matrix(*matcode)))
+	{
+		ret_code = MM_UNSUPPORTED_TYPE;
+		goto out;
+	}
+
+	if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0)
+		goto out;
+
+	*I = (int *) malloc(*nz * sizeof(int));
+	*J = (int *) malloc(*nz * sizeof(int));
+
+	/* complex entries take two doubles each, real entries one, patterns none */
+	has_values = mm_is_complex(*matcode) || mm_is_real(*matcode);
+	if (mm_is_complex(*matcode))
+		*val = (double *) malloc(*nz * 2 * sizeof(double));
+	else if (mm_is_real(*matcode))
+		*val = (double *) malloc(*nz * sizeof(double));
+
+	/* malloc(0) may legitimately return NULL, hence the *nz > 0 guard;
+	 * no dedicated out-of-memory code is visible here, so the generic
+	 * read failure is reported */
+	if (*nz > 0 && (*I == NULL || *J == NULL || (has_values && *val == NULL)))
+	{
+		ret_code = MM_COULD_NOT_READ_FILE;
+		goto out;
+	}
+
+	/* other data types have no entries read: ret_code is still 0 */
+	if (has_values || mm_is_pattern(*matcode))
+		ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, *matcode);
+
+out:
+	if (ret_code != 0)
+	{
+		free(*I);
+		free(*J);
+		free(*val);
+		*I = NULL;
+		*J = NULL;
+		*val = NULL;
+	}
+	if (f != stdin) fclose(f);
+	return ret_code;
 }
 
 int mm_write_banner(FILE *f, MM_typecode matcode)
 {
-    char *str = mm_typecode_to_str(matcode);
-    int ret_code;
-
-    ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
-    free(str);
-    if (ret_code !=2 )
-        return MM_COULD_NOT_WRITE_FILE;
-    else
-        return 0;
+	/* Write the banner line followed by the textual typecode. Returns 0,
+	 * MM_UNSUPPORTED_TYPE when the typecode has no textual form, or
+	 * MM_COULD_NOT_WRITE_FILE when the write fails. */
+	char *str = mm_typecode_to_str(matcode);
+	int ret_code;
+
+	/* mm_typecode_to_str() returns NULL for typecodes it cannot name;
+	 * NULL must never reach "%s" */
+	if (str == NULL)
+		return MM_UNSUPPORTED_TYPE;
+
+	ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
+	free(str);
+
+	/* fprintf() returns a character count, negative on failure */
+	if (ret_code < 0)
+		return MM_COULD_NOT_WRITE_FILE;
+	return 0;
 }
 
-int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
-        double val[], MM_typecode matcode)
+int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[], double val[], MM_typecode matcode)
 {
-    FILE *f;
-    int i;
-
-    if (strcmp(fname, "stdout") == 0)
-        f = stdout;
-    else
-    if ((f = fopen(fname, "w")) == NULL)
-        return MM_COULD_NOT_WRITE_FILE;
-
-    /* print banner followed by typecode */
-    fprintf(f, "%s ", MatrixMarketBanner);
-    fprintf(f, "%s\n", mm_typecode_to_str(matcode));
-
-    /* print matrix sizes and nonzeros */
-    fprintf(f, "%d %d %d\n", M, N, nz);
-
-    /* print values */
-    if (mm_is_pattern(matcode))
-        for (i=0; i<nz; i++)
-            fprintf(f, "%d %d\n", I[i], J[i]);
-    else
-    if (mm_is_real(matcode))
-        for (i=0; i<nz; i++)
-            fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
-    else
-    if (mm_is_complex(matcode))
-        for (i=0; i<nz; i++)
-            fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i],
-                        val[2*i+1]);
-    else
-    {
-        if (f != stdout) fclose(f);
-        return MM_UNSUPPORTED_TYPE;
-    }
-
-    if (f !=stdout) fclose(f);
-
-    return 0;
+	/*
+	 * Write a sparse matrix in coordinate format to fname ("stdout"
+	 * selects the standard output): banner, sizes, then nz entries.
+	 * Returns 0, MM_COULD_NOT_WRITE_FILE if the file cannot be opened, or
+	 * MM_UNSUPPORTED_TYPE for a typecode that cannot be written.
+	 */
+	FILE *f;
+	int i;
+	char *typestr;
+
+	/* Resolve the typecode text first: it is an allocated copy that must
+	 * be freed, and it is NULL for typecodes that cannot be named, in
+	 * which case no output file is created at all. */
+	typestr = mm_typecode_to_str(matcode);
+	if (typestr == NULL)
+		return MM_UNSUPPORTED_TYPE;
+
+	if (strcmp(fname, "stdout") == 0)
+		f = stdout;
+	else if ((f = fopen(fname, "w")) == NULL)
+	{
+		free(typestr);
+		return MM_COULD_NOT_WRITE_FILE;
+	}
+
+	/* print banner followed by typecode */
+	fprintf(f, "%s ", MatrixMarketBanner);
+	fprintf(f, "%s\n", typestr);
+	free(typestr);
+
+	/* print matrix sizes and nonzeros */
+	fprintf(f, "%d %d %d\n", M, N, nz);
+
+	/* print values */
+	if (mm_is_pattern(matcode))
+		for (i=0; i<nz; i++)
+			fprintf(f, "%d %d\n", I[i], J[i]);
+	else if (mm_is_real(matcode))
+		for (i=0; i<nz; i++)
+			fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
+	else if (mm_is_complex(matcode))
+		for (i=0; i<nz; i++)
+			fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i], val[2*i+1]);
+	else
+	{
+		if (f != stdout) fclose(f);
+		return MM_UNSUPPORTED_TYPE;
+	}
+
+	if (f !=stdout) fclose(f);
+	return 0;
 }
 
-
 /**
 *  Create a new copy of a string s.  mm_strdup() is a common routine, but
 *  not part of ANSI C, so it is included here.  Used by mm_typecode_to_str().
@@ -472,58 +442,46 @@ char *mm_strdup(const char *s)
 
 char  *mm_typecode_to_str(MM_typecode matcode)
 {
-    char buffer[MM_MAX_LINE_LENGTH];
-    char *types[4];
-/*	char *mm_strdup(const char *); */
-    int error =0;
-
-    /* check for MTX type */
-    if (mm_is_matrix(matcode))
-        types[0] = MM_MTX_STR;
-    else
-        error=1;
-
-    /* check for CRD or ARR matrix */
-    if (mm_is_sparse(matcode))
-        types[1] = MM_SPARSE_STR;
-    else
-    if (mm_is_dense(matcode))
-        types[1] = MM_DENSE_STR;
-    else
-        return NULL;
-
-    /* check for element data type */
-    if (mm_is_real(matcode))
-        types[2] = MM_REAL_STR;
-    else
-    if (mm_is_complex(matcode))
-        types[2] = MM_COMPLEX_STR;
-    else
-    if (mm_is_pattern(matcode))
-        types[2] = MM_PATTERN_STR;
-    else
-    if (mm_is_integer(matcode))
-        types[2] = MM_INT_STR;
-    else
-        return NULL;
-
-
-    /* check for symmetry type */
-    if (mm_is_general(matcode))
-        types[3] = MM_GENERAL_STR;
-    else
-    if (mm_is_symmetric(matcode))
-        types[3] = MM_SYMM_STR;
-    else
-    if (mm_is_hermitian(matcode))
-        types[3] = MM_HERM_STR;
-    else
-    if (mm_is_skew(matcode))
-        types[3] = MM_SKEW_STR;
-    else
-        return NULL;
-
-    sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]);
-    return mm_strdup(buffer);
-
+	/*
+	 * Build the four-word textual form of a typecode (object, format,
+	 * data type, symmetry). Returns a copy made with mm_strdup() that
+	 * the caller must free, or NULL when any of the four fields is not
+	 * recognised.
+	 */
+	char buffer[MM_MAX_LINE_LENGTH];
+	const char *types[4];
+
+	/* check for MTX type: without it types[0] would be left unset and
+	 * then read by the formatting call below */
+	if (!mm_is_matrix(matcode))
+		return NULL;
+	types[0] = MM_MTX_STR;
+
+	/* check for CRD or ARR matrix */
+	if (mm_is_sparse(matcode))
+		types[1] = MM_SPARSE_STR;
+	else if (mm_is_dense(matcode))
+		types[1] = MM_DENSE_STR;
+	else
+		return NULL;
+
+	/* check for element data type */
+	if (mm_is_real(matcode))
+		types[2] = MM_REAL_STR;
+	else if (mm_is_complex(matcode))
+		types[2] = MM_COMPLEX_STR;
+	else if (mm_is_pattern(matcode))
+		types[2] = MM_PATTERN_STR;
+	else if (mm_is_integer(matcode))
+		types[2] = MM_INT_STR;
+	else
+		return NULL;
+
+	/* check for symmetry type */
+	if (mm_is_general(matcode))
+		types[3] = MM_GENERAL_STR;
+	else if (mm_is_symmetric(matcode))
+		types[3] = MM_SYMM_STR;
+	else if (mm_is_hermitian(matcode))
+		types[3] = MM_HERM_STR;
+	else if (mm_is_skew(matcode))
+		types[3] = MM_SKEW_STR;
+	else
+		return NULL;
+
+	/* bounded by the buffer size */
+	snprintf(buffer, sizeof(buffer), "%s %s %s %s", types[0], types[1], types[2], types[3]);
+	return mm_strdup(buffer);
 }

+ 0 - 1
examples/spmv/spmv_kernels.c

@@ -27,7 +27,6 @@ void spmv_kernel_opencl(void *descr[], void *args)
 {
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 	int id, devid, err, n;
 
 	int nnz = (int) STARPU_CSR_GET_NNZ(descr[0]);

+ 0 - 1
examples/stencil/life_opencl.c

@@ -113,7 +113,6 @@ opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int
   clSetKernelArg(kernel, 7, sizeof(ldz), &ldz);
   clSetKernelArg(kernel, 8, sizeof(iter), &iter);
 
-  cl_event ev;
   err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL);
   if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 }

+ 0 - 1
examples/stencil/shadow_opencl.c

@@ -109,7 +109,6 @@ opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz,
         clSetKernelArg(kernel, 6, sizeof(ldz), &ldz);
         clSetKernelArg(kernel, 7, sizeof(i), &i);
 
-        cl_event ev;
         err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL);
         if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 }

+ 1 - 1
examples/stencil/stencil-blocks.c

@@ -395,7 +395,7 @@ void check(int rank)
 				for (y = 0; y < sizey; y++)
 					for (z = 0; z < size_bz; z++)
 						sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)];
-			printf("block %d got %d/%d alive\n", bz, sum, sizex*sizey*size_bz);
+			printf("block %u got %u/%u alive\n", bz, sum, sizex*sizey*size_bz);
 #endif
 		}
 	}

+ 6 - 8
examples/stencil/stencil-kernels.c

@@ -186,9 +186,9 @@ static void update_func_cuda(void *descr[], void *arg)
 	int workerid = starpu_worker_get_id_check();
 	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 	if (block->bz == 0)
-		FPRINTF(stderr,"!!! DO update_func_cuda z %d CUDA%d !!!\n", block->bz, workerid);
+		FPRINTF(stderr,"!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid);
 	else
-		DEBUG( "!!! DO update_func_cuda z %d CUDA%d !!!\n", block->bz, workerid);
+		DEBUG( "!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid);
 #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
 	int rank = 0;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -257,7 +257,6 @@ static void load_subblock_from_buffer_opencl(struct starpu_block_interface *bloc
 	unsigned offset = firstz*block->ldz;
 	cl_mem block_data = (cl_mem)block->dev_handle;
 	cl_mem boundary_data = (cl_mem)boundary->dev_handle;
-	cl_event event;
 
         cl_command_queue cq;
         starpu_opencl_get_current_queue(&cq);
@@ -274,9 +273,9 @@ static void update_func_opencl(void *descr[], void *arg)
 	int workerid = starpu_worker_get_id_check();
 	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 	if (block->bz == 0)
-		FPRINTF(stderr,"!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, workerid);
+		FPRINTF(stderr,"!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid);
 	else
-		DEBUG( "!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, workerid);
+		DEBUG( "!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid);
 #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
 	int rank = 0;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -344,9 +343,9 @@ void update_func_cpu(void *descr[], void *arg)
 	int workerid = starpu_worker_get_id_check();
 	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 	if (block->bz == 0)
-		FPRINTF(stderr,"!!! DO update_func_cpu z %d CPU%d !!!\n", block->bz, workerid);
+		FPRINTF(stderr,"!!! DO update_func_cpu z %u CPU%d !!!\n", block->bz, workerid);
 	else
-		DEBUG( "!!! DO update_func_cpu z %d CPU%d !!!\n", block->bz, workerid);
+		DEBUG( "!!! DO update_func_cpu z %u CPU%d !!!\n", block->bz, workerid);
 #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
 	int rank = 0;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -491,7 +490,6 @@ static void load_subblock_into_buffer_opencl(struct starpu_block_interface *bloc
 
         cl_command_queue cq;
         starpu_opencl_get_current_queue(&cq);
-	cl_event event;
 
         cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, NULL);
 	if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);

+ 6 - 6
examples/stencil/stencil.c

@@ -114,10 +114,10 @@ static void parse_args(int argc, char **argv)
 			 fprintf(stderr, "\n");
 			 fprintf(stderr, "Options:\n");
 			 fprintf(stderr, "-b			bind tasks on CPUs/GPUs\n");
-			 fprintf(stderr, "-nbz <n>		Number of blocks on Z axis (%d by default)\n", nbz);
-			 fprintf(stderr, "-size[xyz] <size>	Domain size on x/y/z axis (%dx%dx%d by default)\n", sizex, sizey, sizez);
-			 fprintf(stderr, "-niter <n>		Number of iterations (%d by default)\n", niter);
-			 fprintf(stderr, "-ticks <t>		How often to put ticks in the output (ms, %d by default)\n", ticks);
+			 fprintf(stderr, "-nbz <n>		Number of blocks on Z axis (%u by default)\n", nbz);
+			 fprintf(stderr, "-size[xyz] <size>	Domain size on x/y/z axis (%ux%ux%u by default)\n", sizex, sizey, sizez);
+			 fprintf(stderr, "-niter <n>		Number of iterations (%u by default)\n", niter);
+			 fprintf(stderr, "-ticks <t>		How often to put ticks in the output (ms, %u by default)\n", ticks);
 			 exit(0);
 		}
 	}
@@ -181,7 +181,7 @@ void f(unsigned task_per_worker[STARPU_NMAXWORKERS])
 		{
 			char name[32];
 			starpu_worker_get_name(worker, name, sizeof(name));
-			FPRINTF(stderr,"\t%s -> %d (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
+			FPRINTF(stderr,"\t%s -> %u (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
 		}
 	}
 }
@@ -328,9 +328,9 @@ int main(int argc, char **argv)
 		unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size;
 
 		int iter;
-		unsigned last, bz;
 		for (iter = 0; iter < who_runs_what_len; iter++)
 		{
+			unsigned last, bz;
 			last = 1;
 			for (bz = 0; bz < nbz; bz++)
 			{

+ 3 - 2
examples/tag_example/tag_example2.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2012-2013, 2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -76,12 +76,13 @@ static void tag_cleanup_grid(unsigned iter)
 static int create_task_grid(unsigned iter)
 {
 	unsigned i;
-	int ret;
 
 /*	FPRINTF(stderr, "start iter %d ni %d...\n", iter, ni); */
 
 	for (i = 0; i < ni; i++)
 	{
+		int ret;
+
 		/* create a new task */
 		struct starpu_task *task = starpu_task_create();
 

+ 3 - 2
examples/tag_example/tag_example3.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2012-2013, 2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -78,12 +78,13 @@ static void tag_cleanup_grid(unsigned iter)
 static int create_task_grid(unsigned iter)
 {
 	int i;
-	int ret;
 
 /*	FPRINTF(stderr, "start iter %d ni %d...\n", iter, ni); */
 
 	for (i = ni - 1; i > 0; i--)
 	{
+		int ret;
+
 		/* create a new task */
 		struct starpu_task *task = starpu_task_create();
 

+ 2 - 2
examples/tag_example/tag_restartable.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2013, 2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -109,12 +109,12 @@ static void create_task_grid(unsigned iter)
 static int start_task_grid(unsigned iter)
 {
 	unsigned i;
-	int ret;
 
 	/* FPRINTF(stderr, "start grid %d ni %d...\n", iter, ni); */
 
 	for (i = 0; i < ni; i++)
 	{
+		int ret;
 		ret = starpu_task_submit(tasks[iter][i]);
 		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

+ 5 - 5
examples/worker_collections/worker_list_example.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
- * Copyright (C) 2010-2014  CNRS
+ * Copyright (C) 2010-2014, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -45,7 +45,7 @@ int main()
 	co->init_iterator = worker_list.init_iterator;
 	co->type = STARPU_WORKER_LIST;
 
-	FPRINTF(stderr, "ncpus %d \n", ncpus);
+	FPRINTF(stderr, "ncpus %u\n", ncpus);
 
 	double start_time;
 	double end_time;
@@ -72,19 +72,19 @@ int main()
 	while(co->has_next(co, &it))
 	{
 		pu = co->get_next(co, &it);
-		FPRINTF(stderr, "pu = %d out of %d workers \n", pu, co->nworkers);
+		FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers);
 	}
 
 	for(i = 0; i < 6; i++)
 	{
 		co->remove(co, i);
-		FPRINTF(stderr, "remove %d out of %d workers\n", i, co->nworkers);
+		FPRINTF(stderr, "remove %u out of %u workers\n", i, co->nworkers);
 	}
 
 	while(co->has_next(co, &it))
 	{
 		pu = co->get_next(co, &it);
-		FPRINTF(stderr, "pu = %d out of %d workers \n", pu, co->nworkers);
+		FPRINTF(stderr, "pu = %d out of %u workers\n", pu, co->nworkers);
 	}
 
 	FPRINTF(stderr, "timing init = %lf \n", timing);

+ 27 - 0
include/fstarpu_mod.f90

@@ -72,16 +72,20 @@ module fstarpu_mod
         type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_NESTED
         type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_AWAKE_WORKERS
         type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_INIT
+        type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_USER_DATA
 
         ! (some) portable iso_c_binding types
         type(c_ptr), bind(C) :: FSTARPU_SZ_C_DOUBLE
         type(c_ptr), bind(C) :: FSTARPU_SZ_C_FLOAT
+        type(c_ptr), bind(C) :: FSTARPU_SZ_C_CHAR
         type(c_ptr), bind(C) :: FSTARPU_SZ_C_INT
         type(c_ptr), bind(C) :: FSTARPU_SZ_C_INTPTR_T
         type(c_ptr), bind(C) :: FSTARPU_SZ_C_PTR
         type(c_ptr), bind(C) :: FSTARPU_SZ_C_SIZE_T
 
         ! (some) native Fortran types
+        type(c_ptr), bind(C) :: FSTARPU_SZ_CHARACTER
+
         type(c_ptr), bind(C) :: FSTARPU_SZ_INTEGER
         type(c_ptr), bind(C) :: FSTARPU_SZ_INT4
         type(c_ptr), bind(C) :: FSTARPU_SZ_INT8
@@ -93,6 +97,8 @@ module fstarpu_mod
         type(c_ptr), bind(C) :: FSTARPU_SZ_DOUBLE_PRECISION
 
         type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX
+        type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX4
+        type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX8
 
         interface operator (.ior.)
                 procedure or_cptrs
@@ -1727,6 +1733,7 @@ module fstarpu_mod
                 end subroutine fstarpu_sched_ctx_finished_submit
 
                 ! unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids);
+                ! unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids);
 
                 ! unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id);
                 function fstarpu_sched_ctx_get_nworkers (sched_ctx_id) &
@@ -1867,6 +1874,14 @@ module fstarpu_mod
                         integer(c_int), value, intent(in) :: sched_ctx_id
                 end function fstarpu_sched_ctx_max_priority_is_set
 
+                ! void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id);
+                function fstarpu_sched_ctx_get_user_data(sched_ctx_id) &
+                                bind(c,name="starpu_sched_ctx_get_user_data")
+                        use iso_c_binding, only: c_int, c_ptr
+                        integer(c_int), value, intent(in) :: sched_ctx_id
+                        type(c_ptr) :: fstarpu_sched_ctx_get_user_data
+                end function fstarpu_sched_ctx_get_user_data
+
                 ! struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC;
 
                 ! void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id);
@@ -2160,11 +2175,14 @@ module fstarpu_mod
 
                         real(c_double) :: FSTARPU_SZ_C_DOUBLE_dummy
                         real(c_float) :: FSTARPU_SZ_C_FLOAT_dummy
+                        character(c_char) :: FSTARPU_SZ_C_CHAR_dummy
                         integer(c_int) :: FSTARPU_SZ_C_INT_dummy
                         integer(c_intptr_t) :: FSTARPU_SZ_C_INTPTR_T_dummy
                         type(c_ptr) :: FSTARPU_SZ_C_PTR_dummy
                         integer(c_size_t) :: FSTARPU_SZ_C_SIZE_T_dummy
 
+                        character :: FSTARPU_SZ_CHARACTER_dummy
+
                         integer :: FSTARPU_SZ_INTEGER_dummy
                         integer(4) :: FSTARPU_SZ_INT4_dummy
                         integer(8) :: FSTARPU_SZ_INT8_dummy
@@ -2176,6 +2194,8 @@ module fstarpu_mod
                         double precision :: FSTARPU_SZ_DOUBLE_PRECISION_dummy
 
                         complex :: FSTARPU_SZ_COMPLEX_dummy
+                        complex(4) :: FSTARPU_SZ_COMPLEX4_dummy
+                        complex(8) :: FSTARPU_SZ_COMPLEX8_dummy
 
                         ! Note: Referencing global C constants from Fortran has
                         ! been found unreliable on some architectures, notably
@@ -2259,15 +2279,20 @@ module fstarpu_mod
                             fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_AWAKE_WORKERS"//C_NULL_CHAR)
                         FSTARPU_SCHED_CTX_POLICY_INIT    = &
                             fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_INIT"//C_NULL_CHAR)
+                        FSTARPU_SCHED_CTX_USER_DATA    = &
+                            fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_USER_DATA"//C_NULL_CHAR)
 
                         ! Initialize size constants as 'c_ptr'
                         FSTARPU_SZ_C_DOUBLE        = sz_to_p(c_sizeof(FSTARPU_SZ_C_DOUBLE_dummy))
                         FSTARPU_SZ_C_FLOAT        = sz_to_p(c_sizeof(FSTARPU_SZ_C_FLOAT_dummy))
+                        FSTARPU_SZ_C_CHAR        = sz_to_p(c_sizeof(FSTARPU_SZ_C_CHAR_dummy))
                         FSTARPU_SZ_C_INT        = sz_to_p(c_sizeof(FSTARPU_SZ_C_INT_dummy))
                         FSTARPU_SZ_C_INTPTR_T   = sz_to_p(c_sizeof(FSTARPU_SZ_C_INTPTR_T_dummy))
                         FSTARPU_SZ_C_PTR        = sz_to_p(c_sizeof(FSTARPU_SZ_C_PTR_dummy))
                         FSTARPU_SZ_C_SIZE_T        = sz_to_p(c_sizeof(FSTARPU_SZ_C_SIZE_T_dummy))
 
+                        FSTARPU_SZ_CHARACTER        = sz_to_p(c_sizeof(FSTARPU_SZ_CHARACTER_dummy))
+
                         FSTARPU_SZ_INTEGER         = sz_to_p(c_sizeof(FSTARPU_SZ_INTEGER_dummy))
                         FSTARPU_SZ_INT4         = sz_to_p(c_sizeof(FSTARPU_SZ_INT4_dummy))
                         FSTARPU_SZ_INT8         = sz_to_p(c_sizeof(FSTARPU_SZ_INT8_dummy))
@@ -2279,6 +2304,8 @@ module fstarpu_mod
                         FSTARPU_SZ_DOUBLE_PRECISION        = sz_to_p(c_sizeof(FSTARPU_SZ_DOUBLE_PRECISION_dummy))
 
                         FSTARPU_SZ_COMPLEX        = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX_dummy))
+                        FSTARPU_SZ_COMPLEX4        = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX4_dummy))
+                        FSTARPU_SZ_COMPLEX8        = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy))
 
                         ! Initialize StarPU
                         if (c_associated(conf)) then 

+ 6 - 5
include/pthread_win32/pthread.h

@@ -1,13 +1,14 @@
-/*
- * StarPU
- * Copyright (C) Université Bordeaux, CNRS 2010 (see AUTHORS file)
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * This program is free software; you can redistribute it and/or modify
+ * Copyright (C) 2010 Université Bordeaux
+ * Copyright (C) 2010  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * the Free Software Foundation; either version 2.1 of the License, or (at
  * your option) any later version.
  *
- * This program is distributed in the hope that it will be useful, but
+ * StarPU is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  *

+ 6 - 5
include/pthread_win32/semaphore.h

@@ -1,13 +1,14 @@
-/*
- * StarPU
- * Copyright (C) Université Bordeaux, CNRS 2010 (see AUTHORS file)
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * This program is free software; you can redistribute it and/or modify
+ * Copyright (C) 2010 Université Bordeaux
+ * Copyright (C) 2010  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * the Free Software Foundation; either version 2.1 of the License, or (at
  * your option) any later version.
  *
- * This program is distributed in the hope that it will be useful, but
+ * StarPU is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  *

+ 3 - 1
include/starpu_config.h.in

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  * Copyright (C) 2014  INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -104,6 +104,8 @@
 #undef STARPU_HAVE_UNSETENV
 #undef STARPU_HAVE_UNISTD_H
 
+#undef STARPU_FXT_LOCK_TRACES
+
 #ifdef _MSC_VER
 typedef long starpu_ssize_t;
 #define __starpu_func__ __FUNCTION__

+ 4 - 0
include/starpu_sched_ctx.h

@@ -32,6 +32,7 @@ extern "C"
 #define STARPU_SCHED_CTX_NESTED                  (6<<16)
 #define STARPU_SCHED_CTX_AWAKE_WORKERS           (7<<16)
 #define STARPU_SCHED_CTX_POLICY_INIT             (8<<16)
+#define STARPU_SCHED_CTX_USER_DATA               (9<<16)
 
 unsigned starpu_sched_ctx_create(int *workerids_ctx, int nworkers_ctx, const char *sched_ctx_name, ...);
 
@@ -62,6 +63,7 @@ void starpu_sched_ctx_stop_task_submission(void);
 void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id);
 
 unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids);
+unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids);
 
 unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id);
 
@@ -102,6 +104,8 @@ int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id);
 
 #define STARPU_DEFAULT_PRIO	0
 
+void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id);
+
 struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC;
 
 void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id);

+ 1 - 1
include/starpu_task.h

@@ -105,7 +105,7 @@ struct starpu_codelet
 	starpu_mic_func_t mic_funcs[STARPU_MAXIMPLEMENTATIONS];
 	starpu_scc_func_t scc_funcs[STARPU_MAXIMPLEMENTATIONS];
 
-	char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
+	const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
 
 	int nbuffers;
 	enum starpu_data_access_mode modes[STARPU_NMAXBUFS];

+ 2 - 1
include/starpu_tree.h

@@ -25,7 +25,7 @@ extern "C"
 
 struct starpu_tree
 {
-	struct starpu_tree **nodes;
+	struct starpu_tree *nodes;
 	struct starpu_tree *father;
 	int arity;
 	int id;
@@ -35,6 +35,7 @@ struct starpu_tree
 
 void starpu_tree_reset_visited(struct starpu_tree *tree, char *visited);
 
+void starpu_tree_prepare_children(unsigned arity, struct starpu_tree *father);
 void starpu_tree_insert(struct starpu_tree *tree, int id, int level, int is_pu, int arity, struct starpu_tree *father);
 
 struct starpu_tree *starpu_tree_get(struct starpu_tree *tree, int id);

+ 2 - 1
include/starpu_worker.h

@@ -55,7 +55,8 @@ enum starpu_worker_collection_type
 
 struct starpu_worker_collection
 {
-	void *workerids;
+	int *workerids;
+	void *collection_private;
 	unsigned nworkers;
 	void *unblocked_workers;
 	unsigned nunblocked_workers;

+ 7 - 2
mic-configure

@@ -96,6 +96,10 @@ then
     dev_list="mic"
 fi
 
+# prepend mic_params with "--with-mpicc=mpicc -mmic", to allow possible override by the user
+mic_params=("--with-mpicc=mpicc -mmic" "${mic_params[@]}")
+mic_params=("--with-mpifort=mpifort -mmic" "${mic_params[@]}")
+
 for arch in $dev_list #host mic
 do
 	# We call the configure script from a build directory further in the
@@ -122,11 +126,12 @@ do
 		    unset CXXLD
 		    unset F77
 		    unset FC
+		    params+=(--disable-fortran)
 		fi
 	fi
 
 	declare -a params
-	params=("--prefix=$prefix/$arch")
+	params=("--prefix=$prefix/$arch" "--disable-fstack-protector-all")
 
 	if [ "$native_mic" -eq "0" ]
 	then
@@ -134,7 +139,7 @@ do
 	fi
 
 	if test x$arch = xmic ; then
-	    params+=(--disable-build-doc --host=$mic_host)
+	    params+=(--host=$mic_host --disable-build-doc)
 	    if [ "$native_mic" -eq "1" ]
 	    then
 		params+=(--enable-maxcpus=250)

+ 5 - 1
mpi/src/starpu_mpi.c

@@ -959,6 +959,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 				{
 					// req->ptr is freed by starpu_data_unpack
 					starpu_data_unpack(req->data_handle, req->ptr, req->count);
+					starpu_memory_deallocate(STARPU_MAIN_RAM, req->count);
 				}
 			}
 			else
@@ -1370,9 +1371,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		}
 
 		/* get one request */
-		struct _starpu_mpi_req *req;
 		while (!_starpu_mpi_req_list_empty(ready_requests))
 		{
+			struct _starpu_mpi_req *req;
 			req = _starpu_mpi_req_list_pop_back(ready_requests);
 
 			/* handling a request is likely to block for a while
@@ -1483,6 +1484,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 						{
 							early_request->count = envelope->size;
 							early_request->ptr = malloc(early_request->count);
+							starpu_memory_allocate(STARPU_MAIN_RAM, early_request->count, STARPU_MEMORY_OVERFLOW);
 
 							STARPU_MPI_ASSERT_MSG(early_request->ptr, "cannot allocate message of size %ld\n", early_request->count);
 						}
@@ -1686,7 +1688,9 @@ int starpu_mpi_initialize_extended(int *rank, int *world_size)
 
 int starpu_mpi_shutdown(void)
 {
+#ifndef STARPU_SIMGRID
 	void *value;
+#endif
 	int rank, world_size;
 
 	/* We need to get the rank before calling MPI_Finalize to pass to _starpu_mpi_comm_amounts_display() */

+ 1 - 2
mpi/src/starpu_mpi_comm.c

@@ -134,12 +134,11 @@ int _starpu_mpi_comm_test_recv(MPI_Status *status, struct _starpu_mpi_envelope *
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_comms_mutex);
 	while (1)
 	{
-		int flag, res;
-
 		struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm;
 
 		if (_comm->posted)
 		{
+			int flag, res;
 			/* test whether an envelope has arrived. */
 #ifdef STARPU_SIMGRID
 			MSG_process_sleep(0.000001);

+ 1 - 1
src/common/fxt.c

@@ -244,7 +244,7 @@ void starpu_fxt_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED)
 #endif
 }
 
-void starpu_fxt_trace_user_event_string(const char *s)
+void starpu_fxt_trace_user_event_string(const char *s STARPU_ATTRIBUTE_UNUSED)
 {
 #ifdef STARPU_USE_FXT
 	_STARPU_TRACE_EVENT(s);

+ 16 - 16
src/common/fxt.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -804,41 +804,41 @@ do {										\
 
 #define _STARPU_TRACE_LOCKING_SPINLOCK(file, line)	do {\
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
-		const char *file; \
-		file = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(file, line),file); \
+		const char *xfile; \
+		xfile = strrchr(file,'/') + 1; \
+		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
 #define _STARPU_TRACE_SPINLOCK_LOCKED(file, line)		do { \
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
-		const char *file; \
-		file = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(file, line),file); \
+		const char *xfile; \
+		xfile = strrchr(file,'/') + 1; \
+		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
 #define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line)	do { \
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
-		const char *file; \
-		file = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(file, line),file); \
+		const char *xfile; \
+		xfile = strrchr(file,'/') + 1; \
+		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
 #define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line)	do { \
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
-		const char *file; \
-		file = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(file, line),file); \
+		const char *xfile; \
+		xfile = strrchr(file,'/') + 1; \
+		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
 #define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line)	do { \
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
-		const char *file; \
-		file = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(file, line),file); \
+		const char *xfile; \
+		xfile = strrchr(file,'/') + 1; \
+		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 

+ 3 - 2
src/core/debug.c

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2009-2013  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013, 2015  CNRS
+ * Copyright (C) 2016  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -73,7 +74,7 @@ void _starpu_print_to_logfile(const char *format STARPU_ATTRIBUTE_UNUSED, ...)
 }
 
 /* Record codelet to give ayudame nice function ids starting from 0. */
-#ifdef HAVE_AYUDAME_H
+#if defined(STARPU_USE_AYUDAME1)
 struct ayudame_codelet
 {
 	char *name;
@@ -119,4 +120,4 @@ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl)
 	STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex);
 	return i + 1;
 }
-#endif
+#endif /* AYUDAME1 */

+ 265 - 2
src/core/debug.h

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2009-2013, 2016  Université de Bordeaux
  * Copyright (C) 2010, 2011  CNRS
+ * Copyright (C) 2016  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,8 +26,268 @@
 #include <common/config.h>
 #include <core/workers.h>
 
-#ifdef HAVE_AYUDAME_H
-#include <Ayudame.h>
+#if defined(STARPU_USE_AYUDAME1)
+/* Ayudame 1 API */
+# include <Ayudame.h>
+# ifndef AYU_RT_STARPU
+#  define AYU_RT_STARPU 4
+# endif
+# define STARPU_AYU_EVENT AYU_event
+
+# define STARPU_AYU_PREINIT() \
+	if (AYU_event) \
+	{ \
+		enum ayu_runtime_t ayu_rt = AYU_RT_STARPU; \
+		AYU_event(AYU_PREINIT, 0, (void*) &ayu_rt); \
+	}
+
+# define STARPU_AYU_INIT() \
+	if (AYU_event) \
+	{ \
+		AYU_event(AYU_INIT, 0, NULL); \
+	}
+
+# define STARPU_AYU_FINISH() \
+	if (AYU_event) \
+	{ \
+		AYU_event(AYU_FINISH, 0, NULL); \
+	}
+
+# define STARPU_AYU_ADDDEPENDENCY(previous, handle, job_id) \
+	if (AYU_event) \
+	{ \
+		uintptr_t __AYU_data[3] = { (previous), (uintptr_t) (handle), (uintptr_t) (handle) }; \
+		AYU_event(AYU_ADDDEPENDENCY, (job_id), __AYU_data); \
+	}
+
+# define STARPU_AYU_REMOVETASK(job_id) \
+	if (AYU_event) \
+	{ \
+		AYU_event(AYU_REMOVETASK, (job_id), NULL); \
+	}
+
+# define STARPU_AYU_ADDTASK(job_id, task) \
+	if (AYU_event) \
+	{ \
+		int64_t __AYU_data[2] = { \
+			((struct starpu_task *)(task))!=NULL?_starpu_ayudame_get_func_id(((struct starpu_task *)(task))->cl):0, \
+			((struct starpu_task *)(task))!=NULL?((struct starpu_task *)(task))->priority-STARPU_MIN_PRIO:0 \
+		}; \
+		AYU_event(AYU_ADDTASK, (job_id), __AYU_data); \
+	}
+
+# define STARPU_AYU_PRERUNTASK(job_id, workerid) \
+	if (AYU_event) \
+	{ \
+		intptr_t __id = (workerid); \
+		AYU_event(AYU_PRERUNTASK, (job_id), &__id); \
+	}
+
+# define STARPU_AYU_RUNTASK(job_id) \
+	if (AYU_event) \
+	{ \
+		AYU_event(AYU_RUNTASK, (job_id), NULL); \
+	}
+
+# define STARPU_AYU_POSTRUNTASK(job_id) \
+	if (AYU_event) \
+	{ \
+		AYU_event(AYU_POSTRUNTASK, (job_id), NULL); \
+	}
+
+# define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id) \
+	if (AYU_event) \
+	{ \
+		intptr_t __id = (worker_id); \
+		AYU_event(AYU_ADDTASKTOQUEUE, (job_id), &__id); \
+	}
+
+# define STARPU_AYU_BARRIER() \
+	if (AYU_event) \
+	{ \
+		AYU_event(AYU_BARRIER, 0, NULL); \
+	}
+
+#elif defined(STARPU_USE_AYUDAME2)
+/* Ayudame 2 API */
+# include <ayudame.h>
+# define STARPU_AYU_EVENT ayu_event
+
+# define STARPU_AYU_PREINIT()
+
+# define STARPU_AYU_INIT()
+
+# define STARPU_AYU_FINISH() \
+	if (ayu_event){ \
+		ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \
+		ayu_event_data_t __data; \
+		__data.common.client_id = __cli_id; \
+		ayu_event(AYU_FINISH, __data); \
+	}
+
+# define STARPU_AYU_ADDDEPENDENCY(previous, handle, job_id) \
+	if (ayu_event) \
+	{ \
+		ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \
+		ayu_event_data_t __data; \
+		uint64_t __dep_id=0; \
+		__dep_id |= (previous) << 0; \
+		__dep_id |= (job_id) << 24; \
+		__dep_id |= (uintptr_t) (handle) << 48; \
+		__data.common.client_id = __cli_id; \
+		__data.add_dependency.dependency_id = __dep_id; \
+		__data.add_dependency.from_id=(previous); \
+		__data.add_dependency.to_id=(job_id); \
+		__data.add_dependency.dependency_label = "dep"; \
+		ayu_event(AYU_ADDDEPENDENCY, __data); \
+		ayu_wipe_data(&__data); \
+		\
+		char __buf[32]; \
+		snprintf(__buf, 32, "%llu", (unsigned long long)(uintptr_t) (handle)); \
+		__data.common.client_id = __cli_id; \
+		__data.set_property.property_owner_id = __dep_id; \
+		__data.set_property.key = "dep_address_value"; \
+		__data.set_property.value = __buf; \
+		ayu_event(AYU_SETPROPERTY, __data); \
+		ayu_wipe_data(&__data); \
+	}
+
+# define STARPU_AYU_REMOVETASK(job_id) \
+	if (ayu_event) \
+	{ \
+		ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \
+		ayu_event_data_t __data; \
+		__data.common.client_id = __cli_id; \
+		__data.set_property.property_owner_id = (job_id); \
+		__data.set_property.key = "state"; \
+		__data.set_property.value = "finished"; \
+		ayu_event(AYU_SETPROPERTY, __data); \
+		ayu_wipe_data(&__data); \
+	}
+
+# define STARPU_AYU_ADDTASK(job_id, task) \
+	if (ayu_event) \
+	{ \
+		ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \
+		ayu_event_data_t __data; \
+		__data.common.client_id = __cli_id; \
+		__data.add_task.task_id = (job_id); \
+		__data.add_task.scope_id = 0; \
+		__data.add_task.task_label = "task"; \
+		ayu_event(AYU_ADDTASK, __data); \
+		ayu_wipe_data(&__data); \
+		\
+		if ((task) != NULL) \
+		{ \
+			char __buf[32]; \
+			snprintf(__buf, 32, "%d", ((struct starpu_task *)(task))->priority); \
+			__data.common.client_id = __cli_id; \
+			__data.set_property.property_owner_id = (job_id); \
+			__data.set_property.key = "priority"; \
+			__data.set_property.value = __buf; \
+			ayu_event(AYU_SETPROPERTY, __data); \
+			ayu_wipe_data(&__data); \
+			\
+			const char *__name = ((struct starpu_task *)(task))->name != NULL?((struct starpu_task *)(task))->name: \
+			             ((struct starpu_task *)(task))->cl->name != NULL?((struct starpu_task *)(task))->cl->name:"<no_name>"; \
+			__data.common.client_id = __cli_id; \
+			__data.set_property.property_owner_id = (job_id); \
+			__data.set_property.key = "function_name"; \
+			__data.set_property.value = __name; \
+			ayu_event(AYU_SETPROPERTY, __data); \
+			ayu_wipe_data(&__data); \
+		} \
+	}
+
+# define STARPU_AYU_PRERUNTASK(job_id, workerid) \
+	if (ayu_event) \
+	{ \
+		ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \
+		ayu_event_data_t __data; \
+		__data.common.client_id = __cli_id; \
+		__data.set_property.property_owner_id = (job_id); \
+		__data.set_property.key = "state"; \
+		__data.set_property.value = "running"; \
+		ayu_event(AYU_SETPROPERTY, __data); \
+		ayu_wipe_data(&__data); \
+		\
+		char __buf[32]; \
+		snprintf(__buf, 32, "%d", (workerid)); \
+		__data.common.client_id = __cli_id; \
+		__data.set_property.property_owner_id = (job_id); \
+		__data.set_property.key = "worker"; \
+		__data.set_property.value = __buf; \
+		ayu_event(AYU_SETPROPERTY, __data); \
+		ayu_wipe_data(&__data); \
+	}
+
+# define STARPU_AYU_RUNTASK(job_id) \
+	if (ayu_event) { \
+		ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \
+		ayu_event_data_t __data; \
+		__data.common.client_id = __cli_id; \
+		__data.set_property.property_owner_id = (job_id); \
+		__data.set_property.key = "state"; \
+		__data.set_property.value = "running"; \
+		ayu_event(AYU_SETPROPERTY, __data); \
+		ayu_wipe_data(&__data); \
+	}
+
+# define STARPU_AYU_POSTRUNTASK(job_id) \
+	if (ayu_event) \
+	{ \
+		/* TODO ADD thread id core id etc */ \
+		ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \
+		ayu_event_data_t __data; \
+		__data.common.client_id = __cli_id; \
+		__data.set_property.property_owner_id = (job_id); \
+		__data.set_property.key = "state"; \
+		__data.set_property.value = "finished"; \
+		ayu_event(AYU_SETPROPERTY, __data); \
+		ayu_wipe_data(&__data); \
+	}
+
+# define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id) \
+	if (ayu_event) \
+	{ \
+		ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \
+		ayu_event_data_t __data; \
+		__data.common.client_id = __cli_id; \
+		__data.set_property.property_owner_id = (job_id); \
+		__data.set_property.key = "state"; \
+		__data.set_property.value = "queued"; \
+		ayu_event(AYU_SETPROPERTY, __data); \
+		ayu_wipe_data(&__data); \
+		\
+		char __buf[32]; \
+		snprintf(__buf, 32, "%d", (int)(worker_id)); \
+		__data.common.client_id = __cli_id; \
+		__data.set_property.property_owner_id = (job_id); \
+		__data.set_property.key = "worker"; \
+		__data.set_property.value = __buf; \
+		ayu_event(AYU_SETPROPERTY, __data); \
+		ayu_wipe_data(&__data); \
+ 	}
+
+# define STARPU_AYU_BARRIER() \
+	if (ayu_event) \
+	{ \
+		/* How to generate a barrier event with Ayudame 2? */ \
+	}
+#else
+# define STARPU_AYU_EVENT (0)
+# define STARPU_AYU_PREINIT()
+# define STARPU_AYU_INIT()
+# define STARPU_AYU_FINISH()
+# define STARPU_AYU_ADDDEPENDENCY(previous, handle, next_job)
+# define STARPU_AYU_REMOVETASK(job_id)
+# define STARPU_AYU_ADDTASK(job_id, task)
+# define STARPU_AYU_PRERUNTASK(job_id, workerid)
+# define STARPU_AYU_RUNTASK(job_id)
+# define STARPU_AYU_POSTRUNTASK(job_id)
+# define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id)
+# define STARPU_AYU_BARRIER()
+
 #endif
 
 /* Create a file that will contain StarPU's log */
@@ -41,8 +302,10 @@ void _starpu_print_to_logfile(const char *format, ...) STARPU_ATTRIBUTE_FORMAT(p
 /* Tell gdb whether FXT is compiled in or not */
 extern int _starpu_use_fxt;
 
+#if defined(STARPU_USE_AYUDAME1)
 /* Get an Ayudame id for CL */
 int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl);
+#endif
 
 void _starpu_watchdog_init(void);
 void _starpu_watchdog_shutdown(void);

+ 4 - 3
src/core/dependencies/data_arbiter_concurrency.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015  Université de Bordeaux
+ * Copyright (C) 2015-2016  Université de Bordeaux
  * Copyright (C) 2015  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -127,7 +127,7 @@ struct LockOrDelegateListNode
  */
 static int _starpu_LockOrDelegatePostOrPerform(starpu_arbiter_t arbiter, void (*func)(void*), void* data)
 {
-	struct LockOrDelegateListNode* newNode = malloc(sizeof(*newNode)), *iter;
+	struct LockOrDelegateListNode* newNode = malloc(sizeof(*newNode)), *iter, *next;
 	int did = 0;
 	STARPU_ASSERT(newNode);
 	newNode->data = data;
@@ -152,8 +152,9 @@ static int _starpu_LockOrDelegatePostOrPerform(starpu_arbiter_t arbiter, void (*
 		while (iter != NULL)
 		{
 			(*iter->func)(iter->data);
+			next = iter->next;
 			free(iter);
-			iter = iter->next;
+			iter = next;
 		}
 
 		/* And then do our job */

+ 3 - 10
src/core/dependencies/implicit_data_deps.c

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2010-2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013, 2015  CNRS
+ * Copyright (C) 2016  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,13 +33,7 @@ static void _starpu_add_ghost_dependency(starpu_data_handle_t handle STARPU_ATTR
 {
 	struct _starpu_job *next_job = _starpu_get_job_associated_to_task(next);
 	_starpu_bound_job_id_dep(handle, next_job, previous);
-#ifdef HAVE_AYUDAME_H
-	if (AYU_event)
-	{
-		uintptr_t AYU_data[3] = { previous, (uintptr_t) handle, (uintptr_t) handle };
-		AYU_event(AYU_ADDDEPENDENCY, next_job->job_id, AYU_data);
-	}
-#endif
+	STARPU_AYU_ADDDEPENDENCY(previous, handle, next_job->job_id);
 }
 
 static void _starpu_add_dependency(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, struct starpu_task *previous STARPU_ATTRIBUTE_UNUSED, struct starpu_task *next STARPU_ATTRIBUTE_UNUSED)
@@ -81,9 +76,7 @@ static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task
 #else
 		_starpu_bound_recording
 #endif
-#ifdef HAVE_AYUDAME_H
-		|| AYU_event
-#endif
+		|| STARPU_AYU_EVENT
 		) && handle->last_submitted_ghost_sync_id_is_valid)
 	{
 		_STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_sync_id,

+ 19 - 46
src/core/dependencies/tags.c

@@ -1,7 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2013  Université de Bordeaux
+ * Copyright (C) 2009-2013, 2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2016  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -26,7 +27,7 @@
 #include <common/uthash.h>
 #include <core/debug.h>
 
-#define AYUDAME_OFFSET 4000000000000000000ULL
+#define STARPU_AYUDAME_OFFSET 4000000000000000000ULL
 
 struct _starpu_tag_table
 {
@@ -139,11 +140,8 @@ void starpu_tag_remove(starpu_tag_t id)
 {
 	struct _starpu_tag_table *entry;
 
-#ifdef HAVE_AYUDAME_H
-	if (AYU_event)
-		AYU_event(AYU_REMOVETASK, id + AYUDAME_OFFSET, NULL);
-#endif
-
+	STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET);
+	STARPU_AYU_REMOVETASK(id + STARPU_AYUDAME_OFFSET);
 	STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock);
 
 	HASH_FIND_UINT64_T(tag_htbl, &id, entry);
@@ -200,14 +198,8 @@ static struct _starpu_tag *_gettag_struct(starpu_tag_t id)
 
 		HASH_ADD_UINT64_T(tag_htbl, id, entry2);
 
-#ifdef HAVE_AYUDAME_H
-		if (AYU_event)
-		{
-			int64_t AYU_data[2] = {0, 0};
-			STARPU_ASSERT(id < AYUDAME_OFFSET);
-			AYU_event(AYU_ADDTASK, id + AYUDAME_OFFSET, AYU_data);
-		}
-#endif
+		STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET);
+		STARPU_AYU_ADDTASK(id + STARPU_AYUDAME_OFFSET, NULL);
 	}
 
 	return tag;
@@ -241,14 +233,9 @@ void _starpu_tag_set_ready(struct _starpu_tag *tag)
 	_starpu_enforce_deps_starting_from_task(j);
 
 	_starpu_spin_lock(&tag->lock);
-#ifdef HAVE_AYUDAME_H
-	if (AYU_event)
-	{
-		intptr_t id = 0;
-		AYU_event(AYU_PRERUNTASK, tag->id + AYUDAME_OFFSET, &id);
-		AYU_event(AYU_POSTRUNTASK, tag->id + AYUDAME_OFFSET, NULL);
-	}
-#endif
+	STARPU_ASSERT(!STARPU_AYU_EVENT || tag->id < STARPU_AYUDAME_OFFSET);
+	STARPU_AYU_PRERUNTASK(tag->id + STARPU_AYUDAME_OFFSET, -1);
+	STARPU_AYU_POSTRUNTASK(tag->id + STARPU_AYUDAME_OFFSET);
 }
 
 /* the lock must be taken ! */
@@ -327,15 +314,9 @@ void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job)
 	if (job->task->regenerate || job->submitted == 2 ||
 			tag->state != STARPU_DONE)
 		tag->state = STARPU_ASSOCIATED;
-#ifdef HAVE_AYUDAME_H
-	if (AYU_event)
-	{
-		uintptr_t AYU_data1[3] = {id+AYUDAME_OFFSET, 0, 0};
-		uintptr_t AYU_data2[3] = {job->job_id, 0, 0};
-		AYU_event(AYU_ADDDEPENDENCY, job->job_id, AYU_data1);
-		AYU_event(AYU_ADDDEPENDENCY, id+AYUDAME_OFFSET, AYU_data2);
-	}
-#endif
+	STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET);
+	STARPU_AYU_ADDDEPENDENCY(id+STARPU_AYUDAME_OFFSET, 0, job->job_id);
+	STARPU_AYU_ADDDEPENDENCY(job->job_id, 0, id+STARPU_AYUDAME_OFFSET);
 	_starpu_spin_unlock(&tag->lock);
 }
 
@@ -366,13 +347,9 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 		_starpu_spin_lock(&tag_dep->lock);
 		_starpu_spin_lock(&tag_child->lock);
 		_starpu_tag_add_succ(tag_dep, cg);
-#ifdef HAVE_AYUDAME_H
-		if (AYU_event)
-		{
-			uintptr_t AYU_data[3] = {dep_id+AYUDAME_OFFSET, 0, 0};
-			AYU_event(AYU_ADDDEPENDENCY, id+AYUDAME_OFFSET, AYU_data);
-		}
-#endif
+		STARPU_ASSERT(!STARPU_AYU_EVENT || dep_id < STARPU_AYUDAME_OFFSET);
+		STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET);
+		STARPU_AYU_ADDDEPENDENCY(dep_id+STARPU_AYUDAME_OFFSET, 0, id+STARPU_AYUDAME_OFFSET);
 		_starpu_spin_unlock(&tag_child->lock);
 		_starpu_spin_unlock(&tag_dep->lock);
 	}
@@ -408,13 +385,9 @@ void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...)
 		_starpu_spin_lock(&tag_dep->lock);
 		_starpu_spin_lock(&tag_child->lock);
 		_starpu_tag_add_succ(tag_dep, cg);
-#ifdef HAVE_AYUDAME_H
-		if (AYU_event)
-		{
-			uintptr_t AYU_data[3] = {dep_id+AYUDAME_OFFSET, 0, 0};
-			AYU_event(AYU_ADDDEPENDENCY, id+AYUDAME_OFFSET, AYU_data);
-		}
-#endif
+		STARPU_ASSERT(!STARPU_AYU_EVENT || dep_id < STARPU_AYUDAME_OFFSET);
+		STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET);
+		STARPU_AYU_ADDDEPENDENCY(dep_id+STARPU_AYUDAME_OFFSET, 0, id+STARPU_AYUDAME_OFFSET);
 		_starpu_spin_unlock(&tag_child->lock);
 		_starpu_spin_unlock(&tag_dep->lock);
 	}

+ 3 - 6
src/core/dependencies/task_deps.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
- * Copyright (C) 2014  INRIA
+ * Copyright (C) 2014, 2016  INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -112,13 +112,10 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s
 
 		_STARPU_TRACE_TASK_DEPS(dep_job, job);
 		_starpu_bound_task_dep(job, dep_job);
-#ifdef HAVE_AYUDAME_H
-		if (AYU_event && check)
+		if (check)
 		{
-			uintptr_t AYU_data[3] = {dep_job->job_id, 0, 0};
-			AYU_event(AYU_ADDDEPENDENCY, job->job_id, AYU_data);
+			STARPU_AYU_ADDDEPENDENCY(dep_job->job_id, 0, job->job_id);
 		}
-#endif
 		if (_starpu_graph_record)
 			_starpu_graph_add_job_dep(job, dep_job);
 

+ 6 - 27
src/core/jobs.c

@@ -3,7 +3,7 @@
  * Copyright (C) 2009-2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
  * Copyright (C) 2011  Télécom-SudParis
- * Copyright (C) 2011, 2014  INRIA
+ * Copyright (C) 2011, 2014, 2016  INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -89,21 +89,11 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 #ifndef STARPU_USE_FXT
 	if (_starpu_bound_recording || _starpu_top_status_get() ||
 		_starpu_task_break_on_push != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_sched != -1
-#ifdef HAVE_AYUDAME_H
-		|| AYU_event
-#endif
-			)
+		|| STARPU_AYU_EVENT)
 #endif
 	{
 		job->job_id = STARPU_ATOMIC_ADDL(&job_cnt, 1);
-#ifdef HAVE_AYUDAME_H
-		if (AYU_event)
-		{
-			/* Declare task to Ayudame */
-			int64_t AYU_data[2] = {_starpu_ayudame_get_func_id(task->cl), task->priority > STARPU_MIN_PRIO};
-			AYU_event(AYU_ADDTASK, job->job_id, AYU_data);
-		}
-#endif
+		STARPU_AYU_ADDTASK(job->job_id, task);
 	}
 	if (max_memory_use)
 	{
@@ -418,7 +408,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * data as it's not linked to an actual worker */
 	/* control task should not execute post_exec_hook */
-	if(j->task_size == 1 && task->cl != NULL && !j->internal
+	if(j->task_size == 1 && task->cl != NULL && task->cl->where != STARPU_NOWHERE && !j->internal
 #ifdef STARPU_OPENMP
 	/* If this is a continuation, we do not execute the post_exec_hook. The
 	 * post_exec_hook will be run only when the continued task fully
@@ -468,11 +458,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		j->terminated = 2;
 	}
 	STARPU_PTHREAD_COND_BROADCAST(&j->sync_cond);
-
-#ifdef HAVE_AYUDAME_H
-	if (AYU_event) AYU_event(AYU_REMOVETASK, j->job_id, NULL);
-#endif
-
+	STARPU_AYU_REMOVETASK(j->job_id);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 
 	if (detach && !continuation)
@@ -491,14 +477,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		STARPU_ASSERT_MSG((detach && !destroy && !task->synchronous)
 				|| continuation
 				, "Regenerated task must be detached (was %d), and not have detroy=1 (was %d) or synchronous=1 (was %d)", detach, destroy, task->synchronous);
-
-#ifdef HAVE_AYUDAME_H
-		if (AYU_event)
-		{
-			int64_t AYU_data[2] = {j->exclude_from_dag?0:_starpu_ayudame_get_func_id(task->cl), task->priority > STARPU_MIN_PRIO};
-			AYU_event(AYU_ADDTASK, j->job_id, AYU_data);
-		}
-#endif
+		STARPU_AYU_ADDTASK(j->job_id, j->exclude_from_dag?NULL:task);
 
 		{
 #ifdef STARPU_OPENMP

+ 1 - 1
src/core/perfmodel/perfmodel_bus.c

@@ -50,7 +50,7 @@
 #endif
 
 #define SIZE	(32*1024*1024*sizeof(char))
-#define NITER	128
+#define NITER	32
 
 #ifndef STARPU_SIMGRID
 static void _starpu_bus_force_sampling(void);

+ 13 - 8
src/core/perfmodel/perfmodel_print.c

@@ -295,22 +295,27 @@ int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t
 	{
 		struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
 		int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
-		struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][0];
-		struct starpu_perfmodel_history_list *ptr;
+		struct starpu_perfmodel_per_arch *arch_model;
+		struct starpu_perfmodel_history_list *ptr = NULL;
 
-		for (ptr = arch_model->list; ptr; ptr = ptr->next)
+		if (comb >= 0 && model->state->per_arch[comb])
 		{
-			struct starpu_perfmodel_history_entry *entry = ptr->entry;
-			if (entry->footprint == footprint)
+			arch_model = &model->state->per_arch[comb][0];
+
+			for (ptr = arch_model->list; ptr; ptr = ptr->next)
 			{
-				fprintf(output, "%s%e", workerid?" ":"", entry->mean);
-				break;
+				struct starpu_perfmodel_history_entry *entry = ptr->entry;
+				if (entry->footprint == footprint)
+				{
+					fprintf(output, "%s%e", workerid?" ":"", entry->mean);
+					break;
+				}
 			}
 		}
 		if (!ptr)
 		{
 			/* Didn't find any entry :/ */
-			fprintf(output, "%sNaN", workerid?" ":"");
+			fprintf(output, "%sinf", workerid?" ":"");
 		}
 	}
 	return 0;

+ 44 - 23
src/core/sched_ctx.c

@@ -450,9 +450,10 @@ static void _starpu_sched_ctx_create_hwloc_tree(struct _starpu_sched_ctx *sched_
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
 	{
-		worker = _starpu_get_worker_struct(workers->get_next(workers, &it));
-		if(!starpu_worker_is_combined_worker(worker->workerid))
+		unsigned workerid = workers->get_next(workers, &it);
+		if(!starpu_worker_is_combined_worker(workerid))
 		{
+			worker = _starpu_get_worker_struct(workerid);
 			hwloc_bitmap_or(sched_ctx->hwloc_workers_set,
 					sched_ctx->hwloc_workers_set,
 					worker->hwloc_cpu_set);
@@ -469,7 +470,8 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 						   int min_prio_set, int min_prio,
 						   int max_prio_set, int max_prio,
 						   unsigned awake_workers,
-						   void (*sched_policy_init)(void))
+						   void (*sched_policy_init)(void),
+						   void * user_data)
 {
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
 
@@ -515,6 +517,7 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 	sched_ctx->perf_arch.devices = NULL;
 	sched_ctx->perf_arch.ndevices = 0;
 	sched_ctx->init_sched = sched_policy_init;
+	sched_ctx->user_data = user_data;
 	int w;
 	for(w = 0; w < nworkers; w++)
 	{
@@ -713,7 +716,7 @@ unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const
 	for(i = 0; i < nw; i++)
 		printf("%d ", workers[i]);
 	printf("\n");
-	sched_ctx = _starpu_create_sched_ctx(selected_policy, workers, nw, 0, sched_ctx_name, 0, 0, 0, 0, 1, NULL);
+	sched_ctx = _starpu_create_sched_ctx(selected_policy, workers, nw, 0, sched_ctx_name, 0, 0, 0, 0, 1, NULL, NULL);
 	sched_ctx->min_ncpus = min_ncpus;
 	sched_ctx->max_ncpus = max_ncpus;
 	sched_ctx->min_ngpus = min_ngpus;
@@ -739,6 +742,7 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 	int max_prio_set = 0;
 	int min_prio = 0;
 	int max_prio = 0;
+	void *user_data = NULL;
 	struct starpu_sched_policy *sched_policy = NULL;
 	unsigned hierarchy_level = 0;
 	unsigned nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS;
@@ -784,6 +788,10 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 		{
 			init_sched = va_arg(varg_list, void(*)(void));
 		}
+		else if (arg_type == STARPU_SCHED_CTX_USER_DATA)
+		{
+			user_data = va_arg(varg_list, void *);
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
@@ -795,7 +803,7 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 	if (workerids && nworkers != -1)
 	{
 		/* Make sure the user doesn't use invalid worker IDs. */
-		unsigned num_workers = starpu_worker_get_count();
+		int num_workers = starpu_worker_get_count();
 		int i;
 		for (i = 0; i < nworkers; i++)
 		{
@@ -808,7 +816,7 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 	}
 
 	struct _starpu_sched_ctx *sched_ctx = NULL;
-	sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched);
+	sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched, user_data);
 	sched_ctx->hierarchy_level = hierarchy_level;
 	sched_ctx->nesting_sched_ctx = nesting_sched_ctx;
 
@@ -832,6 +840,7 @@ int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx
 	int max_prio_set = 0;
 	int min_prio = 0;
 	int max_prio = 0;
+	void *user_data = NULL;
 	struct starpu_sched_policy *sched_policy = NULL;
 	unsigned hierarchy_level = 0;
 	unsigned nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS;
@@ -888,6 +897,11 @@ int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx
 			arg_i++;
 			init_sched = arglist[arg_i];
 		}
+		else if (arg_type == STARPU_SCHED_CTX_USER_DATA)
+		{
+			arg_i++;
+			user_data = arglist[arg_i];
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
@@ -898,7 +912,7 @@ int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx
 	if (workerids && nworkers != -1)
 	{
 		/* Make sure the user doesn't use invalid worker IDs. */
-		unsigned num_workers = starpu_worker_get_count();
+		int num_workers = starpu_worker_get_count();
 		int i;
 		for (i = 0; i < nworkers; i++)
 		{
@@ -911,7 +925,7 @@ int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx
 	}
 
 	struct _starpu_sched_ctx *sched_ctx = NULL;
-	sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched);
+	sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched, user_data);
 	sched_ctx->hierarchy_level = hierarchy_level;
 	sched_ctx->nesting_sched_ctx = nesting_sched_ctx;
 
@@ -1651,6 +1665,13 @@ void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f)
 	free(workerids);
 }
 
+unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	*workerids = sched_ctx->workers->workerids;
+	return sched_ctx->workers->nworkers;
+}
+
 unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
@@ -1757,17 +1778,11 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
         struct starpu_worker_collection *workers = sched_ctx->workers;
 	if(workers)
 	{
-		int worker;
-
-		struct starpu_sched_ctx_iterator it;
+		unsigned i;
 
-		workers->init_iterator(workers, &it);
-		while(workers->has_next(workers, &it))
-		{
-			worker = workers->get_next(workers, &it);
-			if(worker == workerid)
+		for (i = 0; i < workers->nworkers; i++)
+			if (workerid == workers->workerids[i])
 				return 1;
-		}
 	}
 	return 0;
 }
@@ -1777,12 +1792,11 @@ unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype ar
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	int worker;
 
-	struct starpu_sched_ctx_iterator it;
+	unsigned i;
 
-	workers->init_iterator(workers, &it);
-	while(workers->has_next(workers, &it))
+	for (i = 0; i < workers->nworkers; i++)
 	{
-		worker = workers->get_next(workers, &it);
+		worker = workers->workerids[i];
 		enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
 		if(curr_arch == arch)
 			return 1;
@@ -2164,7 +2178,7 @@ void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task,
 	if (_starpu_get_nsched_ctxs() > 1)
 	{
 		int curr_workerid = starpu_worker_get_id();
-		struct _starpu_worker *curr_worker_str, *worker_str;
+		struct _starpu_worker *curr_worker_str = NULL, *worker_str;
 		if(curr_workerid != -1)
 		{
 			curr_worker_str = _starpu_get_worker_struct(curr_workerid);
@@ -2201,7 +2215,7 @@ void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, uns
 	if (_starpu_get_nsched_ctxs() > 1)
 	{
 		int curr_workerid = starpu_worker_get_id();
-		struct _starpu_worker *curr_worker_str, *worker_str;
+		struct _starpu_worker *curr_worker_str = NULL, *worker_str;
 		if(curr_workerid != -1)
 		{
 			curr_worker_str = _starpu_get_worker_struct(curr_workerid);
@@ -2575,3 +2589,10 @@ unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *
 	*awake_workers = sched_ctx->awake_workers;
 	return sched_ctx->sched_policy != NULL;
 }
+
+void *starpu_sched_ctx_get_used_data(unsigned sched_ctx_id)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	STARPU_ASSERT(sched_ctx != NULL);
+	return sched_ctx->user_data;
+}

+ 4 - 1
src/core/sched_ctx.h

@@ -51,6 +51,9 @@ struct _starpu_sched_ctx
 	/* data necessary for the policy */
 	void *policy_data;
 
+	/* pointer for application use */
+	void *user_data;
+
 	struct starpu_worker_collection *workers;
 
 	/* we keep an initial sched which we never delete */
@@ -181,7 +184,7 @@ void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config);
 /* allocate all structures belonging to a context */
 struct _starpu_sched_ctx*  _starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerid, int nworkerids, unsigned is_init_sched, const char *sched_name,
 						    int min_prio_set, int min_prio,
-						    int max_prio_set, int max_prio, unsigned awake_workers, void (*sched_policy_init)(void));
+						    int max_prio_set, int max_prio, unsigned awake_workers, void (*sched_policy_init)(void), void *user_data);
 
 /* delete all sched_ctx */
 void _starpu_delete_all_sched_ctxs();

+ 2 - 9
src/core/sched_policy.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2016  Université de Bordeaux
  * Copyright (C) 2010-2015  CNRS
- * Copyright (C) 2011  INRIA
+ * Copyright (C) 2011, 2016  INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -413,14 +413,7 @@ int _starpu_repush_task(struct _starpu_job *j)
 
 	unsigned can_push = _starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops, task);
 	task->status = STARPU_TASK_READY;
-
-#ifdef HAVE_AYUDAME_H
-	if (AYU_event)
-	{
-		intptr_t id = -1;
-		AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id);
-	}
-#endif
+	STARPU_AYU_ADDTOTASKQUEUE(j->job_id, -1);
 	/* if the context does not have any workers save the tasks in a temp list */
 	if(!sched_ctx->is_initial_sched)
 	{

+ 0 - 0
src/core/simgrid.h


Certains fichiers n'ont pas été affichés car il y a eu trop de fichiers modifiés dans ce diff