Browse Source

merge from trunk

Corentin Salingue 8 years ago
parent
commit
a4530061c2
60 changed files with 487 additions and 179 deletions
  1. 2 0
      ChangeLog
  2. 1 1
      configure.ac
  3. 3 3
      doc/doxygen/chapters/301_tasks.doxy
  4. 15 0
      doc/doxygen/chapters/390_faq.doxy
  5. 15 0
      doc/doxygen/chapters/410_mpi_support.doxy
  6. 29 0
      doc/doxygen/chapters/501_environment_variables.doxy
  7. 9 0
      doc/doxygen/chapters/api/codelet_and_tasks.doxy
  8. 4 4
      doc/doxygen/chapters/api/insert_task.doxy
  9. 3 1
      examples/Makefile.am
  10. 2 2
      examples/mlr/mlr.c
  11. 3 3
      examples/pi/pi.c
  12. 2 2
      examples/pi/pi_redux.c
  13. 3 1
      examples/sched_ctx/nested_sched_ctxs.c
  14. 5 3
      examples/sched_ctx/parallel_code.c
  15. 3 3
      examples/sched_ctx/two_cpu_contexts.c
  16. 3 1
      examples/spmv/matrix_market/example_read.c
  17. 5 3
      examples/stencil/implicit-stencil-kernels.c
  18. 1 1
      examples/stencil/life_opencl.c
  19. 5 1
      examples/stencil/stencil-kernels.c
  20. 3 1
      examples/stencil/stencil-tasks.c
  21. 3 3
      gcc-plugin/examples/cholesky/cholesky.c
  22. 3 3
      include/starpu_rand.h
  23. 4 1
      include/starpu_task.h
  24. 26 1
      mpi/src/starpu_mpi.c
  25. 2 0
      mpi/src/starpu_mpi_private.h
  26. 1 1
      socl/examples/basicsplit/basicsplit.c
  27. 6 3
      socl/examples/clinfo/clinfo.c
  28. 2 2
      socl/src/cl_createbuffer.c
  29. 2 1
      src/common/utils.h
  30. 2 2
      src/core/dependencies/tags.c
  31. 1 1
      src/core/sched_ctx.c
  32. 15 9
      src/core/sched_policy.c
  33. 7 4
      src/core/simgrid.c
  34. 7 2
      src/core/topology.c
  35. 43 10
      src/core/workers.c
  36. 13 1
      src/datawizard/memalloc.c
  37. 4 1
      src/datawizard/memalloc.h
  38. 2 3
      src/drivers/cuda/driver_cuda.c
  39. 5 33
      src/drivers/driver_common/driver_common.c
  40. 7 0
      src/sched_policies/component_perfmodel_select.c
  41. 2 2
      src/sched_policies/component_worker.c
  42. 8 2
      src/sched_policies/deque_modeling_policy_data_aware.c
  43. 2 2
      src/sched_policies/graph_test_policy.c
  44. 8 2
      src/sched_policies/parallel_heft.c
  45. 28 4
      src/sched_policies/work_stealing_policy.c
  46. 19 1
      src/util/misc.c
  47. 6 3
      src/util/starpu_task_insert.c
  48. 3 1
      tests/Makefile.am
  49. 32 22
      tests/coverage/coverage.sh
  50. 4 0
      tests/datawizard/allocate.c
  51. 2 2
      tests/datawizard/simgrid-locality.c
  52. 4 0
      tests/disk/mem_reclaim.c
  53. 2 3
      tests/loader.c
  54. 10 10
      tests/main/combined_workers/bfs/bfs.cpp
  55. 5 5
      tests/main/insert_task_value.c
  56. 6 3
      tools/Makefile.am
  57. 66 0
      tools/cppcheck/suppressions.txt
  58. 5 3
      tools/starpu_fxt_stats.c
  59. 3 2
      tools/starpu_lp2paje.c
  60. 1 1
      tools/starpu_machine_display.c

+ 2 - 0
ChangeLog

@@ -42,6 +42,8 @@ New features:
     files.
   * Add STARPU_FXT_TRACE environment variable.
   * Add starpu_data_set_user_data and starpu_data_get_user_data.
+  * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
+    execution of just one MPI node.
 
 StarPU 1.2.0 (svn revision 18521)
 ==============================================

+ 1 - 1
configure.ac

@@ -86,7 +86,7 @@ AC_OPENMP
 #c++11 detection
 AX_CXX_COMPILE_STDCXX(11,noext,optional)
 
-AC_SUBST([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1])
+AC_SUBST([STARPU_HAVE_CXX11], $HAVE_CXX11)
 AM_CONDITIONAL([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1])
 if test $HAVE_CXX11 -eq 1; then
   AC_DEFINE(STARPU_HAVE_CXX11, [1], [compiler supports cxx11])

+ 3 - 3
doc/doxygen/chapters/301_tasks.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -371,7 +371,7 @@ void func_cpu(void *descr[], void *_args)
         int ifactor;
         float ffactor;
 
-        starpu_codelet_unpack_args(_args, &ifactor, NULL);
+        starpu_codelet_unpack_args(_args, &ifactor, 0);
         starpu_codelet_unpack_args(_args, &ifactor, &ffactor);
 }
 \endcode
@@ -383,7 +383,7 @@ void func_cpu(void *descr[], void *_args)
         float ffactor;
 	char buffer[100];
 
-        starpu_codelet_unpack_args_and_copyleft(_args, buffer, 100, &ifactor, NULL);
+        starpu_codelet_unpack_args_and_copyleft(_args, buffer, 100, &ifactor, 0);
         starpu_codelet_unpack_args(buffer, &ffactor);
 }
 \endcode

+ 15 - 0
doc/doxygen/chapters/390_faq.doxy

@@ -226,6 +226,21 @@ starpu_resume();
 starpu_shutdown();
 \endcode
 
+\section GPUEatingCores When running with CUDA or OpenCL devices, I am seeing fewer CPU cores
+
+Yes, this is on purpose.
+
+Since GPU devices are way faster than CPUs, StarPU needs to react quickly when
+a task is finished, to feed the GPU with another task (StarPU actually submits
+a couple of tasks in advance so as to pipeline this, but filling the pipeline
+still has to be happening often enough), and thus it has to dedicate threads for
+this, and this is a very CPU-consuming duty. StarPU thus dedicates one CPU core
+for driving each GPU.
+
+Such dedication is also useful when a codelet is hybrid, i.e. while kernels are
+running on the GPU, the codelet can run some computation, which will thus be run
+by the CPU core instead of driving the GPU.
+
 \section CUDADrivers StarPU does not see my CUDA device
 
 First make sure that CUDA is properly running outside StarPU: build and

+ 15 - 0
doc/doxygen/chapters/410_mpi_support.doxy

@@ -501,6 +501,21 @@ If the distribution function is not too complex and the compiler is very good,
 the latter can even optimize the <c>for</c> loops, thus dramatically reducing
 the cost of task submission.
 
+To estimate quickly how long task submission takes, and notably how much pruning
+saves, a quick and easy way is to measure the submission time of just one of the
+MPI nodes. This can be achieved by running the application on just one MPI node
+with the following environment variables:
+
+\code
+export STARPU_DISABLE_KERNELS=1
+export STARPU_MPI_FAKE_RANK=2
+export STARPU_MPI_FAKE_SIZE=1024
+\endcode
+
+Here we have disabled the kernel function call to skip the actual computation
+time and only keep submission time, and we have asked StarPU to fake running on
+MPI node 2 out of 1024 nodes.
+
 A function starpu_mpi_task_build() is also provided with the aim to
 only construct the task structure. All MPI nodes need to call the
 function, only the node which is to execute the task will return a

+ 29 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -521,6 +521,26 @@ it prints messages on the standard output when data are added or removed from th
 communication cache.
 </dd>
 
+<dt>STARPU_MPI_FAKE_SIZE</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_SIZE
+\addindex __env__STARPU_MPI_FAKE_SIZE
+Setting this variable to a number makes StarPU believe that there are that many
+MPI nodes, even if it was run on only one MPI node. This makes it possible, e.g.,
+to simulate the execution of one of the nodes of a big cluster without actually
+running the rest. Of course, it does not provide computation results or timing.
+</dd>
+
+<dt>STARPU_MPI_FAKE_RANK</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_RANK
+\addindex __env__STARPU_MPI_FAKE_RANK
+Setting this variable to a number makes StarPU believe that it runs as the given
+MPI node, even if it was run on only one MPI node. This makes it possible, e.g.,
+to simulate the execution of one of the nodes of a big cluster without actually
+running the rest. Of course, it does not provide computation results or timing.
+</dd>
+
 <dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
 <dd>
 \anchor STARPU_SIMGRID_CUDA_MALLOC_COST
@@ -743,6 +763,15 @@ GPUs (or in main memory, when using out of core), when performing an asynchronou
 writeback pass. The default is 10%.
 </dd>
 
+<dt>STARPU_DIDUSE_BARRIER</dt>
+<dd>
+\anchor STARPU_DIDUSE_BARRIER
+\addindex __env__STARPU_DIDUSE_BARRIER
+When set to 1, StarPU will never evict a piece of data if it has not been used
+by at least one task. This avoids odd behaviors under high memory pressure, but
+can lead to deadlocks, so it is to be considered experimental only.
+</dd>
+
 <dt>STARPU_DISK_SWAP</dt>
 <dd>
 \anchor STARPU_DISK_SWAP

+ 9 - 0
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -853,6 +853,15 @@ This function returns the task currently executed by the
 worker, or <c>NULL</c> if it is called either from a thread that is not a
 task or simply because there is no task being executed at the moment.
 
+\fn const char *starpu_task_get_name(struct starpu_task *task)
+\ingroup API_Codelet_And_Tasks
+This function returns the name of \p task, i.e. either its task->name field, or
+the name of the corresponding performance model.
+
+\fn const char *starpu_task_get_model_name(struct starpu_task *task)
+\ingroup API_Codelet_And_Tasks
+This function returns the name of the performance model of \p task.
+
 \fn void starpu_codelet_display_stats(struct starpu_codelet *cl)
 \ingroup API_Codelet_And_Tasks
 Output on stderr some statistics on the codelet \p cl.

+ 4 - 4
doc/doxygen/chapters/api/insert_task.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -140,13 +140,13 @@ starpu_codelet_unpack_args().
 \ingroup API_Insert_Task
 Retrieve the arguments of type ::STARPU_VALUE associated to a
 task automatically created using the function starpu_task_insert(). If
-some parameter is NULL, unpacking will stop there and ignore the remaining
+any parameter's value is 0, unpacking will stop there and ignore the remaining
 parameters.
 
 \fn void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t buffer_size, ...)
 \ingroup API_Insert_Task
-Similar to starpu_codelet_unpack_args(), but if some parameter is
-NULL, copy the part of cl_arg that has not been read in buffer which
+Similar to starpu_codelet_unpack_args(), but if any parameter is
+0, copy the part of cl_arg that has not been read in buffer which
 can then be used in a later call to one of the unpack functions.
 
 \fn struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...)

+ 3 - 1
examples/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2016  Université de Bordeaux
+# Copyright (C) 2009-2017  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
 # Copyright (C) 2011  Télécom-SudParis
 # Copyright (C) 2011-2012  INRIA
@@ -23,8 +23,10 @@ include $(top_srcdir)/starpu.mk
 if STARPU_SIMGRID
 STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling
 STARPU_HOSTNAME=mirage
+MALLOC_PERTURB_=0
 export STARPU_PERF_MODEL_DIR
 export STARPU_HOSTNAME
+export MALLOC_PERTURB_
 endif
 
 AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused

+ 2 - 2
examples/mlr/mlr.c

@@ -119,14 +119,14 @@ static struct starpu_perfmodel cl_model_final = {
 
 static struct starpu_codelet cl_init = {
 	.cpu_funcs = { cpu_func },
-	.cpu_funcs_name = { "mlr_codelet_init" },
+	.cpu_funcs_name = { "cpu_func" },
 	.nbuffers = 0,
 	.model = &cl_model_init,
 };
 
 static struct starpu_codelet cl_final = {
 	.cpu_funcs = { cpu_func },
-	.cpu_funcs_name = { "mlr_codelet_final" },
+	.cpu_funcs_name = { "cpu_func" },
 	.nbuffers = 0,
 	.model = &cl_model_final,
 };

+ 3 - 3
examples/pi/pi.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -146,7 +146,7 @@ int main(int argc, char **argv)
 	starpu_vector_data_register(&sobol_qrng_direction_handle, STARPU_MAIN_RAM,
 		(uintptr_t)sobol_qrng_directions, n_dimensions*n_directions, sizeof(unsigned));
 
-	unsigned *cnt_array = malloc(ntasks*sizeof(unsigned));
+	unsigned *cnt_array = calloc(ntasks, sizeof(unsigned));
 	STARPU_ASSERT(cnt_array);
 	starpu_data_handle_t cnt_array_handle;
 	starpu_vector_data_register(&cnt_array_handle, STARPU_MAIN_RAM, (uintptr_t)cnt_array, ntasks, sizeof(unsigned));
@@ -161,7 +161,7 @@ int main(int argc, char **argv)
 		.filter_func = starpu_vector_filter_block,
 		.nchildren = ntasks
 	};
-	
+
 	starpu_data_partition(cnt_array_handle, &f);
 
 	double start;

+ 2 - 2
examples/pi/pi_redux.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2015  Université de Bordeaux
+ * Copyright (C) 2010-2015, 2017  Université de Bordeaux
  * Copyright (C) 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -158,7 +158,7 @@ void pi_func_cpu(void *descr[], void *cl_arg STARPU_ATTRIBUTE_UNUSED)
 	unsigned short *worker_xsub;
 	worker_xsub = &xsubi[PADDING*workerid];
 
-	struct drand48_data *buffer;
+	starpu_drand48_data *buffer;
 	buffer = &randbuffer[PADDING*workerid];
 
 	unsigned long local_cnt = 0;

+ 3 - 1
examples/sched_ctx/nested_sched_ctxs.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
- * Copyright (C) 2010-2014, 2016  CNRS
+ * Copyright (C) 2010-2014, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -51,8 +51,10 @@ int parallel_code(int sched_ctx)
 static void sched_ctx_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 {
 	int w = starpu_worker_get_id();
+	(void) w;
 	unsigned sched_ctx = (uintptr_t)arg;
 	int n = parallel_code(sched_ctx);
+	(void) n;
 	//printf("w %d executed %d it \n", w, n);
 }
 

+ 5 - 3
examples/sched_ctx/parallel_code.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2014, 2016  Université de Bordeaux
- * Copyright (C) 2010-2016  CNRS
+ * Copyright (C) 2010-2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -87,8 +87,8 @@ int main(int argc, char **argv)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
-	int nprocs1 = 1;
-	int nprocs2 = 1;
+	int nprocs1;
+	int nprocs2;
 	int *procs1, *procs2;
 
 #ifdef STARPU_USE_CPU
@@ -103,6 +103,8 @@ int main(int argc, char **argv)
 	for(j = nprocs1; j < nprocs1+nprocs2; j++)
 		procs2[k++] = j;
 #else
+	nprocs1 = 1;
+	nprocs2 = 1;
 	procs1 = (int*)malloc(nprocs1*sizeof(int));
 	procs2 = (int*)malloc(nprocs2*sizeof(int));
 	procs1[0] = 0;

+ 3 - 3
examples/sched_ctx/two_cpu_contexts.c

@@ -85,14 +85,14 @@ int main(int argc, char **argv)
 	for (i=0; i < n; i++)
 	{
 		int arg_id = 1*1000 + i;
-		ret = starpu_insert_task(&sched_ctx_codelet, STARPU_VALUE, &arg_id, sizeof(int), STARPU_SCHED_CTX, sched_ctx1, NULL);
+		ret = starpu_task_insert(&sched_ctx_codelet, STARPU_VALUE, &arg_id, sizeof(int), STARPU_SCHED_CTX, sched_ctx1, 0);
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
 	for (i=0; i < n; i++)
 	{
 		int arg_id = 2*1000 + i;
-		ret = starpu_insert_task(&sched_ctx_codelet, STARPU_VALUE, &arg_id, sizeof(int), STARPU_SCHED_CTX, sched_ctx2, NULL);
+		ret = starpu_task_insert(&sched_ctx_codelet, STARPU_VALUE, &arg_id, sizeof(int), STARPU_SCHED_CTX, sched_ctx2, 0);
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
@@ -101,7 +101,7 @@ int main(int argc, char **argv)
 	for (i=0; i < n; i++)
 	{
 		int arg_id = 1*10000 + i;
-		ret = starpu_insert_task(&sched_ctx_codelet, STARPU_VALUE, &arg_id, sizeof(int), STARPU_SCHED_CTX, sched_ctx1, NULL);
+		ret = starpu_task_insert(&sched_ctx_codelet, STARPU_VALUE, &arg_id, sizeof(int), STARPU_SCHED_CTX, sched_ctx1, 0);
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 

+ 3 - 1
examples/spmv/matrix_market/example_read.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  CNRS
+ * Copyright (C) 2010, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -13,6 +13,7 @@
  *
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
+
 #include "mm_to_bcsr.h"
 
 int main(int argc, char *argv[])
@@ -30,6 +31,7 @@ int main(int argc, char *argv[])
 
 	bcsr_t *bcsr;
 	bcsr = mm_file_to_bcsr(argv[1], c, r);
+	(void) bcsr;
 
 	return 0;
 }

+ 5 - 3
examples/stencil/implicit-stencil-kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
- * Copyright (C) 2012, 2013, 2016  CNRS
+ * Copyright (C) 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -571,6 +571,7 @@ static void dummy_func_bottom_cuda(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *
 	unsigned z;
 	starpu_codelet_unpack_args(arg, &z);
 	struct block_description *block = get_block_description(z);
+	(void) block;
 
 	int workerid = starpu_worker_get_id_check();
 	bottom_per_worker[workerid]++;
@@ -605,9 +606,10 @@ static void dummy_func_top_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *a
 /* bottom save, OPENCL version */
 static void dummy_func_bottom_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 {
-    unsigned z;
-    starpu_codelet_unpack_args(arg, &z);
+	unsigned z;
+	starpu_codelet_unpack_args(arg, &z);
 	struct block_description *block = get_block_description(z);
+	(void) block;
 
 	int workerid = starpu_worker_get_id_check();
 	bottom_per_worker[workerid]++;

+ 1 - 1
examples/stencil/life_opencl.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux
- * Copyright (C) 2016  CNRS
+ * Copyright (C) 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 5 - 1
examples/stencil/stencil-kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
- * Copyright (C) 2012, 2013, 2016  CNRS
+ * Copyright (C) 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -520,6 +520,7 @@ void dummy_func_top_cpu(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 void dummy_func_bottom_cpu(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 {
 	struct block_description *block = (struct block_description *) arg;
+	(void) block;
 	int workerid = starpu_worker_get_id_check();
 	bottom_per_worker[workerid]++;
 
@@ -550,6 +551,7 @@ static void dummy_func_top_cuda(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg
 static void dummy_func_bottom_cuda(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 {
 	struct block_description *block = (struct block_description *) arg;
+	(void) block;
 	int workerid = starpu_worker_get_id_check();
 	bottom_per_worker[workerid]++;
 
@@ -565,6 +567,7 @@ static void dummy_func_bottom_cuda(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *
 static void dummy_func_top_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 {
 	struct block_description *block = (struct block_description *) arg;
+	(void) block;
 	int workerid = starpu_worker_get_id_check();
 	top_per_worker[workerid]++;
 
@@ -581,6 +584,7 @@ static void dummy_func_top_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *a
 static void dummy_func_bottom_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 {
 	struct block_description *block = (struct block_description *) arg;
+	(void) block;
 	int workerid = starpu_worker_get_id_check();
 	bottom_per_worker[workerid]++;
 

+ 3 - 1
examples/stencil/stencil-tasks.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2013-2015  Université de Bordeaux
- * Copyright (C) 2012, 2013, 2015  CNRS
+ * Copyright (C) 2012, 2013, 2015, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -78,6 +78,7 @@ static void create_task_save_local(unsigned iter, unsigned z, int dir, int local
 static void send_done(void *arg)
 {
 	uintptr_t z = (uintptr_t) arg;
+	(void) z;
 	DEBUG("DO SEND %d\n", (int)z);
 }
 
@@ -106,6 +107,7 @@ static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, int lo
 static void recv_done(void *arg)
 {
 	uintptr_t z = (uintptr_t) arg;
+	(void) z;
 	DEBUG("DO RECV %d\n", (int)z);
 }
 

+ 3 - 3
gcc-plugin/examples/cholesky/cholesky.c

@@ -137,7 +137,7 @@ int main(int argc, char **argv)
 		{
 			for(x=0 ; x<nblocks ; x++)
 			{
-                                printf("Block %d,%d :\n", x, y);
+                                printf("Block %u,%u :\n", x, y);
 				for (j = 0; j < BLOCKSIZE; j++)
 				{
 					for (i = 0; i < BLOCKSIZE; i++)
@@ -163,7 +163,7 @@ int main(int argc, char **argv)
 		{
 			for(x=0 ; x<nblocks ; x++)
 			{
-                                printf("Block %d,%d :\n", x, y);
+                                printf("Block %u,%u :\n", x, y);
 				for (j = 0; j < BLOCKSIZE; j++)
 				{
 					for (i = 0; i < BLOCKSIZE; i++)
@@ -237,7 +237,7 @@ int main(int argc, char **argv)
 									float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
 									float err = abs(test_mat[j +i*size] - orig);
 									if (err > 0.00001) {
-										fprintf(stderr, "Error[%d, %d] --> %2.2f != %2.2f (err %2.2f)\n", i, j, test_mat[j +i*size], orig, err);
+										fprintf(stderr, "Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", i, j, test_mat[j +i*size], orig, err);
 										correctness = 0;
 										break;
 									}

+ 3 - 3
include/starpu_rand.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012, 2013  CNRS
+ * Copyright (C) 2012, 2013, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -48,7 +48,7 @@ typedef int starpu_drand48_data;
 #    define starpu_srand48_r(seed, buffer)		srand48(starpu_seed(seed))
 #    define starpu_drand48_r(buffer, result)		do {*(result) = drand48(); } while (0)
 #    define starpu_lrand48_r(buffer, result)		do {*(result) = lrand48(); } while (0)
-#    define starpu_erand48_r(xsubi, buffer, result)	do {*(result) = erand48(xsubi); } while (0)
+#    define starpu_erand48_r(xsubi, buffer, result)	do {(void) buffer; *(result) = erand48(xsubi); } while (0)
 #  endif
 #else
 typedef int starpu_drand48_data;
@@ -57,7 +57,7 @@ typedef int starpu_drand48_data;
 #  define starpu_lrand48() 				rand()
 #  define starpu_erand48(xsubi)				starpu_drand48()
 #  define starpu_srand48_r(seed, buffer) 		srand(starpu_seed(seed))
-#  define starpu_erand48_r(xsubi, buffer, result)	do {*(result) = ((double)(rand()) / RAND_MAX);} while (0)
+#  define starpu_erand48_r(xsubi, buffer, result)	do {(void) xsubi; (void) buffer; *(result) = ((double)(rand()) / RAND_MAX);} while (0)
 #endif
 
 #ifdef __cplusplus

+ 4 - 1
include/starpu_task.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011, 2014, 2016  INRIA
@@ -322,6 +322,9 @@ void starpu_codelet_display_stats(struct starpu_codelet *cl);
 
 struct starpu_task *starpu_task_get_current(void);
 
+const char *starpu_task_get_model_name(struct starpu_task *task);
+const char *starpu_task_get_name(struct starpu_task *task);
+
 void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid);
 void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size);
 

+ 26 - 1
mpi/src/starpu_mpi.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2016  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  * Copyright (C) 2016  Inria
  *
@@ -77,6 +77,8 @@ static int running = 0;
 static int _mpi_world_size;
 static int _mpi_world_rank;
 #endif
+int _starpu_mpi_fake_world_size = -1;
+int _starpu_mpi_fake_world_rank = -1;
 
 /* Count requests posted by the application and not yet submitted to MPI */
 static starpu_pthread_mutex_t mutex_posted_requests;
@@ -296,6 +298,11 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 	_STARPU_MPI_LOG_OUT();
 }
 
+static void nop_acquire_cb(void *arg)
+{
+	starpu_data_release(arg);
+}
+
 static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
 							      int srcdst, int data_tag, MPI_Comm comm,
 							      unsigned detached, unsigned sync, void (*callback)(void *), void *arg,
@@ -307,6 +314,12 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
 {
 	struct _starpu_mpi_req *req;
 
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		starpu_data_acquire_cb_sequential_consistency(data_handle, mode, nop_acquire_cb, data_handle, sequential_consistency);
+		return NULL;
+	}
+
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 
@@ -1292,6 +1305,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_mpi_world_size = worldsize;
 	_mpi_world_rank = rank;
 #endif
+	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
+	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
 
 #ifdef STARPU_SIMGRID
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -1898,6 +1913,11 @@ void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_r
 
 int starpu_mpi_comm_size(MPI_Comm comm, int *size)
 {
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		*size = _starpu_mpi_fake_world_size;
+		return 0;
+	}
 #ifdef STARPU_SIMGRID
 	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
 	*size = _mpi_world_size;
@@ -1909,6 +1929,11 @@ int starpu_mpi_comm_size(MPI_Comm comm, int *size)
 
 int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
 {
+	if (_starpu_mpi_fake_world_rank != -1)
+	{
+		*rank = _starpu_mpi_fake_world_rank;
+		return 0;
+	}
 #ifdef STARPU_SIMGRID
 	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
 	*rank = _mpi_world_rank;

+ 2 - 0
mpi/src/starpu_mpi_private.h

@@ -39,6 +39,8 @@ extern int _starpu_debug_level_max;
 void _starpu_mpi_set_debug_level_min(int level);
 void _starpu_mpi_set_debug_level_max(int level);
 #endif
+extern int _starpu_mpi_fake_world_size;
+extern int _starpu_mpi_fake_world_rank;
 
 #ifdef STARPU_NO_ASSERT
 #  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)

+ 1 - 1
socl/examples/basicsplit/basicsplit.c

@@ -203,7 +203,7 @@ int main(int UNUSED(argc), char** UNUSED(argv)) {
   cl_uint niter = 15;
 
    for (i=0; i<niter; i++) {
-      printf("Iteration %d...\n", i);
+      printf("Iteration %u...\n", i);
      add(cq, SIZE, s1, s2, d, 0, NULL, NULL);
       printf("Finishing iteration...\n");
       clFinish(cq);

+ 6 - 3
socl/examples/clinfo/clinfo.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010,2011 University of Bordeaux
+ * Copyright (C) 2010,2011, 2017 University of Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -92,7 +92,7 @@ main(void) {
          printf("  Plaform Name:\t\t\t\t\t %s\n", str);
 
          err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
-         checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
+         checkErr(err, "clGetDeviceIDs(CL_DEVICE_TYPE_ALL)");
          if (num_devices == 0) {
             printf("  No devices found\n");
             continue;
@@ -101,7 +101,10 @@ main(void) {
          cl_device_id devices[num_devices];
 
          err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
-         checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
+         if (err == CL_DEVICE_NOT_FOUND)
+            num_devices = 0;
+         else
+            checkErr(err, "clGetDeviceIDs(CL_DEVICE_TYPE_ALL)");
 
          printf("  Number of devices:\t\t\t\t %d\n", num_devices);
          {

+ 2 - 2
socl/src/cl_createbuffer.c

@@ -127,8 +127,8 @@ soclCreateBuffer(cl_context   context,
    }
 
    // Access mode
-   mem->mode = flags & CL_MEM_READ_ONLY  ? CL_MEM_READ_ONLY :
-               flags & CL_MEM_WRITE_ONLY ? CL_MEM_WRITE_ONLY : CL_MEM_READ_WRITE;
+   mem->mode = (flags & CL_MEM_READ_ONLY) ? CL_MEM_READ_ONLY :
+	   (flags & CL_MEM_WRITE_ONLY) ? CL_MEM_WRITE_ONLY : CL_MEM_READ_WRITE;
 
    // Perform data copy if necessary
    if (flags & CL_MEM_COPY_HOST_PTR)

+ 2 - 1
src/common/utils.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2012-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -107,6 +107,7 @@
 #endif
 
 /* TODO: cache */
+#define _STARPU_MSG(fmt, ...) do { fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); } while(0)
 #define _STARPU_DISP(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); }} while(0)
 #define _STARPU_ERROR(fmt, ...)                                                  \
 	do {                                                                          \

+ 2 - 2
src/core/dependencies/tags.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2013, 2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2016  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -108,7 +108,7 @@ static void _starpu_tag_free(void *_tag)
 			struct _starpu_cg *cg = tag->tag_successors.succ[succ];
 
 			unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1);
-			unsigned remaining STARPU_ATTRIBUTE_UNUSED = STARPU_ATOMIC_ADD(&cg->remaining, -1);
+			unsigned STARPU_ATTRIBUTE_UNUSED remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1);
 
 			if (!ntags && (cg->cg_type == STARPU_CG_TAG))
 				/* Last tag this cg depends on, cg becomes unreferenced */

+ 1 - 1
src/core/sched_ctx.c

@@ -2682,7 +2682,7 @@ unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *
 	return sched_ctx->sched_policy != NULL;
 }
 
-void *starpu_sched_ctx_get_used_data(unsigned sched_ctx_id)
+void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	STARPU_ASSERT(sched_ctx != NULL);

+ 15 - 9
src/core/sched_policy.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2016  Université de Bordeaux
- * Copyright (C) 2010-2016  CNRS
+ * Copyright (C) 2010-2017  CNRS
  * Copyright (C) 2011, 2016  INRIA
  * Copyright (C) 2016  Uppsala University
  *
@@ -115,9 +115,12 @@ static struct starpu_sched_policy *find_sched_policy_from_name(const char *polic
 	if (!policy_name)
 		return NULL;
 
-	if (strncmp(policy_name, "heft", 5) == 0)
+	if (strcmp(policy_name, "") == 0)
+		return NULL;
+
+	if (strncmp(policy_name, "heft", 4) == 0)
 	{
-		_STARPU_DISP("Warning: heft is now called \"dmda\".\n");
+		_STARPU_MSG("Warning: heft is now called \"dmda\".\n");
 		return &_starpu_sched_dmda_policy;
 	}
 
@@ -135,7 +138,7 @@ static struct starpu_sched_policy *find_sched_policy_from_name(const char *polic
 		}
 	}
 	if (strcmp(policy_name, "help") != 0)
-	     fprintf(stderr, "Warning: scheduling policy \"%s\" was not found, try \"help\" to get a list\n", policy_name);
+		_STARPU_MSG("Warning: scheduling policy '%s' was not found, try 'help' to get a list\n", policy_name);
 
 	/* nothing was found */
 	return NULL;
@@ -167,8 +170,12 @@ struct starpu_sched_policy *_starpu_select_sched_policy(struct _starpu_machine_c
 	if(required_policy)
 		selected_policy = find_sched_policy_from_name(required_policy);
 
+	/* If there is a policy that matches the required name, return it */
+	if (selected_policy)
+		return selected_policy;
+
 	/* First, we check whether the application explicitly gave a scheduling policy or not */
-	if (!selected_policy && user_conf && (user_conf->sched_policy))
+	if (user_conf && (user_conf->sched_policy))
 		return user_conf->sched_policy;
 
 	/* Otherwise, we look if the application specified the name of a policy to load */
@@ -176,15 +183,14 @@ struct starpu_sched_policy *_starpu_select_sched_policy(struct _starpu_machine_c
 	sched_pol_name = starpu_getenv("STARPU_SCHED");
 	if (sched_pol_name == NULL && user_conf && user_conf->sched_policy_name)
 		sched_pol_name = user_conf->sched_policy_name;
-
-	if (!selected_policy && sched_pol_name)
+	if (sched_pol_name)
 		selected_policy = find_sched_policy_from_name(sched_pol_name);
 
-	/* Perhaps there was no policy that matched the name */
+	/* If there is a policy that matches the name, return it */
 	if (selected_policy)
 		return selected_policy;
 
-	/* If no policy was specified, we use the greedy policy as a default */
+	/* If no policy was specified, we use the eager policy by default */
 	return &_starpu_sched_eager_policy;
 }
 

+ 7 - 4
src/core/simgrid.c

@@ -1,8 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012-2016  Université de Bordeaux
+ * Copyright (C) 2012-2017  Université de Bordeaux
  * Copyright (C) 2016  	    Inria
- * Copyright (C) 2016  	    CNRS
+ * Copyright (C) 2016, 2017  	    CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,6 +25,9 @@
 #include <core/perfmodel/perfmodel.h>
 #include <core/workers.h>
 #include <core/simgrid.h>
+#if defined(HAVE_SG_LINK_NAME) && (SIMGRID_VERSION_MAJOR >= 4 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 13))
+#include <simgrid/simdag.h>
+#endif
 
 #ifdef STARPU_SIMGRID
 #include <sys/resource.h>
@@ -101,7 +104,7 @@ int _starpu_simgrid_get_nbhosts(const char *prefix)
 		STARPU_ASSERT(starpu_mpi_world_rank);
 		snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%u", starpu_mpi_world_rank());
 		hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(name));
-		len = snprintf(new_prefix, sizeof(new_prefix), "%s-%s", name, prefix);
+		snprintf(new_prefix, sizeof(new_prefix), "%s-%s", name, prefix);
 		prefix = new_prefix;
 		len = strlen(prefix);
 	}
@@ -719,7 +722,7 @@ _starpu_simgrid_get_memnode_host(unsigned node)
 
 void _starpu_simgrid_count_ngpus(void)
 {
-#if defined(HAVE_SG_LINK_NAME) && SIMGRID_VERSION_MAJOR >= 4 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 13)
+#if defined(HAVE_SG_LINK_NAME) && (SIMGRID_VERSION_MAJOR >= 4 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 13))
 	unsigned src, dst;
 	msg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM");
 

+ 7 - 2
src/core/topology.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016 CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 CNRS
  * Copyright (C) 2011, 2016  INRIA
  * Copyright (C) 2016  Uppsala University
  *
@@ -128,6 +128,7 @@ _starpu_get_worker_from_driver(struct starpu_driver *d)
 #endif
 
 			default:
+				(void) worker;
 				_STARPU_DEBUG("Invalid device type\n");
 				return NULL;
 			}
@@ -530,7 +531,7 @@ _starpu_deallocate_topology_userdata(hwloc_obj_t obj)
 	STARPU_ASSERT(!data->worker_list || data->worker_list == (void*)-1);
 	free(data);
 	for (i = 0; i < obj->arity; i++)
-		_starpu_allocate_topology_userdata(obj->children[i]);
+		_starpu_deallocate_topology_userdata(obj->children[i]);
 }
 #endif
 #endif
@@ -556,7 +557,11 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 #ifndef STARPU_SIMGRID
 #ifdef STARPU_HAVE_HWLOC
 	hwloc_topology_init(&topology->hwtopology);
+#if HWLOC_API_VERSION >= 0x20000
+	hwloc_topology_set_io_types_filter(topology->hwtopology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
+#else
 	hwloc_topology_set_flags(topology->hwtopology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
+#endif
 	hwloc_topology_load(topology->hwtopology);
 	_starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology));
 #endif

+ 43 - 10
src/core/workers.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2009-2017  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011  INRIA
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011-2012, 2016  INRIA
@@ -21,6 +21,9 @@
 
 #include <stdlib.h>
 #include <stdio.h>
+#ifdef __linux__
+#include <sys/utsname.h>
+#endif
 #include <common/config.h>
 #include <common/utils.h>
 #include <common/graph.h>
@@ -1145,18 +1148,27 @@ static void _fill_tree(struct starpu_tree *tree, hwloc_obj_t curr_obj, unsigned
 static void _starpu_build_tree(void)
 {
 #ifdef STARPU_HAVE_HWLOC
+	hwloc_topology_t cpu_topo;
 	struct starpu_tree *tree;
 	_STARPU_MALLOC(tree, sizeof(struct starpu_tree));
 	_starpu_config.topology.tree = tree;
 
-	hwloc_obj_t root = hwloc_get_root_obj(_starpu_config.topology.hwtopology);
+	hwloc_topology_init(&cpu_topo);
+#if HWLOC_API_VERSION >= 0x20000
+	hwloc_topology_set_all_types_filter(cpu_topo, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
+#else
+	hwloc_topology_ignore_all_keep_structure(cpu_topo);
+#endif
+	hwloc_topology_load(cpu_topo);
+	hwloc_obj_t root = hwloc_get_root_obj(cpu_topo);
 
 /* 	char string[128]; */
 /* 	hwloc_obj_snprintf(string, sizeof(string), topology, root, "#", 0); */
 /* 	printf("%*s%s %d is_pu = %d \n", 0, "", string, root->logical_index, root->type == HWLOC_OBJ_PU); */
 
-	/* level, is_pu, is in the tree (it will be true only after add*/
-	_fill_tree(tree, root, 0, _starpu_config.topology.hwtopology, NULL);
+	/* level, is_pu, is in the tree (it will be true only after add) */
+	_fill_tree(tree, root, 0, cpu_topo, NULL);
+	hwloc_topology_destroy(cpu_topo);
 #endif
 }
 
@@ -1265,6 +1277,15 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 #ifdef STARPU_MODEL_DEBUG
 	_STARPU_DISP("Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n");
 #endif
+#ifdef __linux__
+	{
+		struct utsname buf;
+		if (uname(&buf) == 0
+		 && (!strncmp(buf.release, "4.7.", 4)
+		  || !strncmp(buf.release, "4.8.", 4)))
+			_STARPU_DISP("Warning: This system is running a 4.7 or 4.8 kernel. These have a severe scheduling performance regression issue, please upgrade to at least 4.9.\n");
+	}
+#endif
 #endif
 
 	if (starpu_getenv("STARPU_ENABLE_STATS"))
@@ -1759,6 +1780,13 @@ int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
         case STARPU_MPI_WORKER:
             return _starpu_config.topology.nmpidevices;
 
+		case STARPU_ANY_WORKER:
+			return _starpu_config.topology.ncpus+
+			       _starpu_config.topology.ncudagpus+
+			       _starpu_config.topology.nopenclgpus+
+			       _starpu_config.topology.nmicdevices+
+			       _starpu_config.topology.nsccdevices+
+		           _starpu_config.topology.nmpidevices;
 		default:
 			return -EINVAL;
 	}
@@ -1953,7 +1981,7 @@ unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *wo
 	unsigned id;
 	for (id = 0; id < nworkers; id++)
 	{
-		if (starpu_worker_get_type(id) == type)
+		if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type)
 		{
 			/* Perhaps the array is too small ? */
 			if (cnt >= maxsize)
@@ -1975,7 +2003,7 @@ int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num)
 	unsigned id;
 	for (id = 0; id < nworkers; id++)
 	{
-		if (starpu_worker_get_type(id) == type)
+		if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type)
 		{
 			if (num == cnt)
 				return id;
@@ -2069,7 +2097,8 @@ int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum star
 	unsigned id;
 	for (id = 0; id < nworkers; id++)
 	{
-		if (_starpu_config.workers[id].devid == devid && _starpu_config.workers[id].arch == type)
+		if (_starpu_config.workers[id].devid == devid &&
+		    (type == STARPU_ANY_WORKER || _starpu_config.workers[id].arch == type))
 			workerids[nw++] = id;
 	}
 	return nw;
@@ -2129,7 +2158,7 @@ int starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *worker
 	unsigned id;
 	for (id = 0; id < nworkers; id++)
 	{
-		if (starpu_worker_get_type(id) == type)
+		if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type)
 		{
 			/* Perhaps the array is too small ? */
 			if (cnt >= maxsize)
@@ -2150,7 +2179,7 @@ int starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, in
 
 	for (id = 0; id < nworkers; id++)
 	{
-		if (starpu_worker_get_type(id) == type)
+		if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type)
 		{
 			/* Perhaps the array is too small ? */
 			if (cnt >= maxsize)
@@ -2213,6 +2242,7 @@ starpu_driver_run(struct starpu_driver *d)
 		return _starpu_run_opencl(worker);
 #endif
 	default:
+		(void) worker;
 		_STARPU_DEBUG("Invalid device type\n");
 		return -EINVAL;
 	}
@@ -2239,6 +2269,7 @@ starpu_driver_init(struct starpu_driver *d)
 		return _starpu_opencl_driver_init(worker);
 #endif
 	default:
+		(void) worker;
 		return -EINVAL;
 	}
 }
@@ -2264,6 +2295,7 @@ starpu_driver_run_once(struct starpu_driver *d)
 		return _starpu_opencl_driver_run_once(worker);
 #endif
 	default:
+		(void) worker;
 		return -EINVAL;
 	}
 }
@@ -2289,6 +2321,7 @@ starpu_driver_deinit(struct starpu_driver *d)
 		return _starpu_opencl_driver_deinit(worker);
 #endif
 	default:
+		(void) worker;
 		return -EINVAL;
 	}
 }

+ 13 - 1
src/datawizard/memalloc.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  * Copyright (C) 2016  Inria
  *
@@ -33,6 +33,9 @@ static unsigned target_clean_p;
 /* Whether CPU memory has been explicitly limited by user */
 static int limit_cpu_mem;
 
+/* Prevent memchunks from being evicted from memory before they are actually used */
+static int diduse_barrier;
+
 /* This per-node RW-locks protect mc_list and memchunk_cache entries */
 /* Note: handle header lock is always taken before this (normal add/remove case) */
 static struct _starpu_spinlock mc_lock[STARPU_MAXNODES];
@@ -133,6 +136,7 @@ void _starpu_init_mem_chunk_lists(void)
 	minimum_clean_p = starpu_get_env_number_default("STARPU_MINIMUM_CLEAN_BUFFERS", 5);
 	target_clean_p = starpu_get_env_number_default("STARPU_TARGET_CLEAN_BUFFERS", 10);
 	limit_cpu_mem = starpu_get_env_number("STARPU_LIMIT_CPU_MEM");
+	diduse_barrier = starpu_get_env_number_default("STARPU_DIDUSE_BARRIER", 0);
 }
 
 void _starpu_deinit_mem_chunk_lists(void)
@@ -443,6 +447,10 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 	if ((int) node == handle->home_node)
 		return 0;
 
+	if (diduse_barrier && !mc->diduse)
+		/* Hasn't been used yet, avoid evicting it */
+		return 0;
+
 	/* REDUX memchunk */
 	if (mc->relaxed_coherency == 2)
 	{
@@ -1160,6 +1168,7 @@ static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_repli
 	mc->chunk_interface = NULL;
 	mc->size_interface = interface_size;
 	mc->remove_notify = NULL;
+	mc->diduse = 0;
 
 	return mc;
 }
@@ -1455,6 +1464,7 @@ void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
 		/* user-allocated memory */
 		return;
 	_starpu_spin_lock(&mc_lock[node]);
+	mc->diduse = 1;
 	MC_LIST_ERASE(node, mc);
 	MC_LIST_PUSH_BACK(node, mc);
 	_starpu_spin_unlock(&mc_lock[node]);
@@ -1468,6 +1478,8 @@ void _starpu_memchunk_wont_use(struct _starpu_mem_chunk *mc, unsigned node)
 		/* user-allocated memory */
 		return;
 	_starpu_spin_lock(&mc_lock[node]);
+	/* Avoid preventing it from being evicted */
+	mc->diduse = 1;
 	MC_LIST_ERASE(node, mc);
 	/* Caller will schedule a clean transfer */
 	mc->clean = 1;

+ 4 - 1
src/datawizard/memalloc.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2010, 2012-2015  Université de Bordeaux
+ * Copyright (C) 2009-2010, 2012-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -73,6 +73,9 @@ LIST_TYPE(_starpu_mem_chunk,
 	 * remove this entry from the mc_list, so we know we have to restart
 	 * from zero. This is protected by the corresponding mc_lock.  */
 	struct _starpu_mem_chunk **remove_notify;
+
+	/* Was this chunk used since it got allocated?  */
+	int diduse;
 )
 
 void _starpu_init_mem_chunk_lists(void);

+ 2 - 3
src/drivers/cuda/driver_cuda.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2016  Uppsala University
  *
@@ -267,8 +267,6 @@ done:
 
 static void init_device_context(unsigned devid, unsigned memnode)
 {
-	unsigned i;
-
 #ifndef STARPU_SIMGRID
 	cudaError_t cures;
 
@@ -351,6 +349,7 @@ static void init_device_context(unsigned devid, unsigned memnode)
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
 
+	unsigned i;
 	for (i = 0; i < ncudagpus; i++)
 	{
 		cures = starpu_cudaStreamCreate(&in_peer_transfer_streams[i][devid]);

+ 5 - 33
src/drivers/driver_common/driver_common.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010-2017  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2014, 2016  INRIA
  *
@@ -428,10 +428,7 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *worker, int w
 		_starpu_worker_set_status_sleeping(workerid);
 
 		if (_starpu_worker_can_block(memnode, worker)
-#ifndef STARPU_SIMGRID
-				&& !_starpu_sched_ctx_last_worker_awake(worker)
-#endif
-				)
+			&& !_starpu_sched_ctx_last_worker_awake(worker))
 		{
 			STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
 			STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
@@ -440,18 +437,7 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *worker, int w
 		{
 			STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 			if (_starpu_machine_is_running())
-			{
 				_starpu_exponential_backoff(worker);
-#ifdef STARPU_SIMGRID
-				static int warned;
-				if (!warned)
-				{
-					warned = 1;
-					_STARPU_DISP("Has to make simgrid spin for CPU idle time.  You can try to pass --enable-blocking-drivers to ./configure to avoid this\n");
-				}
-				MSG_process_sleep(0.000010);
-#endif
-			}
 		}
 
 		return NULL;
@@ -583,10 +569,7 @@ int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_
 		_starpu_worker_set_status_sleeping(workerid);
 
 		if (_starpu_worker_can_block(memnode, worker)
-#ifndef STARPU_SIMGRID
-				&& !_starpu_sched_ctx_last_worker_awake(worker)
-#endif
-				)
+				&& !_starpu_sched_ctx_last_worker_awake(worker))
 		{
 			STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
 			STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
@@ -595,25 +578,14 @@ int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_
 		{
 			STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 			if (_starpu_machine_is_running())
-			{
 				_starpu_exponential_backoff(worker);
-#ifdef STARPU_SIMGRID
-				static int warned;
-				if (!warned)
-				{
-					warned = 1;
-					_STARPU_DISP("Has to make simgrid spin for CPU idle time.  You can try to pass --enable-blocking-drivers to ./configure to avoid this\n");
-				}
-				MSG_process_sleep(0.000010);
-#endif
-			}
 		}
 		return 0;
 	}
 
 	_starpu_worker_set_status_wakeup(workerid);
 	worker->spinning_backoff = BACKOFF_MIN;
-#endif /* STARPU_SIMGRID */
+#endif /* !STARPU_SIMGRID */
 
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[0].sched_mutex);
 #endif /* !STARPU_NON_BLOCKING_DRIVERS */

+ 7 - 0
src/sched_policies/component_perfmodel_select.c

@@ -44,7 +44,14 @@ static int perfmodel_select_push_task(struct starpu_sched_component * component,
 	if(can_execute)
 	{
 		if(isnan(length))
+		{
+			static int warned;
+			if (!warned) {
+				warned = 1;
+				_STARPU_DISP("Warning: performance model for %s not finished calibrating, using a dumb scheduling heuristic for now\n",starpu_task_get_name(task));
+			}
 			return starpu_sched_component_push_task(component,data->calibrator_component,task);
+		}
 		if(_STARPU_IS_ZERO(length))
 			return starpu_sched_component_push_task(component,data->no_perfmodel_component,task);
 		return starpu_sched_component_push_task(component,data->perfmodel_component,task);

+ 2 - 2
src/sched_policies/component_worker.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011-2013  INRIA
  * Copyright (C) 2013  Simon Archipoff
@@ -683,7 +683,7 @@ static int combined_worker_push_task(struct starpu_sched_component * component,
 		task_alias[i]->task->workerid = combined_worker->combined_workerid[i];
 		task_alias[i]->left = task_alias[i-1];
 		task_alias[i - 1]->right = task_alias[i];
-		task_alias[i]->pntasks = &task_alias[0]->ntasks;
+		task_alias[i]->pntasks = &(task_alias[0]->ntasks);
 	}
 
 	starpu_pthread_mutex_t * mutex_to_unlock = NULL;

+ 8 - 2
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011-2012, 2016  INRIA
@@ -562,11 +562,17 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 				best_impl = nimpl;
 			}
 
-			if (isnan(local_length))
+			if (isnan(local_length)) {
 				/* we are calibrating, we want to speed-up calibration time
 				 * so we privilege non-calibrated tasks (but still
 				 * greedily distribute them to avoid dumb schedules) */
+				static int warned;
+				if (!warned) {
+					warned = 1;
+					_STARPU_DISP("Warning: performance model for %s not finished calibrating on worker %u, using a dumb scheduling heuristic for now\n", starpu_task_get_name(task), worker);
+				}
 				calibrating = 1;
+			}
 
 			if (isnan(local_length) || _STARPU_IS_ZERO(local_length))
 				/* there is no prediction available for that task

+ 2 - 2
src/sched_policies/graph_test_policy.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010-2013, 2016  CNRS
  * Copyright (C) 2011  INRIA
  *
@@ -124,7 +124,7 @@ static struct _starpu_prio_deque *select_prio(unsigned sched_ctx_id, struct _sta
 		if (length == 0.)
 		{
 			_STARPU_DISP("Warning: graph_test needs performance models for all tasks, including %s\n",
-					_starpu_job_get_task_name(_starpu_get_job_associated_to_task(task)));
+					starpu_task_get_name(task));
 			power = 0.;
 		}
 		else

+ 8 - 2
src/sched_policies/parallel_heft.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012 INRIA
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2016  CNRS
  *
@@ -374,11 +374,17 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 				nimpl_best = nimpl;
 			}
 
-			if (isnan(local_task_length[worker_ctx][nimpl]))
+			if (isnan(local_task_length[worker_ctx][nimpl])) {
+				static int warned;
+				if (!warned) {
+					warned = 1;
+					_STARPU_DISP("Warning: performance model for %s not finished calibrating on %u, using a dumb scheduling heuristic for now\n", starpu_task_get_name(task), worker);
+				}
 				/* we are calibrating, we want to speed-up calibration time
 				 * so we privilege non-calibrated tasks (but still
 				 * greedily distribute them to avoid dumb schedules) */
 				calibrating = 1;
+			}
 
 			if (isnan(local_task_length[worker_ctx][nimpl])
 					|| _STARPU_IS_ZERO(local_task_length[worker_ctx][nimpl]))

+ 28 - 4
src/sched_policies/work_stealing_policy.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012, 2016  INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -133,7 +133,8 @@ static int select_victim_round_robin(struct _starpu_work_stealing_data *ws, unsi
 		 * estimation */
 		ntasks = ws->per_worker[workerids[worker]].queue_array->ntasks;
 
-		if (ntasks && ws->per_worker[workerids[worker]].busy)
+		if (ntasks && (ws->per_worker[workerids[worker]].busy
+					   || starpu_worker_is_blocked(workerids[worker])))
 			break;
 
 		worker = (worker + 1) % nworkers;
@@ -538,6 +539,14 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 	{
 		/* there was a local task */
 		ws->per_worker[workerid].busy = 1;
+		starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid);
+		unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx_id);
+		if(child_sched_ctx != STARPU_NMAX_SCHED_CTXS)
+		{
+			starpu_sched_ctx_move_task_to_ctx(task, child_sched_ctx, 1, 1);
+			starpu_sched_ctx_revert_task_counters(sched_ctx_id, task->flops);
+			return NULL;
+		}
 		return task;
 	}
 
@@ -571,6 +580,7 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 	{
 		_STARPU_TRACE_WORK_STEALING(workerid, victim);
 		_STARPU_TASK_BREAK_ON(task, sched);
+		starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, victim);
 		record_data_locality(task, workerid);
 		record_worker_locality(ws, task, workerid, sched_ctx_id);
 		locality_popped_task(ws, task, victim, sched_ctx_id);
@@ -580,6 +590,16 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 	/* Done with stealing, resynchronize with core */
 	STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex);
 
+	if (task)
+	{
+		unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx_id);
+		if(child_sched_ctx != STARPU_NMAX_SCHED_CTXS)
+		{
+			starpu_sched_ctx_move_task_to_ctx(task, child_sched_ctx, 1, 1);
+			starpu_sched_ctx_revert_task_counters(sched_ctx_id, task->flops);
+			return NULL;
+		}
+	}
 	ws->per_worker[workerid].busy = !!task;
 	return task;
 }
@@ -589,10 +609,12 @@ int ws_push_task(struct starpu_task *task)
 {
 	unsigned sched_ctx_id = task->sched_ctx;
 	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	int workerid = -1;
+	int workerid;
 
 #ifdef USE_LOCALITY
 	workerid = select_worker_locality(ws, task, sched_ctx_id);
+#else
+	workerid = -1;
 #endif
 	if (workerid == -1)
 		workerid = starpu_worker_get_id();
@@ -618,6 +640,7 @@ int ws_push_task(struct starpu_task *task)
 
 	starpu_push_task_end(task);
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex);
+	starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, workerid);
 
 #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID)
 	/* TODO: implement fine-grain signaling, similar to what eager does */
@@ -724,7 +747,8 @@ static int lws_select_victim(struct _starpu_work_stealing_data *ws, unsigned sch
 	{
 		int neighbor = ws->per_worker[workerid].proxlist[i];
 		int ntasks = ws->per_worker[neighbor].queue_array->ntasks;
-		if (ntasks && ws->per_worker[neighbor].busy)
+		if (ntasks && (ws->per_worker[neighbor].busy
+					   || starpu_worker_is_blocked(neighbor)))
 			return neighbor;
 	}
 	return -1;

+ 19 - 1
src/util/misc.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012, 2015  Université de Bordeaux
+ * Copyright (C) 2012, 2015, 2017  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -55,3 +55,21 @@ const char *_starpu_job_get_task_name(struct _starpu_job *j)
 	else
 		return _starpu_job_get_model_name(j);
 }
+
+const char *starpu_task_get_model_name(struct starpu_task *task)
+{
+	if (!task)
+		return NULL;
+
+	return _starpu_codelet_get_model_name(task->cl);
+}
+
+const char *starpu_task_get_name(struct starpu_task *task)
+{
+	if (!task)
+		return NULL;
+	if (task->name)
+		return task->name;
+	else
+		return starpu_task_get_model_name(task);
+}

+ 6 - 3
src/util/starpu_task_insert.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2012, 2014-2016  Université de Bordeaux
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2012, 2014-2017  Université de Bordeaux
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -46,7 +46,10 @@ void _starpu_codelet_unpack_args_and_copyleft(char *cl_arg, void *_buffer, size_
 		void *argptr = va_arg(varg_list, void *);
 
 		/* If not reading all cl_args */
-		if(argptr == NULL)
+		// NULL was the original end marker; we now use 0.
+		// 0 and NULL should be the same value, but we keep both
+		// comparisons for systems on which they could differ.
+		if(argptr == 0 || argptr == NULL)
 			break;
 
 		size_t arg_size;

+ 3 - 1
tests/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2016  Université de Bordeaux
+# Copyright (C) 2009-2017  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
 # Copyright (C) 2010, 2011, 2012  INRIA
 #
@@ -20,8 +20,10 @@ include $(top_srcdir)/starpu.mk
 if STARPU_SIMGRID
 STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling
 STARPU_HOSTNAME=mirage
+MALLOC_PERTURB_=0
 export STARPU_PERF_MODEL_DIR
 export STARPU_HOSTNAME
+export MALLOC_PERTURB_
 endif
 
 AM_CFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused

+ 32 - 22
tests/coverage/coverage.sh

@@ -2,7 +2,7 @@
 
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2010  Université de Bordeaux
+# Copyright (C) 2010, 2017  Université de Bordeaux
 # Copyright (C) 2010, 2011  CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -28,33 +28,43 @@ check_success()
 
 apps()
 {
-    echo "block opencl"
-    STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/basic_examples/block
-    check_success $?
+    if [ -f $exampledir/basic_examples/block ] ; then
+	echo "block opencl"
+	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/basic_examples/block
+	check_success $?
+    fi
 
-    echo "variable opencl"
-    STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/basic_examples/variable 100
-    check_success $?
+    if [ -f $exampledir/basic_examples/variable ] ; then
+	echo "variable opencl"
+	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/basic_examples/variable 100
+	check_success $?
 
-    echo "variable no worker"
-    STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $exampledir/basic_examples/variable
-    check_success $?
+	echo "variable no worker"
+	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $exampledir/basic_examples/variable
+	check_success $?
+    fi
 
-    echo "incrementer opencl"
-    STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/incrementer/incrementer 10
-    check_success $?
+    if [ -f $exampledir/incrementer/incrementer ] ; then
+	echo "incrementer opencl"
+	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/incrementer/incrementer 10
+	check_success $?
 
-    echo "incrementer no worker"
-    STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $exampledir/incrementer/incrementer
-    check_success $?
+	echo "incrementer no worker"
+	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $exampledir/incrementer/incrementer
+	check_success $?
+    fi
 
-    echo "tag_example"
-    $exampledir/tag_example/tag_example -iter 64 -i 128 -j 24
-    check_success $?
+    if [ -f $exampledir/tag_example/tag_example ] ; then
+	echo "tag_example"
+	$exampledir/tag_example/tag_example -iter 64 -i 128 -j 24
+	check_success $?
+    fi
 
-    echo "tag_example2"
-    $exampledir/tag_example/tag_example2 -iter 64 -i 128
-    check_success $?
+    if [ -f $exampledir/tag_example/tag_example2 ] ; then
+	echo "tag_example2"
+	$exampledir/tag_example/tag_example2 -iter 64 -i 128
+	check_success $?
+    fi
 
     if [ -f $exampledir/cholesky/dw_cholesky ] ; then
 	echo "chol.dm"

+ 4 - 0
tests/datawizard/allocate.c

@@ -42,6 +42,10 @@ int test_prefetch(unsigned memnodes)
 	unsigned i;
 	starpu_ssize_t available_size;
 
+	if (starpu_get_env_number_default("STARPU_DIDUSE_BARRIER", 0))
+		/* This would hang */
+		return STARPU_TEST_SKIPPED;
+
 	buffers[0] = malloc(SIZE_ALLOC*1024*512);
 	STARPU_ASSERT(buffers[0]);
 

+ 2 - 2
tests/datawizard/simgrid-locality.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2016 Université de Bordeaux
+ * Copyright (C) 2016-2017 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,7 +17,7 @@
 /* Check that defining a main makes starpu use MSG_process_attach. */
 #include "locality.c"
 #include <config.h>
-#if defined(HAVE_MSG_PROCESS_ATTACH) && SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 14)
+#if defined(HAVE_MSG_PROCESS_ATTACH) && SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
 #undef main
 int main(int argc, char *argv[]) {
 	return starpu_main(argc, argv);

+ 4 - 0
tests/disk/mem_reclaim.c

@@ -142,6 +142,10 @@ int dotest(struct starpu_disk_ops *ops, char *base, void (*vector_data_register)
 	int *A, *C;
 	starpu_data_handle_t handles[NDATA];
 
+	if (starpu_get_env_number_default("STARPU_DIDUSE_BARRIER", 0))
+		/* This would hang */
+		return STARPU_TEST_SKIPPED;
+
 	FPRINTF(stderr, "Testing <%s>\n", text);
 	/* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */
 	struct starpu_conf conf;

+ 2 - 3
tests/loader.c

@@ -237,13 +237,12 @@ int main(int argc, char *argv[])
 		test_args = (char *) calloc(150, sizeof(char));
 		sprintf(test_args, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR);
 	}
-
-	if (strstr(test_name, "starpu_perfmodel_display"))
+	else if (strstr(test_name, "starpu_perfmodel_display"))
 	{
 		test_args = (char *) calloc(5, sizeof(char));
 		sprintf(test_args, "-l");
 	}
-	if (strstr(test_name, "starpu_perfmodel_plot"))
+	else if (strstr(test_name, "starpu_perfmodel_plot"))
 	{
 		test_args = (char *) calloc(5, sizeof(char));
 		sprintf(test_args, "-l");

+ 10 - 10
tests/main/combined_workers/bfs/bfs.cpp

@@ -54,10 +54,10 @@ void read_file(char *input_f, unsigned int *nb_nodes, unsigned int *nb_edges,
 	fscanf(fp, "%u", nb_nodes);
 
 	// allocate host memory
-	*origin_graph_nodes = (Node *) malloc(sizeof(Node) * (*nb_nodes));
-	*origin_graph_mask = (bool *) malloc(sizeof(bool) * (*nb_nodes));
-	*origin_updating_graph_mask = (bool *) malloc(sizeof(bool) * (*nb_nodes));
-	*origin_graph_visited = (bool *) malloc(sizeof(bool) * (*nb_nodes));
+	*origin_graph_nodes = malloc(sizeof(Node) * (*nb_nodes));
+	*origin_graph_mask = malloc(sizeof(bool) * (*nb_nodes));
+	*origin_updating_graph_mask = malloc(sizeof(bool) * (*nb_nodes));
+	*origin_graph_visited = malloc(sizeof(bool) * (*nb_nodes));
 
 	int start, edgeno;
 	// initialize the memory
@@ -156,12 +156,12 @@ int main( int argc, char** argv)
 		  &origin_graph_mask, &origin_updating_graph_mask,
 		  &origin_graph_visited, &origin_graph_edges, &origin_cost);
 
-	graph_nodes = (Node *) calloc(nb_nodes, sizeof(Node));
-	graph_mask = (bool *) calloc(nb_nodes, sizeof(bool));
-	updating_graph_mask = (bool *) calloc(nb_nodes, sizeof(bool));
-	graph_visited = (bool *) calloc(nb_nodes, sizeof(bool));
-	graph_edges = (int*) calloc(nb_edges, sizeof(int));
-	cost = (int*) calloc(nb_nodes, sizeof(int));
+	graph_nodes = calloc(nb_nodes, sizeof(Node));
+	graph_mask = calloc(nb_nodes, sizeof(bool));
+	updating_graph_mask = calloc(nb_nodes, sizeof(bool));
+	graph_visited = calloc(nb_nodes, sizeof(bool));
+	graph_edges = calloc(nb_edges, sizeof(int));
+	cost = calloc(nb_nodes, sizeof(int));
 
 	memcpy(graph_nodes, origin_graph_nodes, nb_nodes*sizeof(Node));
 	memcpy(graph_edges, origin_graph_edges, nb_edges*sizeof(int));

+ 5 - 5
tests/main/insert_task_value.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015, 2016  CNRS
+ * Copyright (C) 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -43,7 +43,7 @@ void func_cpu_int_float_multiple_unpack(void *descr[], void *_args)
 	float ffactor;
 	(void) descr;
 
-	starpu_codelet_unpack_args(_args, ifactor, NULL);
+	starpu_codelet_unpack_args(_args, ifactor, 0);
 	starpu_codelet_unpack_args(_args, ifactor, &ffactor);
 
 	FPRINTF(stderr, "[func_cpu_int_float_multiple_unpack] Values %d - %3.2f\n", ifactor[0], ffactor);
@@ -60,7 +60,7 @@ void func_cpu_int_float_unpack_copyleft(void *descr[], void *_args)
 
 	buffer_size = sizeof(int)+sizeof(float)+sizeof(size_t);
 	buffer = calloc(buffer_size, 1);
-	starpu_codelet_unpack_args_and_copyleft(_args, buffer, buffer_size, ifactor, NULL);
+	starpu_codelet_unpack_args_and_copyleft(_args, buffer, buffer_size, ifactor, 0);
 	starpu_codelet_unpack_args(buffer, &ffactor);
 
 	FPRINTF(stderr, "[func_cpu_int_float_unpack_copyleft] Values %d - %3.2f\n", ifactor[0], ffactor);
@@ -86,7 +86,7 @@ void func_cpu_float_int_multiple_unpack(void *descr[], void *_args)
 	float ffactor;
 	(void) descr;
 
-	starpu_codelet_unpack_args(_args, &ffactor, NULL);
+	starpu_codelet_unpack_args(_args, &ffactor, 0);
 	starpu_codelet_unpack_args(_args, &ffactor, ifactor);
 
 	FPRINTF(stderr, "[func_cpu_float_int_multiple_unpack] Values %d - %3.2f\n", ifactor[0], ffactor);
@@ -103,7 +103,7 @@ void func_cpu_float_int_unpack_copyleft(void *descr[], void *_args)
 
 	buffer_size = sizeof(int)+2048*sizeof(int)+sizeof(size_t);
 	buffer = calloc(buffer_size, 1);
-	starpu_codelet_unpack_args_and_copyleft(_args, buffer, buffer_size, &ffactor, NULL);
+	starpu_codelet_unpack_args_and_copyleft(_args, buffer, buffer_size, &ffactor, 0);
 	starpu_codelet_unpack_args(buffer, ifactor);
 
 	FPRINTF(stderr, "[func_cpu_float_int_multiple_unpack] Values %d - %3.2f\n", ifactor[0], ffactor);

+ 6 - 3
tools/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2016  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+# Copyright (C) 2009-2017  Université de Bordeaux
+# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 # Copyright (C) 2016  Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -20,8 +20,10 @@ include $(top_srcdir)/starpu.mk
 if STARPU_SIMGRID
 STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling
 STARPU_HOSTNAME=mirage
+MALLOC_PERTURB_=0
 export STARPU_PERF_MODEL_DIR
 export STARPU_HOSTNAME
+export MALLOC_PERTURB_
 endif
 
 SUBDIRS =
@@ -154,7 +156,8 @@ EXTRA_DIST =				\
 	msvc/starpu_exec.bat		\
 	msvc/starpu_var.bat		\
 	msvc/starpu.sln			\
-	msvc/starpu/starpu.vcxproj
+	msvc/starpu/starpu.vcxproj	\
+	cppcheck/suppressions.txt
 
 CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log figure/* mlr_*
 

+ 66 - 0
tools/cppcheck/suppressions.txt

@@ -0,0 +1,66 @@
+memleakOnRealloc
+knownConditionTrueFalse
+variableScope
+unusedStructMember
+ConfigurationNotChecked
+
+*:tools/dev/*
+*:starpufft/*
+*:min-dgels/*
+*:starpu-top/*
+*:socl/src/CL/*
+
+// TODO: sc_hypervisor is fully suppressed for now; it should be checked with cppcheck as well
+*:sc_hypervisor/*
+
+varFuncNullUB:examples/sched_ctx/two_cpu_contexts.c:76
+negativeIndex:examples/stencil/stencil-tasks.c:200
+negativeIndex:examples/stencil/stencil-tasks.c:203
+constStatement:examples/stencil/*
+
+unreadVariable:tests/openmp/*
+unusedLabel:tests/datawizard/gpu_register.c
+unusedLabel:tests/datawizard/gpu_ptr_register.c
+redundantAssignment:tests/datawizard/interfaces/test_interfaces.c:752
+redundantAssignment:tests/datawizard/mpi_like_async.c:165
+redundantAssignment:tests/datawizard/mpi_like_async.c:211
+unusedPrivateFunction:tests/main/combined_workers/bfs/timer.h:45
+redundantAssignment:tests/main/driver_api/init_run_deinit.c
+redundantAssignment:tests/main/driver_api/run_driver.c
+
+uselessAssignmentPtrArg:mpi/src/starpu_mpi.c:155
+unreadVariable:mpi/src/starpu_mpi.c:849
+redundantAssignment:src/core/workers.c
+
+invalidPointerCast:src/core/perfmodel/perfmodel_nan.c:74
+unreadVariable:src/core/dependencies/tags.c:111
+uselessAssignmentPtrArg:src/core/sched_ctx_list.c:144
+unusedStructMember:src/core/perfmodel/perfmodel_bus.c:62
+unusedStructMember:src/core/perfmodel/perfmodel_bus.c:63
+unusedStructMember:src/core/perfmodel/perfmodel_bus.c:64
+unusedStructMember:src/core/perfmodel/perfmodel_bus.c:65
+unusedStructMember:src/core/perfmodel/perfmodel_bus.c:66
+unusedStructMember:src/core/simgrid.c:225
+unusedStructMember:src/core/simgrid.c:226
+duplicateExpression:src/util/starpu_task_insert.c:52
+
+// TODO: check whether this redundantCopy report is a genuine error before keeping the suppression
+redundantCopy:src/core/disk_ops/disk_leveldb.cpp:192
+
+nullPointerRedundantCheck:src/common/rbtree.c
+wrongPrintfScanfArgNum:src/core/simgrid.c:715
+unreadVariable:src/datawizard/interfaces/*
+unreadVariable:src/drivers/driver_common/driver_common.c:482
+clarifyCondition:src/drivers/opencl/driver_opencl.c:936
+unreadVariable:src/drivers/opencl/driver_opencl.c:767
+clarifyCondition:src/drivers/cuda/driver_cuda.c:506
+arithOperationsOnVoidPointer:src/drivers/scc/*
+nullPointerRedundantCheck:src/sched_policies/deque_modeling_policy_data_aware.c:197
+sizeofDereferencedVoidPointer:src/util/fstarpu.c
+
+allocaCalled:gcc-plugin/src/*
+unusedVariable:gcc-plugin/tests/*
+unreadVariable:gcc-plugin/tests/*
+duplicateExpression:gcc-plugin/src/*
+
+pointerSize:socl/src/cl_getcontextinfo.c:33

+ 5 - 3
tools/starpu_fxt_stats.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2011, 2012, 2013, 2014, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -193,12 +193,14 @@ int main(int argc, char **argv)
 		{
 			if (transfers[src][dst] != 0)
 			{
-				fprintf(fd_out, "%d -> %d \t %ld MB\n", src, dst, transfers[src][dst]/(1024*1024));
+				fprintf(fd_out, "%u -> %u \t %lu MB\n", src, dst, (unsigned long)(transfers[src][dst]/(1024*1024)));
 			}
 		}
 	}
 
-	fprintf(fd_out, "There was %d tasks and %d work stealing\n", njob, nws);
+	fprintf(fd_out, "There was %u tasks and %u work stealing\n", njob, nws);
+	if (fd_out != stdout)
+		fclose(fd_out);
 
 	return 0;
 }

+ 3 - 2
tools/starpu_lp2paje.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux
- * Copyright (C) 2014, 2015, 2016                  CNRS
+ * Copyright (C) 2014, 2015, 2016, 2017                  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -35,7 +35,7 @@ int main(int argc, char *argv[])
 {
 	int nw, nt;
 	double tmax;
-	int i, w, ww, t, tt, t2;
+	int i, w, ww, t, tt;
 	int foo;
 	double bar;
 
@@ -142,6 +142,7 @@ int main(int argc, char *argv[])
 
 		for (t = 0; t < nt; t++)
 		{
+			int t2;
 			for (t2 = 0; t2 < nt; t2++)
 			{
 				if (t != t2 && task[t].worker == task[t2].worker)

+ 1 - 1
tools/starpu_machine_display.c

@@ -178,7 +178,7 @@ int main(int argc, char **argv)
 	display_worker_names(STARPU_OPENCL_WORKER);
 
 #ifdef STARPU_USE_MIC
-	fprintf(stdout, "\t%d MIC cores (from %d devices)\n", nmiccores, nmicdevs);
+	fprintf(stdout, "\t%u MIC cores (from %u devices)\n", nmiccores, nmicdevs);
 	display_worker_names(STARPU_MIC_WORKER);
 #endif