Browse Source

merge trunk

Nathalie Furmento 7 years ago
parent
commit
6eb01070c2
66 changed files with 567 additions and 541 deletions
  1. 2 0
      ChangeLog
  2. 9 1
      configure.ac
  3. 3 2
      doc/doxygen/chapters/470_simgrid.doxy
  4. 3 0
      doc/doxygen/chapters/api/performance_model.doxy
  5. 3 1
      examples/lu/complex_double.h
  6. 3 1
      examples/lu/complex_float.h
  7. 3 1
      examples/lu/lu-double.h
  8. 3 1
      examples/lu/lu-float.h
  9. 7 18
      examples/lu/xlu_kernels.c
  10. 2 0
      include/starpu_perfmodel.h
  11. 2 1
      mpi/examples/comm/comm.c
  12. 2 1
      mpi/examples/comm/mix_comm.c
  13. 1 16
      mpi/examples/complex/mpi_complex.c
  14. 1 16
      mpi/examples/stencil/stencil5.c
  15. 2 1
      mpi/examples/stencil/stencil5_lb.c
  16. 9 1
      mpi/src/mpi/starpu_mpi_early_data.c
  17. 2 0
      mpi/tests/Makefile.am
  18. 3 14
      mpi/tests/cache.c
  19. 1 12
      mpi/tests/cache_disable.c
  20. 1 12
      mpi/tests/callback.c
  21. 3 16
      mpi/tests/early_request.c
  22. 1 12
      mpi/tests/insert_task.c
  23. 1 14
      mpi/tests/insert_task_block.c
  24. 1 12
      mpi/tests/insert_task_compute.c
  25. 1 12
      mpi/tests/insert_task_count.c
  26. 1 14
      mpi/tests/insert_task_dyn_handles.c
  27. 1 12
      mpi/tests/insert_task_node_choice.c
  28. 5 16
      mpi/tests/insert_task_owner.c
  29. 2 13
      mpi/tests/insert_task_owner2.c
  30. 1 12
      mpi/tests/insert_task_owner_data.c
  31. 1 12
      mpi/tests/insert_task_recv_cache.c
  32. 1 12
      mpi/tests/insert_task_sent_cache.c
  33. 137 0
      mpi/tests/insert_task_seq.c
  34. 1 12
      mpi/tests/matrix.c
  35. 1 12
      mpi/tests/matrix2.c
  36. 5 18
      mpi/tests/mpi_reduction.c
  37. 1 14
      mpi/tests/mpi_scatter_gather.c
  38. 1 12
      mpi/tests/policy_register.c
  39. 2 13
      mpi/tests/policy_selection.c
  40. 1 12
      mpi/tests/policy_selection2.c
  41. 1 12
      mpi/tests/ring.c
  42. 1 12
      mpi/tests/ring_async.c
  43. 1 12
      mpi/tests/ring_async_implicit.c
  44. 1 12
      mpi/tests/ring_sync.c
  45. 1 12
      mpi/tests/ring_sync_detached.c
  46. 1 12
      mpi/tests/temporary.c
  47. 1 0
      src/Makefile.am
  48. 9 0
      src/common/thread.c
  49. 8 0
      src/core/debug.c
  50. 31 3
      src/core/dependencies/cg.c
  51. 15 3
      src/core/dependencies/cg.h
  52. 3 3
      src/core/dependencies/tags.c
  53. 24 3
      src/core/dependencies/task_deps.c
  54. 14 1
      src/core/perfmodel/perfmodel.c
  55. 16 0
      src/core/perfmodel/perfmodel_bus.c
  56. 29 81
      src/core/simgrid.c
  57. 10 0
      src/core/simgrid.h
  58. 115 0
      src/core/simgrid_cpp.cpp
  59. 4 0
      src/datawizard/malloc.c
  60. 7 0
      src/util/openmp_runtime_support.c
  61. 1 0
      tests/Makefile.am
  62. 3 0
      tests/datawizard/deps.c
  63. 1 1
      tests/datawizard/interfaces/copy_interfaces.c
  64. 1 12
      tests/datawizard/variable_size.c
  65. 1 1
      tests/main/starpu_init.c
  66. 38 12
      tools/gdbinit

+ 2 - 0
ChangeLog

@@ -66,6 +66,8 @@ Small features:
   * New starpu_task_insert parameter STARPU_TASK_DEPS_ARRAY which
     allows to declare task dependencies similarly to
     starpu_task_declare_deps_array()
+  * Add dependency backward information in debugging mode for gdb's
+    starpu-print-task
 
 Changes:
   * Vastly improve simgrid simulation time.

+ 9 - 1
configure.ac

@@ -162,9 +162,11 @@ if test x$enable_simgrid = xyes ; then
 		]
 	)
 	AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/host.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_HOST_H], [1], [Define to 1 if you have host.h in simgrid/.])])
 	AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])])
 	AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include <simgrid/simix.h>]])
-   	AC_CHECK_FUNCS([MSG_process_join MSG_process_attach MSG_get_as_by_name MSG_environment_get_routing_root MSG_host_get_speed xbt_mutex_try_acquire smpi_process_set_user_data sg_link_name sg_host_route])
+   	AC_CHECK_FUNCS([MSG_process_join MSG_process_attach MSG_get_as_by_name MSG_zone_get_by_name MSG_environment_get_routing_root MSG_zone_get_hosts MSG_host_get_speed MSG_process_self_name])
+	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data sg_link_name sg_host_route sg_host_self simcall_process_create])
 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
 	AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include <smpi/smpi.h>]])
 	AC_CHECK_FUNCS([SIMIX_process_get_code], [AC_DEFINE([STARPU_SIMGRID_HAVE_SIMIX_PROCESS_GET_CODE], [1], [Define to 1 if you have the `SIMIX_process_get_code' function.])])
@@ -192,6 +194,12 @@ if test x$enable_simgrid = xyes ; then
 
 	# Simgrid 3.12 & 3.13 need -std=c++11 to be able to build anything in C++...
 	AC_LANG_PUSH([C++])
+	if test x$enable_shared = xno ; then
+		# When linking statically, libtool does not realize we need libstdc++ for simgrid_cpp.cpp
+		SIMGRID_LIBS="$SIMGRID_LIBS -lstdc++"
+		LDFLAGS="$LDFLAGS -lstdc++"
+	fi
+
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
 			  #ifdef HAVE_SIMGRID_MSG_H
 			  #include <simgrid/msg.h>

+ 3 - 2
doc/doxygen/chapters/470_simgrid.doxy

@@ -9,8 +9,9 @@
 /*! \page SimGridSupport SimGrid Support
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
-platform. This was tested with simgrid from 3.11 to 3.15,
-other versions may have compatibility issues.
+platform. This was tested with simgrid from 3.11 to 3.16;
+other versions may have compatibility issues. Notably, version 3.17 does not
+build at all.
 
 \section Preparing Preparing Your Application For Simulation
 

+ 3 - 0
doc/doxygen/chapters/api/performance_model.doxy

@@ -333,4 +333,7 @@ Return the estimated time to transfer a given size between two memory nodes.
 \ingroup API_Performance_Model
 Return the estimated time of a task with the given model and the given footprint.
 
+\var starpu_nop_perf_model
+Performance model which always returns 1µs.
+
 */

+ 3 - 1
examples/lu/complex_double.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -46,3 +46,5 @@
 #define PIVOT_THRESHHOLD	10e-5
 
 #define CAN_EXECUTE .can_execute = can_execute,
+
+#define ISZERO(f)	(fpclassify(creal(f)) == FP_ZERO && fpclassify(cimag(f)) == FP_ZERO)

+ 3 - 1
examples/lu/complex_float.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -46,3 +46,5 @@
 #define PIVOT_THRESHHOLD	10e-5
 
 #define CAN_EXECUTE
+
+#define ISZERO(f)	(fpclassify(creal(f)) == FP_ZERO && fpclassify(cimag(f)) == FP_ZERO)

+ 3 - 1
examples/lu/lu-double.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -43,3 +43,5 @@
 #define PIVOT_THRESHHOLD	10e-10
 
 #define CAN_EXECUTE .can_execute = can_execute,
+
+#define ISZERO(f)	(fpclassify(f) == FP_ZERO)

+ 3 - 1
examples/lu/lu-float.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -43,3 +43,5 @@
 #define PIVOT_THRESHHOLD	10e-5
 
 #define CAN_EXECUTE
+
+#define ISZERO(f)	(fpclassify(f) == FP_ZERO)

+ 7 - 18
examples/lu/xlu_kernels.c

@@ -362,13 +362,8 @@ static inline void STARPU_LU(common_u11)(void *descr[],
 			{
 				TYPE pivot;
 				pivot = sub11[z+z*ld];
-#ifdef COMPLEX_LU
-				STARPU_ASSERT(fpclassify(creal(pivot)) != FP_ZERO);
-				STARPU_ASSERT(fpclassify(cimag(pivot)) != FP_ZERO);
-#else
-				STARPU_ASSERT(fpclassify(pivot) != FP_ZERO);
-#endif
-		
+				STARPU_ASSERT(!ISZERO(pivot));
+
 				CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld);
 		
 				CPU_GER(nx - z - 1, nx - z - 1, -1.0,
@@ -387,14 +382,8 @@ static inline void STARPU_LU(common_u11)(void *descr[],
 				TYPE inv_pivot;
 				cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream);
 				cudaStreamSynchronize(stream);
+				STARPU_ASSERT(!ISZERO(pivot));
 
-#ifdef COMPLEX_LU
-				STARPU_ASSERT(fpclassify(creal(pivot)) != FP_ZERO);
-				STARPU_ASSERT(fpclassify(cimag(pivot)) != FP_ZERO);
-#else
-				STARPU_ASSERT(fpclassify(pivot) != FP_ZERO);
-#endif
-				
 				inv_pivot = 1.0/pivot;
 				status = CUBLAS_SCAL(handle,
 						nx - z - 1,
@@ -511,8 +500,8 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 
 					pivot = sub11[z+z*ld];
 				}
-			
-				STARPU_ASSERT(fpclassify(pivot) != FP_ZERO);
+
+				STARPU_ASSERT(!ISZERO(pivot));
 
 				CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld);
 		
@@ -561,8 +550,8 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 					cudaStreamSynchronize(stream);
 				}
 
-				STARPU_ASSERT(fpclassify(pivot) != FP_ZERO);
-				
+				STARPU_ASSERT(!ISZERO(pivot));
+
 				inv_pivot = 1.0/pivot;
 				status = CUBLAS_SCAL(handle,
 						nx - z - 1,

+ 2 - 0
include/starpu_perfmodel.h

@@ -202,6 +202,8 @@ double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node);
 double starpu_transfer_latency(unsigned src_node, unsigned dst_node);
 double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size);
 
+extern struct starpu_perfmodel starpu_nop_perf_model;
+
 #ifdef __cplusplus
 }
 #endif

+ 2 - 1
mpi/examples/comm/comm.c

@@ -36,7 +36,8 @@ struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 2 - 1
mpi/examples/comm/mix_comm.c

@@ -36,7 +36,8 @@ struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 16
mpi/examples/complex/mpi_complex.c

@@ -26,27 +26,12 @@ void display_foo_codelet(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	FPRINTF(stderr, "foo = %d\n", *foo);
 }
 
-/* Dumb performance model for simgrid */
-static double display_cost_function(struct starpu_task *task, unsigned nimpl)
-{
-	(void) task;
-	(void) nimpl;
-	return 0.000001;
-}
-
-static struct starpu_perfmodel display_model =
-{
-	.type = STARPU_COMMON,
-	.cost_function = display_cost_function,
-	.symbol = "display"
-};
-
 struct starpu_codelet foo_display =
 {
 	.cpu_funcs = {display_foo_codelet},
 	.nbuffers = 1,
 	.modes = {STARPU_R},
-	.model = &display_model
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 16
mpi/examples/stencil/stencil5.c

@@ -37,27 +37,12 @@ void stencil5_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 //	fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
 }
 
-/* Dumb performance model for simgrid */
-static double stencil5_cost_function(struct starpu_task *task, unsigned nimpl)
-{
-	(void) task;
-	(void) nimpl;
-	return 0.000001;
-}
-
-static struct starpu_perfmodel stencil5_model =
-{
-	.type = STARPU_COMMON,
-	.cost_function = stencil5_cost_function,
-	.symbol = "stencil5"
-};
-
 struct starpu_codelet stencil5_cl =
 {
 	.cpu_funcs = {stencil5_cpu},
 	.nbuffers = 5,
 	.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R},
-	.model = &stencil5_model
+	.model = &starpu_nop_perf_model,
 };
 
 #ifdef STARPU_QUICK_CHECK

+ 2 - 1
mpi/examples/stencil/stencil5_lb.c

@@ -42,7 +42,8 @@ struct starpu_codelet stencil5_cl =
 {
 	.cpu_funcs = {stencil5_cpu},
 	.nbuffers = 5,
-	.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}
+	.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R},
+	.model = &starpu_nop_perf_model,
 };
 
 #ifdef STARPU_QUICK_CHECK

+ 9 - 1
mpi/src/mpi/starpu_mpi_early_data.c

@@ -44,7 +44,15 @@ void _starpu_mpi_early_data_init(void)
 
 void _starpu_mpi_early_data_check_termination(void)
 {
-	STARPU_ASSERT_MSG(_starpu_mpi_early_data_handle_hashmap_count == 0, "Number of unexpected received messages left is not zero (but %d), did you forget to post a receive corresponding to a send?", _starpu_mpi_early_data_handle_hashmap_count);
+	if (_starpu_mpi_early_data_handle_hashmap_count != 0)
+	{
+		struct _starpu_mpi_early_data_handle_hashlist *current, *tmp;
+		HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap, current, tmp)
+		{
+			_STARPU_MSG("Unexpected message with comm %ld source %d tag %ld\n", (long int)current->node_tag.comm, current->node_tag.rank, current->node_tag.data_tag);
+		}
+		STARPU_ASSERT_MSG(_starpu_mpi_early_data_handle_hashmap_count == 0, "Number of unexpected received messages left is not 0 (but %d), did you forget to post a receive corresponding to a send?", _starpu_mpi_early_data_handle_hashmap_count);
+	}
 }
 
 void _starpu_mpi_early_data_shutdown(void)

+ 2 - 0
mpi/tests/Makefile.am

@@ -151,6 +151,7 @@ starpu_mpi_TESTS +=				\
 	insert_task_sent_cache			\
 	insert_task_recv_cache			\
 	insert_task_count			\
+	insert_task_seq				\
 	multiple_send				\
 	user_defined_datatype			\
 	tags_checking				\
@@ -208,6 +209,7 @@ noinst_PROGRAMS =				\
 	insert_task_node_choice			\
 	insert_task_count			\
 	insert_task_dyn_handles			\
+	insert_task_seq				\
 	multiple_send				\
 	mpi_scatter_gather			\
 	mpi_reduction				\

+ 3 - 14
mpi/tests/cache.c

@@ -23,23 +23,12 @@ void func_cpu(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED voi
 {
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type          = STARPU_COMMON,
-	.cost_function = cost_function
-};
-
 struct starpu_codelet mycodelet_r =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 struct starpu_codelet mycodelet_w =
@@ -47,7 +36,7 @@ struct starpu_codelet mycodelet_w =
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_W},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 struct starpu_codelet mycodelet_rw =
@@ -55,7 +44,7 @@ struct starpu_codelet mycodelet_rw =
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 void test(struct starpu_codelet *codelet, enum starpu_data_access_mode mode, starpu_data_handle_t data, int rank, int in_cache)

+ 1 - 12
mpi/tests/cache_disable.c

@@ -23,23 +23,12 @@ void func_cpu(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED voi
 {
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type          = STARPU_COMMON,
-	.cost_function = cost_function
-};
-
 struct starpu_codelet mycodelet_r =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 12
mpi/tests/callback.c

@@ -27,23 +27,12 @@ void my_func(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED void
 	FPRINTF_MPI(stderr, "i am here\n");
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet my_codelet =
 {
 	.cpu_funcs = {my_func},
 	.cuda_funcs = {my_func},
 	.opencl_funcs = {my_func},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 static

+ 3 - 16
mpi/tests/early_request.c

@@ -60,19 +60,6 @@ void fill_tmp_buffer(void *buffers[], void *cl_arg)
 		tmp[i]=nx+i;
 }
 
-#ifdef STARPU_SIMGRID
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-#endif
-
 static struct starpu_codelet fill_tmp_buffer_cl =
 {
 	.where = STARPU_CPU,
@@ -80,7 +67,7 @@ static struct starpu_codelet fill_tmp_buffer_cl =
 	.nbuffers = 1,
 	.modes = {STARPU_W},
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 	.name = "fill_tmp_buffer"
 };
@@ -103,7 +90,7 @@ static struct starpu_codelet read_ghost_value_cl =
 	.nbuffers = 1,
 	.modes = {STARPU_R},
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 	.name = "read_ghost_value"
 };
@@ -124,7 +111,7 @@ static struct starpu_codelet submitted_order =
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_W},
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 	.name = "submitted_order_enforcer"
 };

+ 1 - 12
mpi/tests/insert_task.c

@@ -27,23 +27,12 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	*x = (*x + *y) / 2;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 #define X     4

+ 1 - 14
mpi/tests/insert_task_block.c

@@ -44,25 +44,12 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	}
 }
 
-#ifdef STARPU_SIMGRID
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-#endif
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 	.modes = {STARPU_RW}
 };

+ 1 - 12
mpi/tests/insert_task_compute.c

@@ -29,23 +29,12 @@ void func_cpu(void *descr[], void *_args)
 	*x = *x * *y;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int test(int rank, int node, int *before, int *after, int task_insert, int data_array)

+ 1 - 12
mpi/tests/insert_task_count.c

@@ -36,17 +36,6 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -55,7 +44,7 @@ static struct starpu_codelet increment_cl =
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 14
mpi/tests/insert_task_dyn_handles.c

@@ -35,26 +35,13 @@ void func_cpu(void *descr[], void *_args)
 	}
 }
 
-#ifdef STARPU_SIMGRID
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-#endif
-
 struct starpu_codelet codelet =
 {
 	.cpu_funcs = {func_cpu},
 	.cpu_funcs_name = {"func_cpu"},
 	.nbuffers = STARPU_VARIABLE_NBUFFERS,
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 };
 

+ 1 - 12
mpi/tests/insert_task_node_choice.c

@@ -30,23 +30,12 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	assert(node == rank);
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_RW},
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 	.name = "insert_task_node_choice"
 };
 

+ 5 - 16
mpi/tests/insert_task_owner.c

@@ -30,23 +30,12 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	assert(node == rank);
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet_r_w =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_W},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 struct starpu_codelet mycodelet_rw_r =
@@ -54,7 +43,7 @@ struct starpu_codelet mycodelet_rw_r =
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 struct starpu_codelet mycodelet_rw_rw =
@@ -62,7 +51,7 @@ struct starpu_codelet mycodelet_rw_rw =
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 struct starpu_codelet mycodelet_w_r =
@@ -70,7 +59,7 @@ struct starpu_codelet mycodelet_w_r =
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_W, STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 struct starpu_codelet mycodelet_r_r =
@@ -78,7 +67,7 @@ struct starpu_codelet mycodelet_r_r =
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 2 - 13
mpi/tests/insert_task_owner2.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012, 2015                    Université Bordeaux
+ * Copyright (C) 2012, 2015, 2017                    Université Bordeaux
  * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -33,23 +33,12 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y);
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 4,
 	.modes = {STARPU_R, STARPU_RW, STARPU_W, STARPU_W},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 12
mpi/tests/insert_task_owner_data.c

@@ -27,23 +27,12 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	*x1 *= *x1;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 12
mpi/tests/insert_task_recv_cache.c

@@ -32,23 +32,12 @@ void func_cpu(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED voi
 {
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 #define N     1000

+ 1 - 12
mpi/tests/insert_task_sent_cache.c

@@ -32,23 +32,12 @@ void func_cpu(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED voi
 {
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_R},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 #define N     1000

+ 137 - 0
mpi/tests/insert_task_seq.c

@@ -0,0 +1,137 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2012, 2013, 2014, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <math.h>
+#include "helper.h"
+
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined. Skipping test
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+
+void func_cpu(void *descr[], void *_args)
+{
+	(void) descr;
+	(void) _args;
+}
+
+struct starpu_codelet mycodelet =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_R},
+	.model = &starpu_nop_perf_model,
+};
+
+struct starpu_codelet mycodelet2 =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.model = &starpu_nop_perf_model,
+};
+
+#define X     4
+
+/* Returns the MPI node number where data is */
+int my_distrib(int x, int nb_nodes)
+{
+	return x % nb_nodes;
+}
+
+void dotest(int rank, int size, char *enabled)
+{
+	int x, i;
+	int ret;
+	unsigned values[X];
+	starpu_data_handle_t data_handles[X];
+
+	setenv("STARPU_MPI_CACHE", enabled, 1);
+
+	FPRINTF(stderr, "Testing with cache '%s'\n", enabled);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	for(x = 0; x < X; x++)
+	{
+		values[x] = (rank+1)*10;
+	}
+
+	for(x = 0; x < X; x++)
+	{
+		int mpi_rank = my_distrib(x, size);
+		if (mpi_rank == rank)
+		{
+			starpu_variable_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)&(values[x]), sizeof(unsigned));
+		}
+		else
+		{
+			/* I don't own that index, but will need it for my computations */
+			starpu_variable_data_register(&data_handles[x], -1, (uintptr_t)NULL, sizeof(unsigned));
+		}
+		if (data_handles[x])
+		{
+			starpu_mpi_data_register(data_handles[x], x, mpi_rank);
+		}
+	}
+
+	for(i = 0 ; i<size ; i++)
+	{
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[i], STARPU_R, data_handles[0], 0);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+	}
+
+	for(i = 0 ; i<size ; i++)
+	{
+		// Calling starpu_mpi_get_data_on_all_nodes_detached() is necessary to make sure all nodes have a valid copy of the data
+		starpu_mpi_get_data_on_all_nodes_detached(MPI_COMM_WORLD, data_handles[i]);
+		starpu_task_insert(&mycodelet2, STARPU_RW, data_handles[i], 0);
+	}
+
+	starpu_task_wait_for_all();
+
+	for(x = 0; x < X; x++)
+	{
+		STARPU_ASSERT(data_handles[x]);
+		starpu_data_unregister(data_handles[x]);
+	}
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+}
+
+int main(int argc, char **argv)
+{
+	int rank, size;
+
+	MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	dotest(rank, size, "0");
+	dotest(rank, size, "1");
+
+	MPI_Finalize();
+	return 0;
+}
+#endif

+ 1 - 12
mpi/tests/matrix.c

@@ -28,23 +28,12 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	*Y = *Y + *A * *X;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 3,
 	.modes = {STARPU_R, STARPU_R, STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 #define N 4

+ 1 - 12
mpi/tests/matrix2.c

@@ -28,23 +28,12 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	*Y = *Y + *A * *X;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 3,
 	.modes = {STARPU_R, STARPU_R, STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 #define N 4

+ 5 - 18
mpi/tests/mpi_reduction.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013, 2015  Université de Bordeaux
+ * Copyright (C) 2013, 2015, 2017  Université de Bordeaux
  * Copyright (C) 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -24,26 +24,13 @@ extern void redux_cpu_func(void *descr[], void *cl_arg);
 extern void dot_cpu_func(void *descr[], void *cl_arg);
 extern void display_cpu_func(void *descr[], void *cl_arg);
 
-#ifdef STARPU_SIMGRID
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-#endif
-
 static struct starpu_codelet init_codelet =
 {
 	.cpu_funcs = {init_cpu_func},
 	.nbuffers = 1,
 	.modes = {STARPU_W},
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 	.name = "init_codelet"
 };
@@ -54,7 +41,7 @@ static struct starpu_codelet redux_codelet =
 	.modes = {STARPU_RW, STARPU_R},
 	.nbuffers = 2,
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 	.name = "redux_codelet"
 };
@@ -65,7 +52,7 @@ static struct starpu_codelet dot_codelet =
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_REDUX},
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 	.name = "dot_codelet"
 };
@@ -76,7 +63,7 @@ static struct starpu_codelet display_codelet =
 	.nbuffers = 1,
 	.modes = {STARPU_R},
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 	.name = "display_codelet"
 };

+ 1 - 14
mpi/tests/mpi_scatter_gather.c

@@ -38,26 +38,13 @@ void cpu_codelet(void *descr[], void *_args)
 	}
 }
 
-#ifdef STARPU_SIMGRID
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-#endif
-
 static struct starpu_codelet cl =
 {
 	.cpu_funcs = {cpu_codelet},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
 #ifdef STARPU_SIMGRID
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 #endif
 };
 

+ 1 - 12
mpi/tests/policy_register.c

@@ -23,23 +23,12 @@ void func_cpu(void *descr[], void *_args)
 	(void)_args;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_W, STARPU_W},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int starpu_mpi_select_node_my_policy_0(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)

+ 2 - 13
mpi/tests/policy_selection.c

@@ -23,30 +23,19 @@ void func_cpu(void *descr[], void *_args)
 	(void)_args;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet_2 =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
 	.modes = {STARPU_W, STARPU_W},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 struct starpu_codelet mycodelet_3 =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 3,
 	.modes = {STARPU_R, STARPU_W, STARPU_W},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 12
mpi/tests/policy_selection2.c

@@ -28,23 +28,12 @@ void func_cpu(void *descr[], void *_args)
 	*data2 += *data0;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 3,
 	.modes = {STARPU_R, STARPU_W, STARPU_W},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 12
mpi/tests/ring.c

@@ -36,17 +36,6 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -55,7 +44,7 @@ static struct starpu_codelet increment_cl =
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 void increment_token(starpu_data_handle_t token_handle)

+ 1 - 12
mpi/tests/ring_async.c

@@ -36,17 +36,6 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -55,7 +44,7 @@ static struct starpu_codelet increment_cl =
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 void increment_token(starpu_data_handle_t token_handle)

+ 1 - 12
mpi/tests/ring_async_implicit.c

@@ -36,17 +36,6 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -55,7 +44,7 @@ static struct starpu_codelet increment_cl =
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 void increment_token(starpu_data_handle_t token_handle)

+ 1 - 12
mpi/tests/ring_sync.c

@@ -36,17 +36,6 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -55,7 +44,7 @@ static struct starpu_codelet increment_cl =
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 void increment_token(starpu_data_handle_t token_handle)

+ 1 - 12
mpi/tests/ring_sync_detached.c

@@ -36,17 +36,6 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type		= STARPU_COMMON,
-	.cost_function	= cost_function
-};
-
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -55,7 +44,7 @@ static struct starpu_codelet increment_cl =
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
-	.model = &dumb_model
+	.model = &starpu_nop_perf_model,
 };
 
 void increment_token(starpu_data_handle_t handle)

+ 1 - 12
mpi/tests/temporary.c

@@ -29,23 +29,12 @@ static void func_add(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	FPRINTF_MPI(stderr, "%d + %d = %d\n", *b, *c, *a);
 }
 
-/* Dummy cost function for simgrid */
-static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
-{
-	return 0.000001;
-}
-static struct starpu_perfmodel dumb_model =
-{
-	.type          = STARPU_COMMON,
-	.cost_function = cost_function
-};
-
 static struct starpu_codelet codelet_add =
 {
 	.cpu_funcs = {func_add},
 	.nbuffers = 3,
 	.modes = {STARPU_W, STARPU_R, STARPU_R},
-	.model = &dumb_model,
+	.model = &starpu_nop_perf_model,
 	.flags = STARPU_CODELET_SIMGRID_EXECUTE,
 };
 

+ 1 - 0
src/Makefile.am

@@ -200,6 +200,7 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 	core/perfmodel/multiple_regression.c			\
 	core/sched_policy.c					\
 	core/simgrid.c						\
+	core/simgrid_cpp.cpp					\
 	core/sched_ctx.c					\
 	core/sched_ctx_list.c					\
 	core/parallel_task.c					\

+ 9 - 0
src/common/thread.c

@@ -34,6 +34,7 @@
 #include <xbt/synchro_core.h>
 #endif
 #include <smpi/smpi.h>
+#include <simgrid/simix.h>
 #else
 
 #if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
@@ -249,7 +250,11 @@ int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 {
 	void **array;
 #ifdef HAVE_SMPI_PROCESS_SET_USER_DATA
+#ifdef HAVE_MSG_PROCESS_SELF_NAME
+	const char *process_name = MSG_process_self_name();
+#else
 	const char *process_name = SIMIX_process_self_get_name();
+#endif
 	char *end;
 	/* Test whether it is an MPI rank */
 	strtol(process_name, &end, 10);
@@ -267,7 +272,11 @@ void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 {
 	void **array;
 #ifdef HAVE_SMPI_PROCESS_SET_USER_DATA
+#ifdef HAVE_MSG_PROCESS_SELF_NAME
+	const char *process_name = MSG_process_self_name();
+#else
 	const char *process_name = SIMIX_process_self_get_name();
+#endif
 	char *end;
 	/* Test whether it is an MPI rank */
 	strtol(process_name, &end, 10);

+ 8 - 0
src/core/debug.c

@@ -26,6 +26,14 @@ static starpu_pthread_mutex_t logfile_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
 static FILE *logfile = NULL;
 #endif
 
+int _starpu_debug
+#ifdef STARPU_DEBUG
+	= 1
+#else
+	= 0
+#endif
+	;
+
 /* Tell gdb whether FXT is compiled in or not */
 int _starpu_use_fxt
 #ifdef STARPU_USE_FXT

+ 31 - 3
src/core/dependencies/cg.c

@@ -29,6 +29,10 @@ void _starpu_cg_list_init(struct _starpu_cg_list *list)
 	_starpu_spin_init(&list->lock);
 	list->ndeps = 0;
 	list->ndeps_completed = 0;
+#ifdef STARPU_DEBUG
+	list->deps = NULL;
+	list->done = NULL;
+#endif
 
 	list->terminated = 0;
 
@@ -57,6 +61,10 @@ void _starpu_cg_list_deinit(struct _starpu_cg_list *list)
 #ifdef STARPU_DYNAMIC_DEPS_SIZE
 	free(list->succ);
 #endif
+#ifdef STARPU_DEBUG
+	free(list->deps);
+	free(list->done);
+#endif
 	_starpu_spin_destroy(&list->lock);
 }
 
@@ -158,7 +166,7 @@ int _starpu_list_tag_successors_in_cg_list(struct _starpu_cg_list *successors, u
 }
 
 /* Note: in case of a tag, it must be already locked */
-void _starpu_notify_cg(struct _starpu_cg *cg)
+void _starpu_notify_cg(void *pred, struct _starpu_cg *cg)
 {
 	STARPU_ASSERT(cg);
 	unsigned remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1);
@@ -222,6 +230,26 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 				STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 
 				job_successors = &j->job_successors;
+#ifdef STARPU_DEBUG
+				if (!j->task->regenerate) {
+					unsigned i;
+					/* Remove backward cg pointers for easier debugging */
+					if (job_successors->deps) {
+						for (i = 0; i < job_successors->ndeps; i++)
+							if (job_successors->deps[i] == cg)
+								break;
+						STARPU_ASSERT(i < job_successors->ndeps);
+						job_successors->done[i] = 1;
+					}
+					if (cg->deps) {
+						for (i = 0; i < cg->ndeps; i++)
+							if (cg->deps[i] == pred)
+								break;
+						STARPU_ASSERT(i < cg->ndeps);
+						cg->done[i] = 1;
+					}
+				}
+#endif
 
 				unsigned ndeps_completed =
 					STARPU_ATOMIC_ADD(&job_successors->ndeps_completed, 1);
@@ -255,7 +283,7 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
  * _starpu_notify_cg_list protects the list itself.
  * No job lock should be held, since we might want to immediately call the callback of an empty task.
  */
-void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
+void _starpu_notify_cg_list(void *pred, struct _starpu_cg_list *successors)
 {
 	unsigned succ;
 
@@ -285,7 +313,7 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 			_starpu_spin_lock(&cgtag->lock);
 		}
 
-		_starpu_notify_cg(cg);
+		_starpu_notify_cg(pred, cg);
 
 		if (cg_type == STARPU_CG_TAG)
 			_starpu_spin_unlock(&cgtag->lock);

+ 15 - 3
src/core/dependencies/cg.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2012-2013, 2015-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2012-2013, 2015-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -44,6 +44,12 @@ struct _starpu_cg_list
 	/* Number of notifications to be waited for */
 	unsigned ndeps; /* how many deps ? */
 	unsigned ndeps_completed; /* how many deps are done ? */
+#ifdef STARPU_DEBUG
+	/* Array of the notifications, size ndeps */
+	struct _starpu_cg **deps;
+	/* Which ones have notified, size ndeps */
+	char *done;
+#endif
 
 	/* Whether the completion is finished.
 	 * For restartable/restarted tasks, only the first iteration is taken into account here.
@@ -73,6 +79,12 @@ struct _starpu_cg
 	unsigned ntags; /* number of tags depended on */
 	unsigned remaining; /* number of remaining tags */
 
+#ifdef STARPU_DEBUG
+	unsigned ndeps;
+	void **deps; /* array of predecessors, size ndeps */
+	char *done;  /* which ones have notified, size ndeps */
+#endif
+
 	enum _starpu_cg_type cg_type;
 
 	union
@@ -102,8 +114,8 @@ int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct
 int _starpu_list_task_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]);
 int _starpu_list_task_scheduled_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]);
 int _starpu_list_tag_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, starpu_tag_t tag_array[]);
-void _starpu_notify_cg(struct _starpu_cg *cg);
-void _starpu_notify_cg_list(struct _starpu_cg_list *successors);
+void _starpu_notify_cg(void *pred, struct _starpu_cg *cg);
+void _starpu_notify_cg_list(void *pred, struct _starpu_cg_list *successors);
 void _starpu_notify_task_dependencies(struct _starpu_job *j);
 
 #endif // __CG_H__

+ 3 - 3
src/core/dependencies/tags.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2013, 2016  Université de Bordeaux
+ * Copyright (C) 2009-2013, 2016-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2016  Inria
  *
@@ -246,7 +246,7 @@ static void _starpu_tag_add_succ(struct _starpu_tag *tag, struct _starpu_cg *cg)
 	if (tag->state == STARPU_DONE)
 	{
 		/* the tag was already completed sooner */
-		_starpu_notify_cg(cg);
+		_starpu_notify_cg(tag, cg);
 	}
 }
 
@@ -263,7 +263,7 @@ void _starpu_notify_tag_dependencies(struct _starpu_tag *tag)
 	tag->state = STARPU_DONE;
 	_STARPU_TRACE_TAG_DONE(tag);
 
-	_starpu_notify_cg_list(&tag->tag_successors);
+	_starpu_notify_cg_list(tag, &tag->tag_successors);
 
 	_starpu_spin_unlock(&tag->lock);
 }

+ 24 - 3
src/core/dependencies/task_deps.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  * Copyright (C) 2014, 2016  INRIA
  *
@@ -35,10 +35,21 @@ static struct _starpu_cg *create_cg_task(unsigned ntags, struct _starpu_job *j)
 
 	cg->ntags = ntags;
 	cg->remaining = ntags;
+#ifdef STARPU_DEBUG
+	cg->ndeps = ntags;
+	cg->deps = NULL;
+	cg->done = NULL;
+#endif
 	cg->cg_type = STARPU_CG_TASK;
 
 	cg->succ.job = j;
 	j->job_successors.ndeps++;
+#ifdef STARPU_DEBUG
+	_STARPU_REALLOC(j->job_successors.deps, j->job_successors.ndeps * sizeof(j->job_successors.deps[0]));
+	_STARPU_REALLOC(j->job_successors.done, j->job_successors.ndeps * sizeof(j->job_successors.done[0]));
+	j->job_successors.deps[j->job_successors.ndeps-1] = cg;
+	j->job_successors.done[j->job_successors.ndeps-1] = 0;
+#endif
 
 	return cg;
 }
@@ -49,12 +60,12 @@ static void _starpu_task_add_succ(struct _starpu_job *j, struct _starpu_cg *cg)
 
 	if (_starpu_add_successor_to_cg_list(&j->job_successors, cg))
 		/* the task was already completed sooner */
-		_starpu_notify_cg(cg);
+		_starpu_notify_cg(j, cg);
 }
 
 void _starpu_notify_task_dependencies(struct _starpu_job *j)
 {
-	_starpu_notify_cg_list(&j->job_successors);
+	_starpu_notify_cg_list(j, &j->job_successors);
 }
 
 /* task depends on the tasks in task array */
@@ -81,6 +92,11 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s
 	struct _starpu_cg *cg = create_cg_task(ndeps, job);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex);
 
+#ifdef STARPU_DEBUG
+	_STARPU_MALLOC(cg->deps, ndeps * sizeof(cg->deps[0]));
+	_STARPU_MALLOC(cg->done, ndeps * sizeof(cg->done[0]));
+#endif
+
 	unsigned i;
 	for (i = 0; i < ndeps; i++)
 	{
@@ -91,6 +107,11 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s
 
 		dep_job = _starpu_get_job_associated_to_task(dep_task);
 
+#ifdef STARPU_DEBUG
+		cg->deps[i] = dep_job;
+		cg->done[i] = 0;
+#endif
+
 		STARPU_ASSERT_MSG(dep_job != job, "A task must not depend on itself.");
 		STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex);
 		if (check)

+ 14 - 1
src/core/perfmodel/perfmodel.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2016, 2017  Inria
@@ -556,3 +556,16 @@ void starpu_perfmodel_free_sampling_directories(void)
 	_perf_model_dir_debug = NULL;
 	directory_existence_was_tested = 0;
 }
+
+/* Dummy performance model for codelets that need a model but no real estimate: its per-arch cost function ignores all its arguments and always returns 0.000001 */
+static double nop_cost_function(struct starpu_task *t STARPU_ATTRIBUTE_UNUSED, struct starpu_perfmodel_arch *a STARPU_ATTRIBUTE_UNUSED, unsigned i STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+
+struct starpu_perfmodel starpu_nop_perf_model =
+{
+	.type = STARPU_PER_ARCH,
+	.arch_cost_function = nop_cost_function,
+};
+

+ 16 - 0
src/core/perfmodel/perfmodel_bus.c

@@ -185,7 +185,11 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 	{
 		/* NUMA mode activated */
 		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
+#if HWLOC_API_VERSION >= 0x00020000
+		h_buffer = hwloc_alloc_membind(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
+#else
 		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
+#endif
 	}
 	else
 #endif
@@ -446,7 +450,11 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 	{
 		/* NUMA mode activated */
 		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
+#if HWLOC_API_VERSION >= 0x00020000
+		h_buffer = hwloc_alloc_membind(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
+#else
 		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
+#endif
 	}
 	else
 #endif
@@ -678,11 +686,19 @@ static void measure_bandwidth_latency_between_numa(int numa_src, int numa_dst)
 
 		unsigned char *h_buffer;
 		hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
+#if HWLOC_API_VERSION >= 0x00020000
+		h_buffer = hwloc_alloc_membind(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
+#else
 		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
+#endif
 
 		unsigned char *d_buffer;
 		hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
+#if HWLOC_API_VERSION >= 0x00020000
+		d_buffer = hwloc_alloc_membind(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
+#else
 		d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
+#endif
 
 		memset(h_buffer, 0, SIZE);
 

+ 29 - 81
src/core/simgrid.c

@@ -34,6 +34,9 @@
 #include <sys/resource.h>
 #endif
 #include <simgrid/simix.h>
+#ifdef STARPU_HAVE_SIMGRID_HOST_H
+#include <simgrid/host.h>
+#endif
 
 #pragma weak starpu_main
 extern int starpu_main(int argc, char *argv[]);
@@ -65,13 +68,20 @@ static struct worker_runner
 } worker_runner[STARPU_NMAXWORKERS];
 static int task_execute(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED);
 
-#ifdef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT
-#ifdef HAVE_MSG_GET_AS_BY_NAME
+#ifdef HAVE_MSG_ZONE_GET_BY_NAME
+#define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
+msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
+{
+	return MSG_zone_get_by_name(name);
+}
+#elif defined(HAVE_MSG_GET_AS_BY_NAME)
+#define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
 msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
 {
 	return MSG_get_as_by_name(name);
 }
-#else /* HAVE_MSG_GET_AS_BY_NAME */
+#elif defined(HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT)
+#define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
 static msg_as_t __starpu_simgrid_get_as_by_name(msg_as_t root, const char *name)
 {
 	xbt_dict_t dict;
@@ -94,7 +104,6 @@ msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
 {
 	return __starpu_simgrid_get_as_by_name(MSG_environment_get_routing_root(), name);
 }
-#endif /* HAVE_MSG_GET_AS_BY_NAME */
 #endif /* HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT */
 
 int _starpu_simgrid_get_nbhosts(const char *prefix)
@@ -103,21 +112,28 @@ int _starpu_simgrid_get_nbhosts(const char *prefix)
 	xbt_dynar_t hosts;
 	unsigned i, nb;
 	unsigned len = strlen(prefix);
-#ifdef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT
-	char new_prefix[32];
+	char new_prefix[32] STARPU_ATTRIBUTE_UNUSED; /* function scope on purpose: prefix points here after the SMPI branch and is still read below */
 
 	if (_starpu_simgrid_running_smpi())
 	{
+#ifdef HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
 		char name[32];
 		STARPU_ASSERT(starpu_mpi_world_rank);
 		snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%d", starpu_mpi_world_rank());
+#ifdef HAVE_MSG_ZONE_GET_HOSTS
+		hosts = xbt_dynar_new(sizeof(sg_host_t), NULL);
+		MSG_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
+#else
 		hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(name));
+#endif
 		snprintf(new_prefix, sizeof(new_prefix), "%s-%s", name, prefix);
 		prefix = new_prefix;
 		len = strlen(prefix);
+#else
+		STARPU_ABORT_MSG("can not continue without an implementation for _starpu_simgrid_get_as_by_name");
+#endif /* HAVE_STARPU_SIMGRID_GET_AS_BY_NAME */
 	}
 	else
-#endif /* HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT */
 		hosts = MSG_hosts_as_dynar();
 	nb = xbt_dynar_length(hosts);
 
@@ -210,10 +226,13 @@ int _starpu_smpi_simulated_main_(int argc, char *argv[])
 int smpi_simulated_main_(int argc, char *argv[]) __attribute__((weak, alias("_starpu_smpi_simulated_main_")));
 
 /* This is used to start a non-MPI simgrid environment */
-static void start_simgrid(int *argc, char **argv)
+void _starpu_start_simgrid(int *argc, char **argv)
 {
 	char path[256];
 
+	if (simgrid_started)
+		return;
+
 	simgrid_started = 1;
 
 	MSG_init(argc, argv);
@@ -280,7 +299,7 @@ int main(int argc, char **argv)
 	}
 
 	/* Managed to catch application's main, initialize simgrid first */
-	start_simgrid(&argc, argv);
+	_starpu_start_simgrid(&argc, argv);
 
 	/* Create a simgrid process for main */
 	char **argv_cpy;
@@ -322,7 +341,7 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 		/* Start maestro as a separate thread */
 		SIMIX_set_maestro(maestro, NULL);
 		/* Initialize simgrid */
-		start_simgrid(argc, *argv);
+		_starpu_start_simgrid(argc, *argv);
 		/* And attach the main thread to the main simgrid process */
 		void **tsd;
 		_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
@@ -1085,77 +1104,6 @@ void _starpu_simgrid_count_ngpus(void)
 #endif
 }
 
-typedef struct
-{
-	void_f_pvoid_t code;
-	void *userparam;
-	void *father_data;
-} thread_data_t;
-
-static int _starpu_simgrid_xbt_thread_create_wrapper(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
-{
-	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
-	MSG_process_sleep(0.000001);
-
-#ifdef HAVE_SMX_ACTOR_T
-	smx_actor_t
-#else
-	smx_process_t
-#endif
-	self = SIMIX_process_self();
-#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 13)
-	thread_data_t *t = SIMIX_process_self_get_data(self);
-#else
-	thread_data_t *t = SIMIX_process_self_get_data();
-#endif
-	simcall_process_set_data(self, t->father_data);
-	t->code(t->userparam);
-	simcall_process_set_data(self, NULL);
-	free(t);
-
-	return 0;
-}
-
-void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, void *param)
-{
-#ifdef HAVE_SMX_ACTOR_T
-	smx_actor_t process STARPU_ATTRIBUTE_UNUSED;
-#else
-	smx_process_t process STARPU_ATTRIBUTE_UNUSED;
-#endif
-	thread_data_t *res;
-	_STARPU_MALLOC(res, sizeof(thread_data_t));
-	res->userparam = param;
-	res->code = code;
-#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 13)
-	res->father_data = SIMIX_process_self_get_data(SIMIX_process_self());
-#else
-	res->father_data = SIMIX_process_self_get_data();
-#endif
-
-#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 12)
-	simcall_process_create(&process,
-#else
-	process = simcall_process_create(
-#endif
-	                         name,
-	                         _starpu_simgrid_xbt_thread_create_wrapper, res,
-#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 14)
-	                         SIMIX_host_self_get_name(),
-#else
-	                         SIMIX_host_self(),
-#endif
-#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
-				 -1.0,
-#endif
-				 0, NULL,
-	                         /*props */ NULL
-#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
-				 , 0
-#endif
-				 );
-}
-
 #if 0
 static size_t used;
 

+ 10 - 0
src/core/simgrid.h

@@ -18,6 +18,10 @@
 #ifndef __SIMGRID_H__
 #define __SIMGRID_H__
 
+#ifdef __cplusplus
+extern "C"
+{
+#endif
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #include <simgrid/msg.h>
@@ -38,6 +42,8 @@ struct _starpu_pthread_args
 #define STARPU_MPI_AS_PREFIX "StarPU-MPI"
 #define _starpu_simgrid_running_smpi() (getenv("SMPI_GLOBAL_SIZE") != NULL)
 
+void _starpu_start_simgrid(int *argc, char **argv);
+
 void _starpu_simgrid_init_early(int *argc, char ***argv);
 void _starpu_simgrid_init(void);
 void _starpu_simgrid_deinit(void);
@@ -116,4 +122,8 @@ void _starpu_simgrid_data_transfer(size_t size, unsigned src_node, unsigned dst_
 #define _starpu_simgrid_data_transfer(size, src_node, dst_node) (void)0
 #endif
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif // __SIMGRID_H__

+ 119 - 0
src/core/simgrid_cpp.cpp

@@ -0,0 +1,119 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2017  Université de Bordeaux
+ * Copyright (C) 2016, 2017  Inria
+ * Copyright (C) 2016, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <core/simgrid.h>
+#include <common/config.h>
+
+#ifdef STARPU_SIMGRID
+#ifdef STARPU_HAVE_SIMGRID_MSG_H
+#include <simgrid/msg.h>
+#else
+#include <msg/msg.h>
+#endif
+#include <simgrid/simix.h>
+#ifdef STARPU_HAVE_SIMGRID_HOST_H
+#include <simgrid/host.h>
+#endif
+
+/* thread_create function which implements inheritance of MPI privatization */
+/* See https://github.com/simgrid/simgrid/issues/139 */
+
+/* Arguments handed by _starpu_simgrid_xbt_thread_create() to the wrapper run by the new process */
+typedef struct
+{
+	void_f_pvoid_t code;	/* function the new process runs */
+	void *userparam;	/* argument passed to code */
+	void *father_data;	/* process data of the creator, captured at creation time */
+} thread_data_t;
+
+/* Entry point of the created process: installs the creator's process data, runs the user function, clears the data again and frees the thread_data_t */
+static int _starpu_simgrid_xbt_thread_create_wrapper(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
+{
+	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
+	MSG_process_sleep(0.000001);
+
+#ifdef HAVE_SMX_ACTOR_T
+	smx_actor_t
+#else
+	smx_process_t
+#endif
+	self = SIMIX_process_self();
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 13)
+	thread_data_t *t = (thread_data_t *) SIMIX_process_self_get_data(self);
+#else
+	thread_data_t *t = (thread_data_t *) SIMIX_process_self_get_data();
+#endif
+	simcall_process_set_data(self, t->father_data);
+	t->code(t->userparam);
+	simcall_process_set_data(self, NULL);
+	free(t);
+
+	return 0;
+}
+
+/* Create a simgrid process called name, on the current host, which runs code(param) with the caller's process data installed; aborts when simcall_process_create is not available */
+void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, void *param)
+{
+#ifdef HAVE_SIMCALL_PROCESS_CREATE
+#ifdef HAVE_SMX_ACTOR_T
+	smx_actor_t process STARPU_ATTRIBUTE_UNUSED;
+#else
+	smx_process_t process STARPU_ATTRIBUTE_UNUSED;
+#endif
+	thread_data_t *res = (thread_data_t *) malloc(sizeof(thread_data_t));
+	STARPU_ASSERT_MSG(res != NULL, "Cannot allocate %ld bytes\n", (long) sizeof(thread_data_t));
+	res->userparam = param;
+	res->code = code;
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 13)
+	res->father_data = SIMIX_process_self_get_data(SIMIX_process_self());
+#else
+	res->father_data = SIMIX_process_self_get_data();
+#endif
+
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 12)
+	simcall_process_create(&process,
+#else
+	process = simcall_process_create(
+#endif
+	                         name,
+	                         _starpu_simgrid_xbt_thread_create_wrapper, res,
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 14)
+	                         SIMIX_host_self_get_name(),
+#else
+#  ifdef HAVE_SG_HOST_SELF
+	                         sg_host_self(),
+#  else
+	                         SIMIX_host_self(),
+#  endif
+#endif
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
+				 -1.0,
+#endif
+				 0, NULL,
+	                         /*props */ NULL
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
+				 , 0
+#endif
+				 );
+#else
+	STARPU_ABORT_MSG("Can't run StarPU-Simgrid-MPI with a Simgrid version which does not provide simcall_process_create and does not fix https://github.com/simgrid/simgrid/issues/139 , sorry.");
+#endif
+}
+
+#endif

+ 6 - 1
src/datawizard/malloc.c

@@ -312,7 +312,12 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 		hwloc_topology_t hwtopology = config->topology.hwtopology;
 		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, starpu_memory_nodes_numa_id_to_hwloclogid(dst_node));
 		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
-		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
+		/* HWLOC_MEMBIND_NOCPUBIND is a membind flag, not a policy, so it goes in the flags argument. NOTE(review): flags looks like StarPU malloc flags rather than hwloc ones - confirm it should be forwarded at all */
+#if HWLOC_API_VERSION >= 0x00020000
+		*A = hwloc_alloc_membind(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND, flags | HWLOC_MEMBIND_NOCPUBIND | HWLOC_MEMBIND_BYNODESET);
+#else
+		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND, flags | HWLOC_MEMBIND_NOCPUBIND);
+#endif
 		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A);
 		if (!*A)
 			ret = -ENOMEM;

+ 7 - 0
src/util/openmp_runtime_support.c

@@ -920,6 +920,13 @@ void _starpu_omp_dummy_shutdown(void)
  */
 int starpu_omp_init(void)
 {
+#ifdef STARPU_SIMGRID
+	/* XXX: ideally we'd pass the real argc/argv */
+	int argc;
+	char *argv[] = { NULL };
+	_starpu_start_simgrid(&argc, &argv);
+#endif
+
 	_starpu_omp_global_state = &_global_state;
 
 	STARPU_PTHREAD_KEY_CREATE(&omp_thread_key, NULL);

+ 1 - 0
tests/Makefile.am

@@ -67,6 +67,7 @@ EXTRA_DIST =					\
 	perfmodels/opencl_memset_kernel.cl \
 	$(MICROBENCHS:=.sh) \
 	microbenchs/microbench.sh \
+	model-checking/platform.xml \
 	model-checking/prio_list.sh \
 	model-checking/barrier.sh \
 	model-checking/starpu-mc.sh.in

+ 3 - 0
tests/datawizard/deps.c

@@ -43,6 +43,7 @@ static struct starpu_codelet cl_null =
 {
 	.cpu_funcs = {null_cpu_func},
 	.cpu_funcs_name = {"null_cpu_func"},
+	.model = &starpu_nop_perf_model,
 	.name = "null",
 };
 
@@ -52,6 +53,8 @@ static struct starpu_codelet cl_prod =
 	.cpu_funcs_name = {"prod_cpu_func"},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
+	.flags = STARPU_CODELET_SIMGRID_EXECUTE,
+	.model = &starpu_nop_perf_model,
 	.name = "prod",
 };
 

+ 1 - 1
tests/datawizard/interfaces/copy_interfaces.c

@@ -73,7 +73,7 @@ int main(int argc, char **argv)
 		int NX=3;
 		int NY=2;
 		int matrix[NX][NY];
-		starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0]));
+		starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0][0]));
 		ret = check_copy(handle, "matrix");
 	}
 

+ 1 - 12
tests/datawizard/variable_size.c

@@ -258,17 +258,6 @@ static void init(void *descr[], void *cl_arg)
 	VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE((void*) variable_interface->ptr, variable_interface->size);
 }
 
-static double nop_cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i)
-{
-	return 0.001;
-}
-
-static struct starpu_perfmodel nop_perf_model =
-{
-	.type = STARPU_PER_ARCH,
-	.arch_cost_function = nop_cost_function,
-};
-
 static struct starpu_codelet cl_init =
 {
 	.cpu_funcs = {init},
@@ -277,7 +266,7 @@ static struct starpu_codelet cl_init =
 	/*.cpu_funcs_name = {"kernel"},*/
 	.nbuffers = 1,
 	.modes = {STARPU_W},
-	.model = &nop_perf_model,
+	.model = &starpu_nop_perf_model,
 };
 
 int main(int argc, char **argv)

+ 1 - 1
tests/main/starpu_init.c

@@ -39,7 +39,7 @@ static int check_cpu(int env_cpu, int conf_cpu, int expected_cpu, int *cpu)
 
 	if (env_cpu != -1)
 	{
-		char string[10];
+		char string[11];
 		snprintf(string, sizeof(string), "%d", env_cpu);
 		setenv("STARPU_NCPUS", string, 1);
 	}

+ 38 - 12
tools/gdbinit

@@ -85,6 +85,24 @@ define starpu-print-task
   printf "\tjob:\t\t\t\t<%p>\n", $job
   printf "\ttag_id:\t\t\t\t<%d>\n", $task->tag_id
   printf "\tndeps:\t\t\t\t<%u>\n", $job->job_successors->ndeps
+  printf "\tndeps_remaining:\t\t<%u>\n", $job->job_successors->ndeps - $job->job_successors->ndeps_completed
+  if _starpu_debug
+    set $n = 0
+    while $n < $job->job_successors->ndeps
+      if ! $job->job_successors->done[$n]
+        set $cg = $job->job_successors->deps[$n]
+        set $m = 0
+	while $m < $cg->ndeps
+	  if ! $cg->done[$m]
+	    set $depj = (struct _starpu_job *) $cg->deps[$m]
+            printf "\t\ttask %p\n", $depj->task
+	  end
+	  set $m = $m + 1
+	end
+      end
+      set $n = $n + 1
+    end
+  end
   printf "\tndeps_completed:\t\t<%u>\n", $job->job_successors->ndeps_completed
   printf "\tnsuccs:\t\t\t\t<%u>\n", $job->job_successors->nsuccs
   if $job
@@ -243,23 +261,31 @@ end
 
 define starpu-print-all-tasks
   set language c
-  set $l = all_jobs_list->next
-  while $l != &all_jobs_list
-    set $j = (struct _starpu_job*) (((unsigned long) $l) - ((unsigned long) &((struct _starpu_job *)0)->all_submitted))
-    printf "task %p\n", $j->task
-    starpu-print-task $j->task
-    set $l = $l->next
+  # Walk all_jobs_list and run starpu-print-task on every job's task.
+  # The walk is only attempted when _starpu_debug is set; otherwise print a hint.
+  if ! _starpu_debug
+    printf "you need to configure with --enable-debug to get starpu-print-all-tasks working\n"
+  else
+    set $l = all_jobs_list->next
+    while $l != &all_jobs_list
+      # $l points at the all_submitted link embedded in a job: subtract the
+      # member offset to get back to the enclosing struct _starpu_job.
+      set $j = (struct _starpu_job*) (((unsigned long) $l) - ((unsigned long) &((struct _starpu_job *)0)->all_submitted))
+      printf "task %p\n", $j->task
+      starpu-print-task $j->task
+      set $l = $l->next
+    end
   end
 end
 
 define starpu-all-tasks
   set language c
-  set $l = all_jobs_list->next
-  while $l != &all_jobs_list
-    set $j = (struct _starpu_job*) (((unsigned long) $l) - ((unsigned long) &((struct _starpu_job *)0)->all_submitted))
-    set $task = $j->task
-    printf "task %p %s\n", $task, $task->name ? $task->name : ""
-    set $l = $l->next
+  # Print one line (pointer and name, if any) per task found in all_jobs_list.
+  # The walk is only attempted when _starpu_debug is set; otherwise print a hint.
+  if ! _starpu_debug
+    printf "you need to configure with --enable-debug to get starpu-all-tasks working\n"
+  else
+    set $l = all_jobs_list->next
+    while $l != &all_jobs_list
+      # $l points at the all_submitted link embedded in a job: subtract the
+      # member offset to get back to the enclosing struct _starpu_job.
+      set $j = (struct _starpu_job*) (((unsigned long) $l) - ((unsigned long) &((struct _starpu_job *)0)->all_submitted))
+      set $task = $j->task
+      printf "task %p %s\n", $task, $task->name ? $task->name : ""
+      set $l = $l->next
+    end
   end
 end