瀏覽代碼

Merge branch 'master' into julia-autotools

Nathalie Furmento 5 年之前
父節點
當前提交
17a79454b4

+ 1 - 1
configure.ac

@@ -199,7 +199,7 @@ if test x$enable_simgrid = xyes ; then
 	AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include <simgrid/simix.h>]])
 
 	# Latest functions
-	AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_init sg_actor_set_stacksize MSG_zone_get_hosts sg_zone_get_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_data])
+	AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_init sg_actor_set_stacksize sg_actor_on_exit MSG_zone_get_hosts sg_zone_get_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_data])
 	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data SMPI_thread_create sg_zone_get_by_name sg_link_name sg_link_bandwidth_set sg_host_route sg_host_self sg_host_list sg_host_speed simcall_process_create sg_config_continue_after_help])
 	AC_CHECK_FUNCS([simgrid_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_SIMGRID_INIT], [1], [Define to 1 if you have the `simgrid_init' function.])])
 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])

+ 21 - 1
examples/cholesky/cholesky.sh

@@ -22,6 +22,26 @@ ROOT=${0%.sh}
 [ -n "$STARPU_HOSTNAME" ] || export STARPU_HOSTNAME=mirage
 unset MALLOC_PERTURB_
 
+INCR=2
+STOP=32
+
+if [ -n "$STARPU_SIMGRID" ]
+then
+	INCR=4
+	STOP=14
+	# These use the thread factory, and are thus much longer
+	if [ -n "$STARPU_QUICK_CHECK" ]
+	then
+		INCR=8
+		STOP=10
+	fi
+	if [ -n "$STARPU_LONG_CHECK" ]
+	then
+		INCR=4
+		STOP=32
+	fi
+fi
+
 (
 echo -n "#"
 for STARPU_SCHED in $STARPU_SCHEDS ; do
@@ -29,7 +49,7 @@ for STARPU_SCHED in $STARPU_SCHEDS ; do
 done
 echo
 
-for size in `seq 2 2 30` ; do
+for size in `seq 2 $INCR $STOP` ; do
 	echo -n "$((size * 960))"
 	for STARPU_SCHED in $STARPU_SCHEDS
 	do

+ 2 - 0
mpi/Makefile.am

@@ -27,6 +27,7 @@ versinclude_HEADERS = 					\
 	include/starpu_mpi_lb.h				\
 	include/fstarpu_mpi_mod.f90
 
+if !STARPU_SIMGRID
 check-recursive:
 	RET=0 ; \
 	NJOBS=`printf %s "$(MAKEFLAGS)" | sed -ne 's/.*-j \?\([0-9]\+\).*/\1/p'` ; \
@@ -42,3 +43,4 @@ check-recursive:
 		$(MAKE) check -C $$i MAKEFLAGS="$(MAKEFLAGS) $$JOBS" || RET=1; \
 	done ; \
 	exit $$RET
+endif

+ 1 - 0
mpi/src/mpi/starpu_mpi_mpi_backend.c

@@ -83,6 +83,7 @@ void _starpu_mpi_mpi_backend_request_destroy(struct _starpu_mpi_req *req)
 	STARPU_PTHREAD_MUTEX_DESTROY(&req->backend->posted_mutex);
 	STARPU_PTHREAD_COND_DESTROY(&req->backend->posted_cond);
 	free(req->backend);
+	req->backend = NULL;
 }
 
 void _starpu_mpi_mpi_backend_data_clear(starpu_data_handle_t data_handle)

+ 7 - 3
mpi/tests/Makefile.am

@@ -139,7 +139,8 @@ starpu_mpi_TESTS +=				\
 	temporary				\
 	user_defined_datatype			\
 	early_stuff				\
-	sendrecv_bench
+	sendrecv_bench				\
+	burst
 
 if !STARPU_USE_MPI_MPI
 starpu_mpi_TESTS +=				\
@@ -239,7 +240,9 @@ noinst_PROGRAMS =				\
 	load_balancer				\
 	driver					\
 	sendrecv_bench				\
-	sendrecv_parallel_tasks_bench
+	sendrecv_parallel_tasks_bench		\
+	burst					\
+	nothing
 
 if !NO_BLAS_LIB
 noinst_PROGRAMS +=				\
@@ -249,7 +252,8 @@ endif
 XFAIL_TESTS=					\
 	policy_register_toomany			\
 	policy_unregister			\
-	starpu_redefine
+	starpu_redefine				\
+	nothing
 
 ring_SOURCES = ring.c
 ring_sync_SOURCES = ring_sync.c

+ 272 - 0
mpi/tests/burst.c

@@ -0,0 +1,272 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This test posts many communications simultaneously, in various configurations.
+ *
+ * Its overall purpose is to observe the resulting behaviour in execution traces.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+#if defined(STARPU_SIMGRID) || defined(STARPU_QUICK_CHECK)
+#define NB_REQUESTS 10
+#else
+#define NB_REQUESTS 500
+#endif
+#define NX_ARRAY (320 * 320)
+
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
+
+/*
+ * Completion callback passed to starpu_mpi_irecv_detached(): arg points to an
+ * int flag which is raised under the file-level mutex, and one waiter blocked
+ * on the file-level condition variable is signalled.
+ */
+void recv_callback(void* arg)
+{
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	*(int *) arg = 1;
+	STARPU_PTHREAD_COND_SIGNAL(&cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+/*
+ * Exchange bursts of NB_REQUESTS vector messages between ranks 0 and 1, in
+ * four successive patterns: both directions at once, 0 -> 1 only, 1 -> 0 only,
+ * and both directions with the sends split in two halves. Ranks other than 0
+ * and 1 only take part in the barriers and in starpu_mpi_wait_for_all().
+ *
+ * Returns 0 on completion, or 77 (the exit status the Automake test harness
+ * reports as SKIP) when fewer than two MPI processes are available, since the
+ * test needs rank 1 to exist as the peer of rank 0.
+ */
+int main(int argc, char **argv)
+{
+	int ret, rank, size, mpi_init, other_rank;
+	starpu_data_handle_t recv_handles[NB_REQUESTS];
+	starpu_data_handle_t send_handles[NB_REQUESTS];
+	float* recv_buffers[NB_REQUESTS];
+	float* send_buffers[NB_REQUESTS];
+	starpu_mpi_req recv_reqs[NB_REQUESTS];
+	starpu_mpi_req send_reqs[NB_REQUESTS];
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size < 2)
+	{
+		/* Without a rank 1, rank 0 would post requests to a non-existent peer:
+		 * shut down cleanly and report the test as skipped instead. */
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 2 processes.\n");
+
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+
+		return 77;
+	}
+
+	/* Ranks 0 and 1 talk to each other */
+	other_rank = (rank == 0) ? 1 : 0;
+
+	if (rank == 0 || rank == 1)
+	{
+		/* One zero-filled send vector and one zero-filled receive vector per request */
+		for (int i = 0; i < NB_REQUESTS; i++)
+		{
+			send_buffers[i] = malloc(NX_ARRAY * sizeof(float));
+			memset(send_buffers[i], 0, NX_ARRAY * sizeof(float));
+			starpu_vector_data_register(&send_handles[i], STARPU_MAIN_RAM, (uintptr_t) send_buffers[i], NX_ARRAY, sizeof(float));
+
+			recv_buffers[i] = malloc(NX_ARRAY * sizeof(float));
+			memset(recv_buffers[i], 0, NX_ARRAY * sizeof(float));
+			starpu_vector_data_register(&recv_handles[i], STARPU_MAIN_RAM, (uintptr_t) recv_buffers[i], NX_ARRAY, sizeof(float));
+		}
+	}
+
+	{
+		/* Burst simultaneous from both nodes: 0 and 1 post all the recvs, synchronise, and then post all the sends */
+		FPRINTF(stderr, "Simultaneous....start (rank %d)\n", rank);
+
+		if (rank == 0 || rank == 1)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				recv_reqs[i] = NULL;
+				starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], other_rank, i, MPI_COMM_WORLD);
+			}
+		}
+
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+
+		if (rank == 0 || rank == 1)
+		{
+			/* The request index is used both as priority and as tag */
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				send_reqs[i] = NULL;
+				starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD);
+			}
+		}
+
+		if (rank == 0 || rank == 1)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				if (recv_reqs[i]) starpu_mpi_wait(&recv_reqs[i], MPI_STATUS_IGNORE);
+				if (send_reqs[i]) starpu_mpi_wait(&send_reqs[i], MPI_STATUS_IGNORE);
+			}
+		}
+		starpu_mpi_wait_for_all(MPI_COMM_WORLD);
+		FPRINTF(stderr, "Simultaneous....end (rank %d)\n", rank);
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+	}
+
+	{
+		/* Burst from 0 to 1 : rank 1 posts all the recvs, barrier, then rank 0 posts all the sends */
+		FPRINTF(stderr, "0 -> 1...start (rank %d)\n", rank);
+
+		if (rank == 1)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				recv_reqs[i] = NULL;
+				starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], other_rank, i, MPI_COMM_WORLD);
+			}
+		}
+
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+
+		if (rank == 0)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				send_reqs[i] = NULL;
+				starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD);
+			}
+		}
+
+		if (rank == 0 || rank == 1)
+		{
+			/* Each side only waits for the kind of request it posted in this phase */
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				if (rank == 1 && recv_reqs[i]) starpu_mpi_wait(&recv_reqs[i], MPI_STATUS_IGNORE);
+				if (rank == 0 && send_reqs[i]) starpu_mpi_wait(&send_reqs[i], MPI_STATUS_IGNORE);
+			}
+		}
+		starpu_mpi_wait_for_all(MPI_COMM_WORLD);
+		FPRINTF(stderr, "0 -> 1...done (rank %d)\n", rank);
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+	}
+
+	{
+		FPRINTF(stderr, "1 -> 0...start (rank %d)\n", rank);
+		/* Burst from 1 to 0 : rank 0 posts all the recvs, barrier, then rank 1 posts all the sends */
+		if (rank == 0)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				recv_reqs[i] = NULL;
+				starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], other_rank, i, MPI_COMM_WORLD);
+			}
+		}
+
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+
+		if (rank == 1)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				send_reqs[i] = NULL;
+				starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD);
+			}
+		}
+
+		if (rank == 0 || rank == 1)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				if (rank == 0 && recv_reqs[i]) starpu_mpi_wait(&recv_reqs[i], MPI_STATUS_IGNORE);
+				if (rank == 1 && send_reqs[i]) starpu_mpi_wait(&send_reqs[i], MPI_STATUS_IGNORE);
+			}
+		}
+		starpu_mpi_wait_for_all(MPI_COMM_WORLD);
+		FPRINTF(stderr, "1 -> 0...done (rank %d)\n", rank);
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+	}
+
+	{
+		/* Half burst from both nodes, second half burst is triggered after some requests finished. */
+		FPRINTF(stderr, "Half/half burst...start (rank %d)\n", rank);
+
+		/* Raised by recv_callback() when a detached receive completes */
+		int received = 0;
+
+		if (rank == 0 || rank == 1)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				recv_reqs[i] = NULL;
+				if (i % 2)
+				{
+					/* Odd indices: detached receive, completion is reported through the callback */
+					starpu_mpi_irecv_detached(recv_handles[i], other_rank, i, MPI_COMM_WORLD, recv_callback, &received);
+				}
+				else
+				{
+					/* Even indices: regular receive, waited for explicitly below */
+					starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], other_rank, i, MPI_COMM_WORLD);
+				}
+			}
+		}
+
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+
+		if (rank == 0 || rank == 1)
+		{
+			/* First half of the sends */
+			for (int i = 0; i < (NB_REQUESTS / 2); i++)
+			{
+				send_reqs[i] = NULL;
+				starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD);
+			}
+		}
+
+		if (rank == 0 || rank == 1)
+		{
+			/* Block until at least one detached receive has completed */
+			STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+			while (!received)
+				STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+		}
+
+		if (rank == 0 || rank == 1)
+		{
+			/* Second half of the sends */
+			for (int i = (NB_REQUESTS / 2); i < NB_REQUESTS; i++)
+			{
+				send_reqs[i] = NULL;
+				starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD);
+			}
+		}
+
+		if (rank == 0 || rank == 1)
+		{
+			for (int i = 0; i < NB_REQUESTS; i++)
+			{
+				if (recv_reqs[i]) starpu_mpi_wait(&recv_reqs[i], MPI_STATUS_IGNORE);
+				if (send_reqs[i]) starpu_mpi_wait(&send_reqs[i], MPI_STATUS_IGNORE);
+			}
+		}
+
+		starpu_mpi_wait_for_all(MPI_COMM_WORLD);
+		FPRINTF(stderr, "Half/half burst...done (rank %d)\n", rank);
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+	}
+
+	/* Clear up */
+	if (rank == 0 || rank == 1)
+	{
+		for (int i = 0; i < NB_REQUESTS; i++)
+		{
+			starpu_data_unregister(send_handles[i]);
+			free(send_buffers[i]);
+
+			starpu_data_unregister(recv_handles[i]);
+			free(recv_buffers[i]);
+		}
+	}
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}

+ 69 - 0
mpi/tests/nothing.c

@@ -0,0 +1,69 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This program does nothing. It waits until it is interrupted by the user.
+ * Useful to check binding while StarPU is running.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+
+/*
+ * Initialise MPI and StarPU-MPI, pause the StarPU workers, print on which host
+ * each rank runs, then sleep forever until the user interrupts the program.
+ *
+ * The teardown after the infinite loop is never reached as the code stands; it
+ * is kept for the day interruption is handled (see the TODO below).
+ */
+int main(int argc, char **argv)
+{
+	int ret, rank, worldsize;
+	int mpi_init;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_pause(); // our program will only wait, no need to stress cores by polling workers
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	char hostname[65];
+	/* On failure the buffer content cannot be relied upon: print an empty name */
+	if (gethostname(hostname, sizeof(hostname)) != 0)
+		hostname[0] = '\0';
+	/* A truncated name is not guaranteed to be NUL-terminated */
+	hostname[sizeof(hostname) - 1] = '\0';
+
+	printf("[rank %d on %s] ready to wait !\n", rank, hostname);
+
+	if (rank == 0)
+	{
+		printf("You can now check if thread binding is correct, for instance.\n");
+	}
+
+	fflush(stdout);
+
+	/* Wait until the process is interrupted from outside */
+	while(1)
+	{
+		sleep(1);
+	}
+
+	// TODO: maybe better handle the user interruption ?
+
+
+	starpu_resume();
+
+	starpu_mpi_shutdown();
+	if (!mpi_init)
+		MPI_Finalize();
+
+	return 0;
+}

+ 6 - 9
mpi/tools/Makefile.am

@@ -21,23 +21,20 @@ include $(top_srcdir)/starpu.mk
 SUBDIRS =
 
 AM_CFLAGS = $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(GLOBAL_AM_CFLAGS)
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_builddir)/src -I$(top_srcdir)/src -DSTARPU_REPLAY_MPI
+LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la @LIBS@ $(FXT_LIBS)
+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/mpi/include -I$(top_builddir)/src -I$(top_srcdir)/src -DSTARPU_REPLAY_MPI
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)
 
 CC=$(CC_OR_MPICC)
 CCLD=$(CC_OR_MPICC)
 
-if STARPU_USE_MPI
-LIBS += $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-AM_CPPFLAGS += -I$(top_srcdir)/mpi/include
-endif
-
-bin_PROGRAMS = starpu_replay_mpi
-
 starpu_replay.c starpu_replay_sched.c:
 	$(LN_S) $(top_srcdir)/tools/$(notdir $@) $@
 
+if STARPU_SIMGRID
+bin_PROGRAMS = starpu_replay_mpi
+
 starpu_replay_mpi_SOURCES = \
 	starpu_replay.c \
 	starpu_replay_sched.c
+endif

+ 1 - 5
src/common/thread.c

@@ -81,11 +81,7 @@ int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const s
 	asprintf(&_args[1], "%p", arg);
 	_args[2] = NULL;
 	if (!host)
-#ifdef STARPU_HAVE_SIMGRID_HOST_H
-		host = sg_host_by_name("MAIN");
-#else
-		host = MSG_get_host_by_name("MAIN");
-#endif
+		host = _starpu_simgrid_get_host_by_name("MAIN");
 
 	void *tsd;
 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));

+ 17 - 0
src/core/simgrid.c

@@ -97,6 +97,21 @@ void _starpu_simgrid_set_stack_size(size_t stack_size)
 #endif
 }
 
+#ifdef HAVE_SG_ACTOR_ON_EXIT
+/* Exit hook handed to sg_actor_on_exit(): displays the current backtrace when
+ * it is invoked with a non-zero `failed'. The `data' argument is not used. */
+static void on_exit_backtrace(int failed, void *data STARPU_ATTRIBUTE_UNUSED)
+{
+	if (!failed)
+		return;
+
+	xbt_backtrace_display_current();
+}
+#endif
+
+/* Actor start-up hook: registers on_exit_backtrace() through sg_actor_on_exit()
+ * when that function is available (HAVE_SG_ACTOR_ON_EXIT), and is a no-op
+ * otherwise. */
+void _starpu_simgrid_actor_setup(void)
+{
+#ifdef HAVE_SG_ACTOR_ON_EXIT
+	sg_actor_on_exit(on_exit_backtrace, NULL);
+#endif
+}
+
 #if defined(HAVE_SG_ZONE_GET_BY_NAME) || defined(sg_zone_get_by_name)
 #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
 sg_netzone_t _starpu_simgrid_get_as_by_name(const char *name)
@@ -360,6 +375,7 @@ int do_starpu_main(int argc, char *argv[])
 {
 	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
 	starpu_sleep(0.000001);
+	_starpu_simgrid_actor_setup();
 
 	if (!starpu_main)
 	{
@@ -1168,6 +1184,7 @@ _starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[])
 
 	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
 	starpu_sleep(0.000001);
+	_starpu_simgrid_actor_setup();
 
 	/* _args is freed with process context */
 	f(arg);

+ 1 - 0
src/core/simgrid.h

@@ -63,6 +63,7 @@ void _starpu_simgrid_init_early(int *argc, char ***argv);
 void _starpu_simgrid_init(void);
 void _starpu_simgrid_deinit(void);
 void _starpu_simgrid_deinit_late(void);
+void _starpu_simgrid_actor_setup(void);
 void _starpu_simgrid_wait_tasks(int workerid);
 struct _starpu_job;
 void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *job, struct starpu_perfmodel_arch* perf_arch, double length, unsigned *finished);

+ 5 - 2
src/datawizard/interfaces/bcsr_interface.c

@@ -135,8 +135,11 @@ void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, int home_node,
 	{
 		if (nnz)
 		{
-			STARPU_ASSERT_ACCESSIBLE(nzval);
-			STARPU_ASSERT_ACCESSIBLE(nzval + nnz*elemsize*r*c - 1);
+			if (r && c && elemsize)
+			{
+				STARPU_ASSERT_ACCESSIBLE(nzval);
+				STARPU_ASSERT_ACCESSIBLE(nzval + nnz*elemsize*r*c - 1);
+			}
 			STARPU_ASSERT_ACCESSIBLE(colind);
 			STARPU_ASSERT_ACCESSIBLE((uintptr_t) colind + nnz*sizeof(uint32_t) - 1);
 		}

+ 5 - 2
src/datawizard/interfaces/block_interface.c

@@ -136,8 +136,11 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, int home_node,
 #ifndef STARPU_SIMGRID
 	if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM)
 	{
-		STARPU_ASSERT_ACCESSIBLE(ptr);
-		STARPU_ASSERT_ACCESSIBLE(ptr + (nz-1)*ldz*elemsize + (ny-1)*ldy*elemsize + nx*elemsize - 1);
+		if (nx && ny && nz && elemsize)
+		{
+			STARPU_ASSERT_ACCESSIBLE(ptr);
+			STARPU_ASSERT_ACCESSIBLE(ptr + (nz-1)*ldz*elemsize + (ny-1)*ldy*elemsize + nx*elemsize - 1);
+		}
 	}
 #endif
 

+ 7 - 4
src/datawizard/interfaces/coo_interface.c

@@ -250,10 +250,13 @@ starpu_coo_data_register(starpu_data_handle_t *handleptr, int home_node,
 #ifndef STARPU_SIMGRID
 	if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM)
 	{
-		STARPU_ASSERT_ACCESSIBLE(columns);
-		STARPU_ASSERT_ACCESSIBLE((uintptr_t) columns + n_values*sizeof(uint32_t) - 1);
-		STARPU_ASSERT_ACCESSIBLE(rows);
-		STARPU_ASSERT_ACCESSIBLE((uintptr_t) rows + n_values*sizeof(uint32_t) - 1);
+		if (n_values)
+		{
+			STARPU_ASSERT_ACCESSIBLE(columns);
+			STARPU_ASSERT_ACCESSIBLE((uintptr_t) columns + n_values*sizeof(uint32_t) - 1);
+			STARPU_ASSERT_ACCESSIBLE(rows);
+			STARPU_ASSERT_ACCESSIBLE((uintptr_t) rows + n_values*sizeof(uint32_t) - 1);
+		}
 		STARPU_ASSERT_ACCESSIBLE(values);
 		STARPU_ASSERT_ACCESSIBLE(values + n_values*elemsize - 1);
 	}

+ 5 - 2
src/datawizard/interfaces/csr_interface.c

@@ -117,8 +117,11 @@ void starpu_csr_data_register(starpu_data_handle_t *handleptr, int home_node,
 	{
 		if (nnz)
 		{
-			STARPU_ASSERT_ACCESSIBLE(nzval);
-			STARPU_ASSERT_ACCESSIBLE(nzval + nnz*elemsize - 1);
+			if (elemsize)
+			{
+				STARPU_ASSERT_ACCESSIBLE(nzval);
+				STARPU_ASSERT_ACCESSIBLE(nzval + nnz*elemsize - 1);
+			}
 			STARPU_ASSERT_ACCESSIBLE(colind);
 			STARPU_ASSERT_ACCESSIBLE((uintptr_t) colind + nnz*sizeof(uint32_t) - 1);
 		}

+ 5 - 2
src/datawizard/interfaces/matrix_interface.c

@@ -145,8 +145,11 @@ void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handleptr, int
 #ifndef STARPU_SIMGRID
 	if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM)
 	{
-		STARPU_ASSERT_ACCESSIBLE(ptr);
-		STARPU_ASSERT_ACCESSIBLE(ptr + (ny-1)*ld*elemsize + nx*elemsize - 1);
+		if (nx && ny && elemsize)
+		{
+			STARPU_ASSERT_ACCESSIBLE(ptr);
+			STARPU_ASSERT_ACCESSIBLE(ptr + (ny-1)*ld*elemsize + nx*elemsize - 1);
+		}
 	}
 #endif
 

+ 5 - 2
src/datawizard/interfaces/tensor_interface.c

@@ -143,8 +143,11 @@ void starpu_tensor_data_register(starpu_data_handle_t *handleptr, int home_node,
 #ifndef STARPU_SIMGRID
 	if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM)
 	{
-		STARPU_ASSERT_ACCESSIBLE(ptr);
-		STARPU_ASSERT_ACCESSIBLE(ptr + (nt-1)*ldt*elemsize + (nz-1)*ldz*elemsize + (ny-1)*ldy*elemsize + nx*elemsize - 1);
+		if (nx && ny && nz && nt && elemsize)
+		{
+			STARPU_ASSERT_ACCESSIBLE(ptr);
+			STARPU_ASSERT_ACCESSIBLE(ptr + (nt-1)*ldt*elemsize + (nz-1)*ldz*elemsize + (ny-1)*ldy*elemsize + nx*elemsize - 1);
+		}
 	}
 #endif
 

+ 5 - 2
src/datawizard/interfaces/variable_interface.c

@@ -112,8 +112,11 @@ void starpu_variable_data_register(starpu_data_handle_t *handleptr, int home_nod
 #ifndef STARPU_SIMGRID
 	if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM)
 	{
-		STARPU_ASSERT_ACCESSIBLE(ptr);
-		STARPU_ASSERT_ACCESSIBLE(ptr + elemsize - 1);
+		if (elemsize)
+		{
+			STARPU_ASSERT_ACCESSIBLE(ptr);
+			STARPU_ASSERT_ACCESSIBLE(ptr + elemsize - 1);
+		}
 	}
 #endif
 

+ 5 - 2
src/datawizard/interfaces/vector_interface.c

@@ -136,8 +136,11 @@ void starpu_vector_data_register_allocsize(starpu_data_handle_t *handleptr, int
 #if (!defined(STARPU_SIMGRID) && !defined(STARPU_OPENMP))
 	if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM)
 	{
-		STARPU_ASSERT_ACCESSIBLE(ptr);
-		STARPU_ASSERT_ACCESSIBLE(ptr + nx*elemsize - 1);
+		if (nx && elemsize)
+		{
+			STARPU_ASSERT_ACCESSIBLE(ptr);
+			STARPU_ASSERT_ACCESSIBLE(ptr + nx*elemsize - 1);
+		}
 	}
 #endif
 

+ 32 - 2
src/debug/traces/starpu_fxt.c

@@ -209,8 +209,7 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 	fprintf(tasks_file, "JobId: %s%lu\n", prefix, task->job_id);
 	if (task->submit_order)
 		fprintf(tasks_file, "SubmitOrder: %lu\n", task->submit_order);
-	if (task->priority)
-		fprintf(tasks_file, "Priority: %ld\n", task->priority);
+	fprintf(tasks_file, "Priority: %ld\n", task->priority);
 	if (task->dependencies)
 	{
 		fprintf(tasks_file, "DependsOn:");
@@ -500,6 +499,7 @@ LIST_TYPE(_starpu_communication,
 	double bandwidth;
 	unsigned src_node;
 	unsigned dst_node;
+	unsigned long size;
 	const char *type;
 	unsigned long handle;
 	struct _starpu_communication *peer;
@@ -2315,6 +2315,7 @@ static void handle_start_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 		struct _starpu_communication *com = _starpu_communication_new();
 		com->comid = comid;
 		com->comm_start = get_event_time_stamp(ev, options);
+		com->size = size;
 
 		com->src_node = src;
 		com->dst_node = dst;
@@ -2393,6 +2394,7 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 				com->comid = comid;
 				com->comm_start = get_event_time_stamp(ev, options);
 				com->bandwidth = -bandwidth;
+				com->size = size;
 
 				src = com->src_node = itor->src_node;
 				com->dst_node = itor->dst_node;
@@ -4145,6 +4147,34 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 		_starpu_fxt_process_computations(options);
 	}
 
+	if (out_paje_file && !options->no_bus)
+	{
+		while (!_starpu_communication_list_empty(&communication_list)) {
+			struct _starpu_communication*itor;
+			itor = _starpu_communication_list_pop_front(&communication_list);
+
+			/* Trace finished with this communication uncompleted, fake its termination */
+
+			unsigned comid = itor->comid;
+			unsigned long size = itor->size;
+			unsigned dst = itor->dst_node;
+			double time = current_computation_time;
+			const char *link_type = itor->type;
+#ifdef STARPU_HAVE_POTI
+			char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN];
+			char dst_memnode_container[STARPU_POTI_STR_LEN], program_container[STARPU_POTI_STR_LEN];
+			snprintf(paje_value, sizeof(paje_value), "%lu", size);
+			snprintf(paje_key, sizeof(paje_key), "com_%u", comid);
+			program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix);
+			memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst);
+			poti_EndLink(time, program_container, link_type, dst_memnode_container, paje_value, paje_key);
+#else
+			fprintf(out_paje_file, "19	%.9f	%s	%sp	%lu	%smm%u	com_%u\n", time, link_type, prefix, size, prefix, dst, comid);
+#endif
+			_starpu_communication_delete(itor);
+		}
+	}
+
 	if (out_paje_file && !options->no_flops)
 	{
 		unsigned i;

+ 15 - 0
starpu.mk

@@ -87,3 +87,18 @@ env:
 	@echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME)
 	@echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_)
 endif
+
+if STARPU_SIMGRID
+STARPU_SIMGRID=1
+export STARPU_SIMGRID
+endif
+
+if STARPU_QUICK_CHECK
+STARPU_QUICK_CHECK=1
+export STARPU_QUICK_CHECK
+endif
+
+if STARPU_LONG_CHECK
+STARPU_LONG_CHECK=1
+export STARPU_LONG_CHECK
+endif

+ 34 - 28
tests/loader.c

@@ -242,6 +242,9 @@ int main(int argc, char *argv[])
 	}
 	if (timeout <= 0)
 		timeout = DEFAULT_TIMEOUT;
+#ifdef STARPU_SIMGRID
+	timeout *= 10;
+#endif
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 	/* compare values between the 2 values of timeout */
@@ -257,9 +260,13 @@ int main(int argc, char *argv[])
 	{
 		test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1);
 		sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]);
+		x += 3;
 	}
 	else
+	{
 		test_name = argv[x];
+		x += 1;
+	}
 
 	if (!test_name)
 	{
@@ -352,40 +359,39 @@ int main(int argc, char *argv[])
 	child_pid = fork();
 	if (child_pid == 0)
 	{
-		if (launcher)
+		char *launcher_argv[100];
+		int i=0;
+
+		/* "Launchers" such as Valgrind need to be inserted
+		 * after the Libtool-generated wrapper scripts, hence
+		 * this special-case.  */
+		if (launcher && top_builddir != NULL)
 		{
-			/* "Launchers" such as Valgrind need to be inserted
-			 * after the Libtool-generated wrapper scripts, hence
-			 * this special-case.  */
-			if (top_builddir != NULL)
+			launcher_argv[i++] = libtool;
+			launcher_argv[i++] = "--mode=execute";
+			launcher_argv[i++] = launcher;
+			if (launcher_args)
 			{
-				char *launcher_argv[100];
-				int i=3;
-
-				launcher_argv[0] = libtool;
-				launcher_argv[1] = "--mode=execute";
-				launcher_argv[2] = launcher;
-				if (launcher_args)
+				launcher_argv[i++] = strtok(launcher_args, " ");
+				while (launcher_argv[i-1])
 				{
-					launcher_argv[i] = strtok(launcher_args, " ");
-					while (launcher_argv[i])
-					{
-						i++;
-						launcher_argv[i] = strtok(NULL, " ");
-					}
+					launcher_argv[i++] = strtok(NULL, " ");
 				}
+				/* The loop ends right after storing strtok()'s final NULL, with i
+				 * already past it: step back so that the test name and its
+				 * arguments overwrite that NULL instead of being appended after
+				 * the terminator, where execvp() would never see them. */
+				i--;
-				launcher_argv[i] = test_name;
-				launcher_argv[i+1] = test_args;
-				launcher_argv[i+2] = NULL;
-				execvp(*launcher_argv, launcher_argv);
-			}
-			else
-			{
-				execl(test_name, test_name, test_args, NULL);
 			}
 		}
-		else
-			execl(test_name, test_name, test_args, NULL);
+
+		launcher_argv[i++] = test_name;
+		if (test_args)
+			launcher_argv[i++] = test_args;
+		else while (argv[x])
+		{
+			launcher_argv[i++] = argv[x++];
+		}
+#ifdef STARPU_SIMGRID
+		launcher_argv[i++] = "--cfg=contexts/factory:thread";
+#endif
+		launcher_argv[i++] = NULL;
+		execvp(*launcher_argv, launcher_argv);
 
 		fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name);
 		exit(EXIT_FAILURE);

+ 7 - 0
tests/model-checking/starpu_barrier.c

@@ -73,6 +73,13 @@ _starpu_simgrid_thread_start(int argc, char *argv[])
 	return 0;
 }
 
+size_t _starpu_default_stack_size = 8192; /* Local definition for this stand-alone test. NOTE(review): presumably mirrors the runtime's global of the same name -- confirm. */
+
+void
+_starpu_simgrid_set_stack_size(size_t stack_size)
+{ /* No-op stub: stack_size is ignored. NOTE(review): presumably only present so that code shared with the runtime links in this model-checking test -- confirm. */
+}
+
 static void _starpu_clock_gettime(struct timespec *ts)
 {
 #ifdef HAVE_SIMGRID_GET_CLOCK

+ 1 - 0
tools/Makefile.am

@@ -432,6 +432,7 @@ dist_bin_SCRIPTS +=				\
 	starpu_mlr_analysis			\
 	starpu_mlr_analysis.Rmd			\
 	starpu_paje_state_stats			\
+	starpu_paje_state_stats.R			\
 	starpu_send_recv_data_use.py 		\
 	starpu_trace_state_stats.py
 

+ 5 - 0
tools/gdbinit

@@ -75,6 +75,11 @@ define starpu-print-task
   if $task->cl && $task->cl->name
     printf "\tcodelet name:\t\t\t<%s>\n", $task->cl->name
   end
+  set $nbuffers = $task->nbuffers
+  if $task->cl && $task->cl->nbuffers != -1
+    set $nbuffers = $task->cl->nbuffers
+  end
+  printf "\tnbuffers:\t\t\t<%d>\n", $nbuffers
   printf "\tcallback:\t\t\t<%p>\n", $task->callback_func
   printf "\tsynchronous:\t\t\t<%d>\n", $task->synchronous
   printf "\texecute_on_a_specific_worker:\t<%d>\n", $task->execute_on_a_specific_worker

+ 12 - 4
tools/starpu_smpirun.in

@@ -17,7 +17,7 @@
 # Script for running starpu-mpi application in simgrid mode
 
 prefix=@prefix@
-SMPIRUN=@smpirun_path@
+SMPIRUN=@mpiexec_path@
 STARPU_DATADIR=@datarootdir@
 STARPU_XSLTDIR=$STARPU_DATADIR/starpu
 SOURCE_DATADIR=@abs_srcdir@
@@ -40,11 +40,19 @@ else
 	DASH=_
 fi
 
+EXTRA_OPT=""
+
 if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 16 \) ]
 then
-	PRIV_OPT="--cfg=smpi/privatization:yes"
+	EXTRA_OPT+=" --cfg=smpi/privatization:yes"
 else
-	PRIV_OPT="--cfg=smpi/privatize${DASH}global${DASH}variables:yes"
+	EXTRA_OPT+=" --cfg=smpi/privatize${DASH}global${DASH}variables:yes"
+fi
+
+if [ -n "$TEST_LOGS" ]
+then
+	# Testsuite, use our loader
+	WRAPPER="-wrapper $BUILDDIR/../tests/loader"
 fi
 
 # When executed from source, take xslt from source
@@ -125,7 +133,7 @@ EOF
 
 STACKSIZE=$(ulimit -s)
 [ "$STACKSIZE" != unlimited ] || STACKSIZE=8192
-$SMPIRUN $GDB -platform $PLATFORM -hostfile $MPI_HOSTFILE -np $NP "$@" $PRIV_OPT --cfg=smpi/simulate${DASH}computation:no --cfg=contexts/stack${DASH}size:$STACKSIZE
+$SMPIRUN $WRAPPER $GDB -platform $PLATFORM -hostfile $MPI_HOSTFILE -np $NP "$@" $EXTRA_OPT --cfg=smpi/simulate${DASH}computation:no --cfg=contexts/stack${DASH}size:$STACKSIZE
 RET=$?
 
 rm -f $PLATFORM

+ 3 - 3
tools/starpu_trace_state_stats.py

@@ -74,9 +74,9 @@ class Worker():
             return # Will look later to find a PopState event.
         elif curr_event._type == "PopState":
             if len(self._stack) == 0:
-                sys.exit("ERROR: The trace is most likely corrupted "
-                         "because a PopState event has been found without "
-                         "a PushState!")
+                print("warning: PopState without a PushState, probably a trace with start/stop profiling")
+                self._current_state = None
+                return
             next_event = curr_event
             curr_event = self._stack.pop()
         elif curr_event._type == "SetState":