Browse Source

Add STARPU_DISPLAY_BINDINGS env var to display all bindings

Philippe SWARTVAGHER 5 years ago
parent
commit
e1bffe77f9

+ 3 - 0
ChangeLog

@@ -60,6 +60,9 @@ Small features:
   * New STARPU_BACKOFF_MIN and STARPU_BACKOFF_MAX environment variables to the
     exponential backoff limits of the number of cycles to pause while drivers
     are spinning.
+  * Add STARPU_DISPLAY_BINDINGS environment variable and
+    starpu_display_bindings() function to display all bindings on the machine by
+    calling hwloc-ps.
 
 StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad)
 ====================================================================

+ 9 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -1366,6 +1366,15 @@ application has crashed. Setting this variable to a value other than 1
 will disable this behaviour. This should be done on JVM systems which
 may use these signals for their own needs.
 The flag can also be set through the field starpu_conf::catch_signals.
+</dd>
+
+<dt>STARPU_DISPLAY_BINDINGS</dt>
+<dd>
+\anchor STARPU_DISPLAY_BINDINGS
+\addindex __env__STARPU_DISPLAY_BINDINGS
+Display the binding of all processes and threads running on the machine, by calling hwloc-ps. If MPI is enabled, the binding is displayed by each MPI rank in turn, after MPI initialization.<br>
+Users can manually display the binding by calling starpu_display_bindings().
+</dd>
 </dl>
 
 \section ConfiguringTheHypervisor Configuring The Hypervisor

+ 6 - 0
include/starpu.h

@@ -111,6 +111,12 @@ struct starpu_conf
 	int magic;
 
 	/**
+	   @private
+	   Tell starpu_init() if MPI will be initialized later.
+	*/
+	int will_use_mpi;
+
+	/**
 	   Name of the scheduling policy. This can also be specified
 	   with the environment variable \ref STARPU_SCHED. (default =
 	   <c>NULL</c>).

+ 8 - 0
include/starpu_helper.h

@@ -182,6 +182,14 @@ double starpu_timing_now(void);
 */
 int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
 
+/**
+   Call hwloc-ps to display the binding of each process and thread running on
+   the machine.<br>
+   If StarPU was built without hwloc support, only a message saying so is
+   displayed.<br>
+   Use the environment variable \ref STARPU_DISPLAY_BINDINGS to automatically
+   call this function at the beginning of the execution of StarPU.
+*/
+void starpu_display_bindings(void);
+
 /** @} */
 
 #ifdef __cplusplus

+ 34 - 1
mpi/src/starpu_mpi_init.c

@@ -134,7 +134,38 @@ int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi, MPI_Comm
 	_starpu_mpi_do_initialize(argc_argv);
 #endif
 
-	return _mpi_backend._starpu_mpi_backend_progress_init(argc_argv);
+	int ret = _mpi_backend._starpu_mpi_backend_progress_init(argc_argv);
+
+	if (starpu_get_env_number_default("STARPU_DISPLAY_BINDINGS", 0))
+	{
+		int rank, size, i;
+		char hostname[65];
+
+		starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+		starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+		gethostname(hostname, sizeof(hostname));
+
+		/* We make a barrier between each node calling hwloc-ps, to avoid mixing
+		 * outputs in stdout. */
+		for (i = 0; i < size; i++)
+		{
+			starpu_mpi_barrier(MPI_COMM_WORLD);
+			if (rank == i)
+			{
+				fprintf(stdout, "== Binding for rank %d on node %s ==\n", rank, hostname);
+				starpu_display_bindings();
+				fflush(stdout);
+			}
+		}
+		starpu_mpi_barrier(MPI_COMM_WORLD);
+		if (rank == 0)
+		{
+			fprintf(stdout, "== End of bindings ==\n");
+			fflush(stdout);
+		}
+	}
+
+	return ret;
 }
 
 #ifdef STARPU_SIMGRID
@@ -215,6 +246,8 @@ int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm c
 			conf->reserve_ncpus++;
 	}
 
+	conf->will_use_mpi = 1;
+
 	int ret = starpu_init(conf);
 	if (ret < 0)
 		return ret;

+ 4 - 2
mpi/tests/Makefile.am

@@ -143,7 +143,8 @@ starpu_mpi_TESTS +=				\
 	user_defined_datatype			\
 	early_stuff				\
 	sendrecv_bench				\
-	burst
+	burst						\
+	display_bindings
 
 if !STARPU_USE_MPI_MPI
 starpu_mpi_TESTS +=				\
@@ -246,7 +247,8 @@ noinst_PROGRAMS +=				\
 	sendrecv_bench				\
 	sendrecv_parallel_tasks_bench		\
 	burst					\
-	nothing
+	nothing							\
+	display_bindings
 
 if !STARPU_NO_BLAS_LIB
 noinst_PROGRAMS +=				\

+ 44 - 0
mpi/tests/display_bindings.c

@@ -0,0 +1,44 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <starpu.h>
+#include <starpu_mpi.h>
+#include "helper.h"
+
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined. Skipping test
+/* Without setenv(), the test cannot set STARPU_DISPLAY_BINDINGS itself. */
+int main(void)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+/* Check that StarPU-MPI initializes and shuts down properly when
+ * STARPU_DISPLAY_BINDINGS is set, i.e. when the bindings are displayed during
+ * the initialization. */
+int main(int argc, char **argv)
+{
+	int ret;
+
+	/* Has to be set before the initialization, which reads the variable. */
+	setenv("STARPU_DISPLAY_BINDINGS", "1", 1);
+
+	MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED);
+
+	/* MPI is already initialized above, hence initialize_mpi == 0. */
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	if (ret == -ENODEV)
+	{
+		/* Negative return values of starpu_init() are propagated by
+		 * starpu_mpi_init_conf(): skip the test instead of aborting,
+		 * as done in tests/main/display_binding.c. StarPU-MPI was not
+		 * started, only MPI has to be finalized. */
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_shutdown();
+	MPI_Finalize();
+
+	return EXIT_SUCCESS;
+}
+#endif

+ 15 - 0
src/common/utils.c

@@ -740,3 +740,18 @@ int starpu_get_env_size_default(const char *str, int defval)
 	}
 	return val;
 }
+
+/* Display the binding of the processes and threads running on the machine, by
+ * running the hwloc-ps command through system().
+ *
+ * stdout is flushed before spawning hwloc-ps: the command writes to the
+ * underlying file descriptor, not through our stdio buffer. Without the
+ * flush, anything the caller has just printed on stdout (e.g. a
+ * "== Binding ==" header) could come out after the hwloc-ps output when
+ * stdout is not line-buffered (pipe, file).
+ *
+ * A non-zero value returned by system() is reported with _STARPU_DISP.
+ * When StarPU is built without hwloc, only a message is displayed. */
+void starpu_display_bindings(void)
+{
+#ifdef STARPU_HAVE_HWLOC
+	int hwloc_ret;
+
+	/* Push out what was already printed, to keep the output ordered */
+	fflush(stdout);
+
+	hwloc_ret = system("hwloc-ps -a -t -c");
+	if (hwloc_ret)
+	{
+		_STARPU_DISP("hwloc-ps returned %d\n", hwloc_ret);
+		fflush(stderr);
+	}
+	fflush(stdout);
+#else
+	_STARPU_DISP("hwloc not available to display bindings.\n");
+#endif
+}

+ 10 - 0
src/core/workers.c

@@ -1059,6 +1059,7 @@ int starpu_conf_init(struct starpu_conf *conf)
 
 	memset(conf, 0, sizeof(*conf));
 	conf->magic = 42;
+	conf->will_use_mpi = 0;
 	conf->sched_policy_name = starpu_getenv("STARPU_SCHED");
 	conf->sched_policy = NULL;
 	conf->global_sched_ctx_min_priority = starpu_get_env_number("STARPU_MIN_PRIO");
@@ -1666,6 +1667,15 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 
 	_starpu_catch_signals();
 
+	/* if MPI is enabled, binding display will be done later, after MPI initialization */
+	if (!_starpu_config.conf.will_use_mpi && starpu_get_env_number_default("STARPU_DISPLAY_BINDINGS", 0))
+	{
+		fprintf(stdout, "== Binding ==\n");
+		starpu_display_bindings();
+		fprintf(stdout, "== End of binding ==\n");
+		fflush(stdout);
+	}
+
 	return 0;
 }
 

+ 1 - 0
tests/Makefile.am

@@ -145,6 +145,7 @@ myPROGRAMS =
 myPROGRAMS +=					\
 	main/bind				\
 	main/mkdtemp				\
+	main/display_binding			\
 	main/execute_schedule			\
 	main/insert_task_pack			\
 	main/insert_task_nullcodelet		\

+ 40 - 0
tests/main/display_binding.c

@@ -0,0 +1,40 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <starpu.h>
+#include "../helper.h"
+
+
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined. Skipping test
+/* Without setenv(), the test cannot set STARPU_DISPLAY_BINDINGS itself. */
+int main(void)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+/* Check that StarPU initializes and shuts down properly when
+ * STARPU_DISPLAY_BINDINGS is set, i.e. when the bindings are displayed at
+ * the end of the initialization. */
+int main(void)
+{
+	/* Has to be set before starpu_init(), which reads the variable. */
+	setenv("STARPU_DISPLAY_BINDINGS", "1", 1);
+
+	int ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Every successful starpu_init() has to be paired with a shutdown */
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
+#endif