Browse Source

Add STARPU_WORKERS_GETBIND environment variable

Samuel Thibault 5 years ago
parent
commit
cb1736a5b3

+ 1 - 0
ChangeLog

@@ -49,6 +49,7 @@ Small features:
   * New function starpu_task_watchdog_set_hook to specify a function
     to be called when the watchdog is raised
   * Add STARPU_LIMIT_CPU_NUMA_MEM environment variable.
+  * Add STARPU_WORKERS_GETBIND environment variable.
 
 StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad)
 ====================================================================

+ 5 - 0
doc/doxygen/chapters/410_mpi_support.doxy

@@ -25,6 +25,11 @@ GPU-RAM-NIC transfers are handled efficiently by StarPU-MPI.  The user has to
 use the usual <c>mpirun</c> command of the MPI implementation to start StarPU on
 the different MPI nodes.
 
+In case the user wants to run several MPI processes per machine (e.g. one per
+NUMA node), \ref STARPU_WORKERS_GETBIND should be used to make StarPU take into
+account the binding set by the MPI launcher (otherwise each StarPU instance
+would try to bind on all cores of the machine).
+
 An MPI Insert Task function provides an even more seamless transition to a
 distributed application, by automatically issuing all required data transfers
 according to the task graph and an application-provided distribution.

+ 10 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -184,6 +184,16 @@ Setting it to non-zero will prevent StarPU from binding its threads to
 CPUs. This is for instance useful when running the testsuite in parallel.
 </dd>
 
+<dt>STARPU_WORKERS_GETBIND</dt>
+<dd>
+\anchor STARPU_WORKERS_GETBIND
+\addindex __env__STARPU_WORKERS_GETBIND
+Setting it to non-zero makes StarPU use the OS-provided CPU binding to determine
+how many and which CPU cores it should use. This is notably useful when running
+several StarPU-MPI processes on the same host, to let the MPI launcher set the
+CPUs to be used.
+</dd>
+
 <dt>STARPU_WORKERS_CPUID</dt>
 <dd>
 \anchor STARPU_WORKERS_CPUID

+ 19 - 0
src/core/topology.c

@@ -806,8 +806,27 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 		int err = hwloc_topology_set_xml(topology->hwtopology, hwloc_input);
 		if (err < 0) _STARPU_DISP("Could not load hwloc input %s\n", hwloc_input);
 	}
+
 	_starpu_topology_filter(topology->hwtopology);
 	hwloc_topology_load(topology->hwtopology);
+
+	if (starpu_get_env_number_default("STARPU_WORKERS_GETBIND", 0))
+	{
+		/* Respect the existing binding */
+		hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
+
+		int ret = hwloc_get_cpubind(topology->hwtopology, cpuset, HWLOC_CPUBIND_THREAD);
+		if (ret)
+			_STARPU_DISP("Warning: could not get current CPU binding: %s\n", strerror(errno));
+		else
+		{
+			ret = hwloc_topology_restrict(topology->hwtopology, cpuset, 0);
+			if (ret)
+				_STARPU_DISP("Warning: could not restrict hwloc to cpuset: %s\n", strerror(errno));
+		}
+		hwloc_bitmap_free(cpuset);
+	}
+
 	_starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology));
 #endif
 #endif