5 yıl önce · 07242e4ec3
--- a/configure.ac
+++ b/configure.ac
@@ -1848,9 +1848,6 @@ AC_ARG_ENABLE(maxmicthreads, [AS_HELP_STRING([--enable-maxmicthreads=<number>],
 
				 			nmaxmicthreads=$enableval, nmaxmicthreads=120)
			
 
				 AC_MSG_RESULT($nmaxmicthreads)
			
 
				 
			
 
				-AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmicthreads],
			
 
				-	[maximum number of MIC cores])
			
 
				-
			
 
				 AC_ARG_WITH(coi-dir,
			
 
				 	[AS_HELP_STRING([--with-coi-dir=<path>],
			
 
				 	[specify the MIC's COI installation directory])],
			
@@ -2487,6 +2484,23 @@ nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* $maxcpus \) + $nmaxcudadev +  $n
 
				 AC_MSG_CHECKING(Maximum number of workers)
			
 
				 AC_MSG_RESULT($nmaxworkers)
			
 
				 AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
			
 
				+nmaxdevs=0
			
 
				+if test $nmaxdevs -lt $nmaxcudadev; then
			
 
				+	nmaxdevs=$nmaxcudadev
			
 
				+fi
			
 
				+if test $nmaxdevs -lt $nmaxopencldev; then
			
 
				+	nmaxdevs=$nmaxopencldev
			
 
				+fi
			
 
				+if test $nmaxdevs -lt $nmaxfpgadev; then
			
 
				+	nmaxdevs=$nmaxfpgadev
			
 
				+fi
			
 
				+if test $nmaxdevs -lt $nmaxmicdev; then
			
 
				+	nmaxdevs=$nmaxmicdev
			
 
				+fi
			
 
				+if test $nmaxdevs -lt $nmaxmpidev; then
			
 
				+	nmaxdevs=$nmaxmpidev
			
 
				+fi
			
 
				+AC_DEFINE_UNQUOTED(STARPU_NMAXDEVS, [$nmaxdevs], [Maximum number of device per device arch])
			
 
				 
			
 
				 # Compute the maximum number of combined workers
			
 
				 nmaxcombinedworkers=`expr $maxcpus + $nmaxmicthreads`
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,6 +20,8 @@ include $(top_srcdir)/starpu.mk
 
				 
			
 
				 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
			
 
				 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
			
 
				+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused
			
 
				+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
			
 
				 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
			
 
				 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
			
--- a/examples/fortran90/f90_example.f90
+++ b/examples/fortran90/f90_example.f90
@@ -33,8 +33,7 @@ PROGRAM f90_example
 
				   TYPE(type_mesh_elt),POINTER    :: elt   => NULL()
			
 
				   INTEGER(KIND=C_INT)            :: i,Nelt,res,cpus
			
 
				   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
			
 
				-  INTEGER(KIND=C_INT)            :: neq,ng,nb,it,it_tot
			
 
				-  REAL(KIND=C_DOUBLE)            :: r, coeff2
			
 
				+  INTEGER(KIND=C_INT)            :: it,it_tot
			
 
				 
			
 
				   !Initialization with arbitrary data
			
 
				   Nelt           = 2
			
--- a/examples/native_fortran/nf_example.f90
+++ b/examples/native_fortran/nf_example.f90
@@ -32,8 +32,7 @@ PROGRAM f90_example
 
				   TYPE(type_mesh_elt),POINTER    :: elt   => NULL()
			
 
				   INTEGER(KIND=C_INT)            :: i,Nelt,res,cpus
			
 
				   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
			
 
				-  INTEGER(KIND=C_INT)            :: neq,ng,nb,it,it_tot
			
 
				-  REAL(KIND=C_DOUBLE)            :: r, coeff2
			
 
				+  INTEGER(KIND=C_INT)            :: it,it_tot
			
 
				   REAL(KIND=C_DOUBLE),TARGET     :: flops
			
 
				 
			
 
				   TYPE(C_PTR) :: cl_loop_element = C_NULL_PTR ! loop codelet
			
--- a/src/common/fxt.h
+++ b/src/common/fxt.h
@@ -37,7 +37,7 @@
 
				 #include <starpu.h>
			
 
				 
			
 
				 /* some key to identify the worker kind */
			
 
				-#define _STARPU_FUT_WORKER_KEY(kind) (kind - 0x100)
			
 
				+#define _STARPU_FUT_WORKER_KEY(kind) ((kind) + 0x100) /* worker kind -> key; argument parenthesized so any expression expands safely */
			
 
				 #define _STARPU_FUT_KEY_WORKER(key) ((key) - 0x100) /* inverse mapping: key -> worker kind */
			
 
				 
			
 
				 #define _STARPU_FUT_WORKER_INIT_START	0x5100
			
--- a/src/core/detect_combined_workers.c
+++ b/src/core/detect_combined_workers.c
@@ -252,7 +252,7 @@ static void find_and_assign_combinations_without_hwloc(int *workerids, int nwork
 
				 	int cpu_workers[STARPU_NMAXWORKERS];
			
 
				 	unsigned ncpus = 0;
			
 
				 #ifdef STARPU_USE_MIC
			
 
				-	unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices;
			
 
				+	unsigned nb_mics = _starpu_get_machine_config()->topology.ndevices[STARPU_MIC_WORKER];
			
 
				 	unsigned * nmics_table;
			
 
				 	int * mic_id;
			
 
				 	int ** mic_workers;
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -3058,7 +3058,7 @@ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size
 
				 	int busid = starpu_bus_get_id(src_node, dst_node);
			
 
				 	int direct = starpu_bus_get_direct(busid);
			
 
				 #endif
			
 
				-	float ngpus = topology->ncudagpus+topology->nopenclgpus;
			
 
				+	float ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER];
			
 
				 #ifdef STARPU_DEVEL
			
 
				 #warning FIXME: ngpus should not be used e.g. for slow disk transfers...
			
 
				 #endif
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -1217,22 +1217,22 @@ void _starpu_initialize_registered_performance_models(void)
 
				 	starpu_perfmodel_initialize();
			
 
				 
			
 
				 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				-	unsigned ncores = conf->topology.nhwcpus;
			
 
				-	unsigned ncuda =  conf->topology.nhwcudagpus;
			
 
				-	unsigned nopencl = conf->topology.nhwopenclgpus;
			
 
				+	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				+	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
			
 
				+	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
			
 
				 	unsigned nmic = 0;
			
 
				 	enum starpu_worker_archtype archtype;
			
 
				 #if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				 	unsigned i;
			
 
				 #endif
			
 
				 #if STARPU_MAXMICDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwmicdevices; i++)
			
 
				-		nmic += conf->topology.nhwmiccores[i];
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
			
 
				+		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
			
 
				 #endif
			
 
				 	unsigned nmpi = 0;
			
 
				 #if STARPU_MAXMPIDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwmpidevices; i++)
			
 
				-		nmpi += conf->topology.nhwmpicores[i];
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				+		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
			
 
				 #endif
			
 
				 
			
 
				 	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -560,13 +560,13 @@ static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config
 
				 					    &(config->current_cuda_gpuid),
			
 
				 					    (int *)topology->workers_cuda_gpuid,
			
 
				 					    "STARPU_WORKERS_CUDAID",
			
 
				-					    topology->nhwcudagpus,
			
 
				+					    topology->nhwdevices[STARPU_CUDA_WORKER],
			
 
				 					    STARPU_CUDA_WORKER);
			
 
				 }
			
 
				 
			
 
				 static inline int _starpu_get_next_cuda_gpuid(struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
			
 
				+	unsigned i = ((config->current_cuda_gpuid++) % config->topology.ndevices[STARPU_CUDA_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_cuda_gpuid[i];
			
 
				 }
			
@@ -584,7 +584,7 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				 					    &(config->current_opencl_gpuid),
			
 
				 					    (int *)topology->workers_opencl_gpuid,
			
 
				 					    "STARPU_WORKERS_OPENCLID",
			
 
				-					    topology->nhwopenclgpus,
			
 
				+					    topology->nhwdevices[STARPU_OPENCL_WORKER],
			
 
				 					    STARPU_OPENCL_WORKER);
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
@@ -647,7 +647,7 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				 
			
 
				 static inline int _starpu_get_next_opencl_gpuid(struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
			
 
				+	unsigned i = ((config->current_opencl_gpuid++) % config->topology.ndevices[STARPU_OPENCL_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_opencl_gpuid[i];
			
 
				 }
			
@@ -665,13 +665,13 @@ static void _starpu_initialize_workers_fpga_deviceid(struct _starpu_machine_conf
 
				 					    &(config->current_fpga_deviceid),
			
 
				 					    (int *)topology->workers_fpga_deviceid,
			
 
				 					    "STARPU_WORKERS_FPGAID",
			
 
				-					    topology->nhwfpgafpgas,
			
 
				+					    topology->nhwdevices[STARPU_FPGA_WORKER],
			
 
				 					    STARPU_FPGA_WORKER);
			
 
				 }
			
 
				 
			
 
				 static inline int _starpu_get_next_fpga_deviceid (struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_fpga_deviceid++) % config->topology.nfpgafpgas);
			
 
				+	unsigned i = ((config->current_fpga_deviceid++) % config->topology.ndevices[STARPU_FPGA_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_fpga_deviceid[i];
			
 
				 }
			
@@ -690,7 +690,7 @@ static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_confi
 
				 					    &(config->current_mic_deviceid),
			
 
				 					    (int *)topology->workers_mic_deviceid,
			
 
				 					    "STARPU_WORKERS_MICID",
			
 
				-					    topology->nhwmiccores,
			
 
				+					    topology->nhwdevices[STARPU_MIC_WORKER],
			
 
				 					    STARPU_MIC_WORKER);
			
 
				 }
			
 
				 #endif
			
@@ -700,7 +700,7 @@ static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_confi
 
				 #ifdef STARPU_USE_MIC
			
 
				 static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
			
 
				+	unsigned i = ((config->current_mic_deviceid++) % config->topology.ndevices[STARPU_MIC_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_mic_deviceid[i];
			
 
				 }
			
@@ -710,7 +710,7 @@ static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *c
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				 static inline int _starpu_get_next_mpi_deviceid(struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_mpi_deviceid++) % config->topology.nmpidevices);
			
 
				+	unsigned i = ((config->current_mpi_deviceid++) % config->topology.ndevices[STARPU_MPI_MS_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_mpi_ms_deviceid[i];
			
 
				 }
			
@@ -719,14 +719,14 @@ static void _starpu_init_mpi_topology(struct _starpu_machine_config *config, lon
 
				 {
			
 
				 	/* Discover the topology of the mpi node identified by MPI_IDX. That
			
 
				 	 * means, make this StarPU instance aware of the number of cores available
			
 
				-	 * on this MPI device. Update the `nhwmpicores' topology field
			
 
				+	 * on this MPI device. Update the `nhwworker[STARPU_MPI_MS_WORKER]' topology field
			
 
				 	 * accordingly. */
			
 
				 
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				 	int nbcores;
			
 
				 	_starpu_src_common_sink_nbcores(_starpu_mpi_ms_nodes[mpi_idx], &nbcores);
			
 
				-	topology->nhwmpicores[mpi_idx] = nbcores;
			
 
				+	topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = nbcores;
			
 
				 }
			
 
				 
			
 
				 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
			
@@ -736,14 +736,14 @@ static void _starpu_init_mic_topology(struct _starpu_machine_config *config, lon
 
				 {
			
 
				 	/* Discover the topology of the mic node identified by MIC_IDX. That
			
 
				 	 * means, make this StarPU instance aware of the number of cores available
			
 
				-	 * on this MIC device. Update the `nhwmiccores' topology field
			
 
				+	 * on this MIC device. Update the `nhwworker[STARPU_MIC_WORKER]' topology field
			
 
				 	 * accordingly. */
			
 
				 
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				 	int nbcores;
			
 
				 	_starpu_src_common_sink_nbcores(_starpu_mic_nodes[mic_idx], &nbcores);
			
 
				-	topology->nhwmiccores[mic_idx] = nbcores;
			
 
				+	topology->nhwworker[STARPU_MIC_WORKER][mic_idx] = nbcores;
			
 
				 }
			
 
				 
			
 
				 static int _starpu_init_mic_node(struct _starpu_machine_config *config, int mic_idx,
			
@@ -862,7 +862,8 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				 
			
 
				 	nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
			
 
				 
			
 
				-	topology->nhwcpus = 0;
			
 
				+	topology->nhwdevices[STARPU_CPU_WORKER] = 1;
			
 
				+	topology->nhwworker[STARPU_CPU_WORKER][0] = 0;
			
 
				 	topology->nhwpus = 0;
			
 
				 
			
 
				 #ifndef STARPU_SIMGRID
			
@@ -906,7 +907,7 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				 #endif
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-	config->topology.nhwcpus = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
			
 
				+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
			
 
				 #elif defined(STARPU_HAVE_HWLOC)
			
 
				 	/* Discover the CPUs relying on the hwloc interface and fills CONFIG
			
 
				 	 * accordingly. */
			
@@ -926,24 +927,24 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				 							 HWLOC_OBJ_PU);
			
 
				 	}
			
 
				 
			
 
				-	topology->nhwcpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
			
 
				+	topology->nhwworker[STARPU_CPU_WORKER][0] = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
			
 
				 	topology->nhwpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth);
			
 
				 
			
 
				 #elif defined(HAVE_SYSCONF)
			
 
				 	/* Discover the CPUs relying on the sysconf(3) function and fills
			
 
				 	 * CONFIG accordingly. */
			
 
				 
			
 
				-	config->topology.nhwcpus = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
			
 
				+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
			
 
				 
			
 
				 #elif defined(_WIN32)
			
 
				 	/* Discover the CPUs on Cygwin and MinGW systems. */
			
 
				 
			
 
				 	SYSTEM_INFO sysinfo;
			
 
				 	GetSystemInfo(&sysinfo);
			
 
				-	config->topology.nhwcpus = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
			
 
				+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
			
 
				 #else
			
 
				 #warning no way to know number of cores, assuming 1
			
 
				-	config->topology.nhwcpus = config->topology.nhwpus = 1;
			
 
				+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = 1;
			
 
				 #endif
			
 
				 
			
 
				 	if (config->conf.ncuda != 0)
			
@@ -953,7 +954,7 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				         if (config->conf.nfpga != 0)
			
 
				 		_starpu_fpga_discover_devices(config);
			
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				-        config->topology.nhwmpi = _starpu_mpi_src_get_device_count();
			
 
				+        config->topology.nhwdevices[STARPU_MPI_MS_WORKER] = _starpu_mpi_src_get_device_count();
			
 
				 #endif
			
 
				 
			
 
				 	topology_is_initialized = 1;
			
@@ -968,7 +969,7 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 	unsigned i;
			
 
				 
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				-	int nhyperthreads = topology->nhwpus / topology->nhwcpus;
			
 
				+	int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
			
 
				 	unsigned bind_on_core = 0;
			
 
				 	int scale = 1;
			
 
				 
			
@@ -1032,7 +1033,7 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 						}
			
 
				 						else
			
 
				 						{
			
 
				-							endval = (bind_on_core ? topology->nhwcpus : topology->nhwpus) - 1;
			
 
				+							endval = (bind_on_core ? topology->nhwworker[STARPU_CPU_WORKER][0] : topology->nhwpus) - 1;
			
 
				 							if (*strval)
			
 
				 								strval++;
			
 
				 						}
			
@@ -1136,7 +1137,7 @@ static inline unsigned _starpu_get_next_bindid(struct _starpu_machine_config *co
 
				 	STARPU_ASSERT_MSG(topology_is_initialized, "The StarPU core is not initialized yet, have you called starpu_init?");
			
 
				 
			
 
				 	unsigned current_preferred;
			
 
				-	unsigned nhyperthreads = topology->nhwpus / topology->nhwcpus;
			
 
				+	unsigned nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
			
 
				 	unsigned ncores = topology->nhwpus / nhyperthreads;
			
 
				 	unsigned i;
			
 
				 
			
@@ -1219,7 +1220,7 @@ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
 
				 #endif
			
 
				 	_starpu_init_topology(config);
			
 
				 
			
 
				-	return config->topology.nhwcpus;
			
 
				+	return config->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				 }
			
 
				 
			
 
				 unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config)
			
@@ -1304,7 +1305,7 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 
				 
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-	topology->nhwmiccores[mic_idx] = 0;
			
 
				+	topology->nhwworker[STARPU_MIC_WORKER][mic_idx] = 0;
			
 
				 
			
 
				 	_starpu_init_mic_topology(config, mic_idx);
			
 
				 
			
@@ -1316,29 +1317,29 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 
				 	{
			
 
				 		/* Nothing was specified, so let's use the number of
			
 
				 		 * detected mic cores. ! */
			
 
				-		nmiccores = topology->nhwmiccores[mic_idx];
			
 
				+		nmiccores = topology->nhwworker[STARPU_MIC_WORKER][mic_idx];
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
			
 
				+		if ((unsigned) nmiccores > topology->nhwworker[STARPU_MIC_WORKER][mic_idx])
			
 
				 		{
			
 
				 			/* The user requires more MIC cores than there is available */
			
 
				-			_STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwmiccores[mic_idx]);
			
 
				-			nmiccores = topology->nhwmiccores[mic_idx];
			
 
				+			_STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwworker[STARPU_MIC_WORKER][mic_idx]);
			
 
				+			nmiccores = topology->nhwworker[STARPU_MIC_WORKER][mic_idx];
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	topology->nmiccores[mic_idx] = nmiccores;
			
 
				-	STARPU_ASSERT_MSG(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
			
 
				-			  "topology->nmiccores[mic_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
			
 
				-			  mic_idx, topology->nmiccores[mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
			
 
				+	topology->nworker[STARPU_MIC_WORKER][mic_idx] = nmiccores;
			
 
				+	STARPU_ASSERT_MSG(topology->nworker[STARPU_MIC_WORKER][mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
			
 
				+			  "topology->nworker[STARPU_MIC_WORKER][mic_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
			
 
				+			  mic_idx, topology->nworker[STARPU_MIC_WORKER][mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
			
 
				 
			
 
				 	/* _starpu_initialize_workers_mic_deviceid (config); */
			
 
				 
			
 
				 	mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
			
 
				-	mic_worker_set[mic_idx].nworkers = topology->nmiccores[mic_idx];
			
 
				+	mic_worker_set[mic_idx].nworkers = topology->nworker[STARPU_MIC_WORKER][mic_idx];
			
 
				 	unsigned miccore_id;
			
 
				-	for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
			
 
				+	for (miccore_id = 0; miccore_id < topology->nworker[STARPU_MIC_WORKER][mic_idx]; miccore_id++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + miccore_id;
			
 
				 		config->workers[worker_idx].set = &mic_worker_set[mic_idx];
			
@@ -1355,7 +1356,7 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 
				 	}
			
 
				 	_starpu_mic_nodes[mic_idx]->baseworkerid = topology->nworkers;
			
 
				 
			
 
				-	topology->nworkers += topology->nmiccores[mic_idx];
			
 
				+	topology->nworkers += topology->nworker[STARPU_MIC_WORKER][mic_idx];
			
 
				 }
			
 
				 
			
 
				 static COIENGINE mic_handles[STARPU_MAXMICDEVS];
			
@@ -1369,7 +1370,7 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
 
				 {
			
 
				         struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-        topology->nhwmpicores[mpi_idx] = 0;
			
 
				+        topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = 0;
			
 
				 
			
 
				         _starpu_init_mpi_topology(config, mpi_idx);
			
 
				 
			
@@ -1380,28 +1381,28 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
 
				         {
			
 
				                 /* Nothing was specified, so let's use the number of
			
 
				                  * detected mpi cores. ! */
			
 
				-                nmpicores = topology->nhwmpicores[mpi_idx];
			
 
				+                nmpicores = topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx];
			
 
				         }
			
 
				         else
			
 
				         {
			
 
				-                if ((unsigned) nmpicores > topology->nhwmpicores[mpi_idx])
			
 
				+                if ((unsigned) nmpicores > topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx])
			
 
				                 {
			
 
				                         /* The user requires more MPI cores than there is available */
			
 
				                         _STARPU_MSG("# Warning: %d MPI cores requested. Only %u available.\n",
			
 
				-				    nmpicores, topology->nhwmpicores[mpi_idx]);
			
 
				-                        nmpicores = topology->nhwmpicores[mpi_idx];
			
 
				+				    nmpicores, topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx]);
			
 
				+                        nmpicores = topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx];
			
 
				                 }
			
 
				         }
			
 
				 
			
 
				-        topology->nmpicores[mpi_idx] = nmpicores;
			
 
				-        STARPU_ASSERT_MSG(topology->nmpicores[mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
			
 
				-                        "topology->nmpicores[mpi_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
			
 
				-                        mpi_idx, topology->nmpicores[mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
			
 
				+        topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx] = nmpicores;
			
 
				+        STARPU_ASSERT_MSG(topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
			
 
				+                        "topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
			
 
				+                        mpi_idx, topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
			
 
				 
			
 
				         mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
			
 
				-        mpi_worker_set[mpi_idx].nworkers = topology->nmpicores[mpi_idx];
			
 
				+        mpi_worker_set[mpi_idx].nworkers = topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx];
			
 
				         unsigned mpicore_id;
			
 
				-        for (mpicore_id = 0; mpicore_id < topology->nmpicores[mpi_idx]; mpicore_id++)
			
 
				+        for (mpicore_id = 0; mpicore_id < topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx]; mpicore_id++)
			
 
				         {
			
 
				                 int worker_idx = topology->nworkers + mpicore_id;
			
 
				                 config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
			
@@ -1418,7 +1419,7 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
 
				         }
			
 
				 	_starpu_mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
			
 
				 
			
 
				-        topology->nworkers += topology->nmpicores[mpi_idx];
			
 
				+        topology->nworkers += topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx];
			
 
				 }
			
 
				 #endif
			
 
				 
			
@@ -1461,13 +1462,13 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		topology->nmicdevices = 0;
			
 
				+		topology->ndevices[STARPU_MIC_WORKER] = 0;
			
 
				 		unsigned i;
			
 
				 		for (i = 0; i < (unsigned) reqmicdevices; i++)
			
 
				 			if (0 == _starpu_init_mic_node(config, i, &mic_handles[i], &_starpu_mic_process[i]))
			
 
				-				topology->nmicdevices++;
			
 
				+				topology->ndevices[STARPU_MIC_WORKER]++;
			
 
				 
			
 
				-		for (i = 0; i < topology->nmicdevices; i++)
			
 
				+		for (i = 0; i < topology->ndevices[STARPU_MIC_WORKER]; i++)
			
 
				 			_starpu_init_mic_config(config, user_conf, i);
			
 
				 	}
			
 
				 #endif
			
@@ -1498,10 +1499,10 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		topology->nmpidevices = reqmpidevices;
			
 
				+		topology->ndevices[STARPU_MPI_MS_WORKER] = reqmpidevices;
			
 
				 
			
 
				 		/* if user don't want to use MPI slaves, we close the slave processes */
			
 
				-		if (no_mp_config && topology->nmpidevices == 0)
			
 
				+		if (no_mp_config && topology->ndevices[STARPU_MPI_MS_WORKER] == 0)
			
 
				 		{
			
 
				 			_starpu_mpi_common_mp_deinit();
			
 
				 			exit(0);
			
@@ -1510,10 +1511,10 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 
				 		if (!no_mp_config)
			
 
				 		{
			
 
				 			unsigned i;
			
 
				-			for (i = 0; i < topology->nmpidevices; i++)
			
 
				+			for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				 				_starpu_mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
			
 
				 
			
 
				-			for (i = 0; i < topology->nmpidevices; i++)
			
 
				+			for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				 				_starpu_init_mpi_config(config, user_conf, i);
			
 
				 		}
			
 
				 	}
			
@@ -1549,12 +1550,12 @@ static void _starpu_deinit_mp_config(struct _starpu_machine_config *config)
 
				 	unsigned i;
			
 
				 
			
 
				 #ifdef STARPU_USE_MIC
			
 
				-	for (i = 0; i < topology->nmicdevices; i++)
			
 
				+	for (i = 0; i < topology->ndevices[STARPU_MIC_WORKER]; i++)
			
 
				 		_starpu_deinit_mic_node(i);
			
 
				 	_starpu_mic_clear_kernels();
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				-	for (i = 0; i < topology->nmpidevices; i++)
			
 
				+	for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				 		_starpu_deinit_mpi_node(i);
			
 
				 #endif
			
 
				 }
			
@@ -1664,9 +1665,10 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 	}
			
 
				 
			
 
				 	/* Now we know how many CUDA devices will be used */
			
 
				-	topology->ncudagpus = ncuda;
			
 
				-	topology->nworkerpercuda = nworker_per_cuda;
			
 
				-	STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
			
 
				+	topology->ndevices[STARPU_CUDA_WORKER] = ncuda;
			
 
				+	for (i = 0; i < ncuda; i++)
			
 
				+		topology->nworker[STARPU_CUDA_WORKER][i] = nworker_per_cuda;
			
 
				+	STARPU_ASSERT(topology->ndevices[STARPU_CUDA_WORKER] <= STARPU_MAXCUDADEVS);
			
 
				 
			
 
				 	_starpu_initialize_workers_cuda_gpuid(config);
			
 
				 
			
@@ -1693,11 +1695,11 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 	if (!topology->cuda_th_per_dev)
			
 
				 	{
			
 
				 		cuda_worker_set[0].workers = &config->workers[topology->nworkers];
			
 
				-		cuda_worker_set[0].nworkers = topology->ncudagpus * nworker_per_cuda;
			
 
				+		cuda_worker_set[0].nworkers = topology->ndevices[STARPU_CUDA_WORKER] * nworker_per_cuda;
			
 
				 	}
			
 
				 
			
 
				 	unsigned cudagpu;
			
 
				-	for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
			
 
				+	for (cudagpu = 0; cudagpu < topology->ndevices[STARPU_CUDA_WORKER]; cudagpu++)
			
 
				 	{
			
 
				 		int devid = _starpu_get_next_cuda_gpuid(config);
			
 
				 		int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
			
@@ -1769,7 +1771,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 #endif
			
 
				         }
			
 
				 
			
 
				-	topology->nworkers += topology->ncudagpus * nworker_per_cuda;
			
 
				+	topology->nworkers += topology->ndevices[STARPU_CUDA_WORKER] * nworker_per_cuda;
			
 
				 #endif
			
 
				 
			
 
				 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
@@ -1813,20 +1815,22 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	topology->nopenclgpus = nopencl;
			
 
				-	STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				+	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
			
 
				+	for (i = 0; i < nopencl; i++)
			
 
				+		topology->nworker[STARPU_OPENCL_WORKER][i] = 1; /* one worker per OpenCL device; must index the OpenCL row, not the CUDA one */
			
 
				+	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				 
			
 
				 	_starpu_initialize_workers_opencl_gpuid(config);
			
 
				 
			
 
				 	unsigned openclgpu;
			
 
				-	for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
			
 
				+	for (openclgpu = 0; openclgpu < topology->ndevices[STARPU_OPENCL_WORKER]; openclgpu++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + openclgpu;
			
 
				 		int devid = _starpu_get_next_opencl_gpuid(config);
			
 
				 		if (devid == -1)
			
 
				 		{
			
 
				 			// There is no more devices left
			
 
				-			topology->nopenclgpus = openclgpu;
			
 
				+			topology->ndevices[STARPU_OPENCL_WORKER] = openclgpu;
			
 
				 			break;
			
 
				 		}
			
 
				 		config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
			
@@ -1841,7 +1845,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		config->worker_mask |= STARPU_OPENCL;
			
 
				 	}
			
 
				 
			
 
				-	topology->nworkers += topology->nopenclgpus;
			
 
				+	topology->nworkers += topology->ndevices[STARPU_OPENCL_WORKER];
			
 
				 #endif
			
 
				 
			
 
				 
			
@@ -1884,19 +1888,21 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	topology->nfpgafpgas = nfpga;
			
 
				-	STARPU_ASSERT(topology->nfpgafpgas + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				+	topology->ndevices[STARPU_FPGA_WORKER] = nfpga;
			
 
				+	for (i = 0; i < nfpga; i++)
			
 
				+		topology->nworker[STARPU_FPGA_WORKER][i] = 1;
			
 
				+	STARPU_ASSERT(topology->ndevices[STARPU_FPGA_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				 
			
 
				 	_starpu_initialize_workers_fpga_deviceid(config);
			
 
				 
			
 
				 	unsigned fpgafpga;
			
 
				-	for (fpgafpga = 0; fpgafpga < topology->nfpgafpgas; fpgafpga++)
			
 
				+	for (fpgafpga = 0; fpgafpga < topology->ndevices[STARPU_FPGA_WORKER]; fpgafpga++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + fpgafpga;
			
 
				 		int devid = _starpu_get_next_fpga_deviceid(config);
			
 
				 		if (devid == -1)
			
 
				 		{ // There is no more devices left
			
 
				-			topology->nfpgafpgas = fpgafpga;
			
 
				+			topology->ndevices[STARPU_FPGA_WORKER] = fpgafpga;
			
 
				 			break;
			
 
				 		}
			
 
				 		config->workers[worker_idx].arch = STARPU_FPGA_WORKER;
			
@@ -1911,7 +1917,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		config->worker_mask |= STARPU_FPGA;
			
 
				 	}
			
 
				 
			
 
				-	topology->nworkers += topology->nfpgafpgas;
			
 
				+	topology->nworkers += topology->ndevices[STARPU_FPGA_WORKER];
			
 
				 #endif
			
 
				 
			
 
				 #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
			
@@ -1931,13 +1937,13 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 			unsigned mic_busy_cpus = 0;
			
 
				 			int j = 0;
			
 
				 			for (j = 0; j < STARPU_MAXMICDEVS; j++)
			
 
				-				mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);
			
 
				+				mic_busy_cpus += (topology->nworker[STARPU_MIC_WORKER][j] ? 1 : 0);
			
 
				 
			
 
				 			unsigned mpi_ms_busy_cpus = 0;
			
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				 #ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
			
 
				 			for (j = 0; j < STARPU_MAXMPIDEVS; j++)
			
 
				-				mpi_ms_busy_cpus += (topology->nmpicores[j] ? 1 : 0);
			
 
				+				mpi_ms_busy_cpus += (topology->nworker[STARPU_MPI_MS_WORKER][j] ? 1 : 0);
			
 
				 #else
			
 
				 			mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
			
 
				 #endif
			
@@ -1945,15 +1951,15 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 			unsigned cuda_busy_cpus = 0;
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				 			cuda_busy_cpus =
			
 
				-				topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ? (topology->ncudagpus ? 1 : 0) :
			
 
				-				topology->cuda_th_per_stream ? (nworker_per_cuda * topology->ncudagpus) : topology->ncudagpus;
			
 
				+				topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ? (topology->ndevices[STARPU_CUDA_WORKER] ? 1 : 0) :
			
 
				+				topology->cuda_th_per_stream ? (nworker_per_cuda * topology->ndevices[STARPU_CUDA_WORKER]) : topology->ndevices[STARPU_CUDA_WORKER];
			
 
				 #endif
			
 
				 			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
			
 
				 				+ cuda_busy_cpus
			
 
				-				+ topology->nopenclgpus
			
 
				-				+ topology->nfpgafpgas;
			
 
				+				+ topology->ndevices[STARPU_OPENCL_WORKER]
			
 
				+				+ topology->ndevices[STARPU_FPGA_WORKER]; /* last term of the busy-CPU sum: FPGA devices are counted too */
			
 
				 
			
 
				-			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
			
 
				+			long avail_cpus = (long) topology->nhwworker[STARPU_CPU_WORKER][0] - (long) already_busy_cpus;
			
 
				 			if (avail_cpus < 0)
			
 
				 				avail_cpus = 0;
			
 
				 			int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
			
@@ -1983,12 +1989,13 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 
			
 
				 	}
			
 
				 
			
 
				-	topology->ncpus = ncpu;
			
 
				-	STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				+	topology->ndevices[STARPU_CPU_WORKER] = 1;
			
 
				+	topology->nworker[STARPU_CPU_WORKER][0] = ncpu;
			
 
				+	STARPU_ASSERT(topology->nworker[STARPU_CPU_WORKER][0] + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				 
			
 
				 	unsigned cpu;
			
 
				 	unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
			
 
				-	for (cpu = 0; cpu < topology->ncpus; cpu++)
			
 
				+	for (cpu = 0; cpu < topology->nworker[STARPU_CPU_WORKER][0]; cpu++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + cpu;
			
 
				 		config->workers[worker_idx].arch = STARPU_CPU_WORKER;
			
@@ -2003,7 +2010,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		config->worker_mask |= STARPU_CPU;
			
 
				 	}
			
 
				 
			
 
				-	topology->nworkers += topology->ncpus;
			
 
				+	topology->nworkers += topology->nworker[STARPU_CPU_WORKER][0];
			
 
				 #endif
			
 
				 
			
 
				 	if (topology->nworkers == 0)
			
@@ -2136,7 +2143,7 @@ int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid S
 
				 
			
 
				 			if (workerid >= 0)
			
 
				 				/* This shouldn't happen for workers */
			
 
				-				_STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwcpus, config->topology.nhwpus);
			
 
				+				_STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwworker[STARPU_CPU_WORKER][0], config->topology.nhwpus);
			
 
				 			ret = -1;
			
 
				 		}
			
 
				 		else
			
@@ -2426,7 +2433,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 #endif
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
			
 
				-		for (i = 0; i < config->topology.ncudagpus; i++)
			
 
				+		for (i = 0; i < config->topology.ndevices[STARPU_CUDA_WORKER]; i++)
			
 
				 		{
			
 
				 			hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
			
 
				 			if (obj)
			
@@ -2461,7 +2468,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 		}
			
 
				 #endif
			
 
				 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
			
 
				-		if (config->topology.nopenclgpus > 0)
			
 
				+		if (config->topology.ndevices[STARPU_OPENCL_WORKER] > 0)
			
 
				 		{
			
 
				 			cl_int err;
			
 
				 			cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
			
@@ -3180,7 +3187,7 @@ void starpu_topology_print(FILE *output)
 
				 	unsigned worker;
			
 
				 	unsigned nworkers = starpu_worker_get_count();
			
 
				 	unsigned ncombinedworkers = topology->ncombinedworkers;
			
 
				-	unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;
			
 
				+	unsigned nthreads_per_core = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 	hwloc_topology_t topo = topology->hwtopology;
			
@@ -3269,5 +3276,5 @@ unsigned _starpu_get_nhyperthreads()
 
				 {
			
 
				 	struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				 
			
 
				-	return config->topology.nhwpus / config->topology.nhwcpus;
			
 
				+	return config->topology.nhwpus / config->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				 }
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -1036,7 +1036,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 	}
			
 
				 
			
 
				 #if defined(STARPU_USE_MPI_MASTER_SLAVE) && !defined(STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD)
			
 
				-        if (pconfig->topology.nmpidevices > 0)
			
 
				+        if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
			
 
				         {
			
 
				                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
			
 
				                 struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
			
@@ -2150,32 +2150,22 @@ unsigned starpu_worker_is_slave_somewhere(int workerid)
 
				 
			
 
				 int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
			
 
				 {
			
 
				-	switch (type)
			
 
				-	{
			
 
				-		case STARPU_CPU_WORKER:
			
 
				-			return _starpu_config.topology.ncpus;
			
 
				-
			
 
				-		case STARPU_CUDA_WORKER:
			
 
				-			return _starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda;
			
 
				-
			
 
				-		case STARPU_OPENCL_WORKER:
			
 
				-			return _starpu_config.topology.nopenclgpus;
			
 
				-
			
 
				-		case STARPU_MIC_WORKER:
			
 
				-			return _starpu_config.topology.nmicdevices;
			
 
				-
			
 
				-                case STARPU_MPI_MS_WORKER:
			
 
				-                        return _starpu_config.topology.nmpidevices;
			
 
				+	unsigned n = 0;
			
 
				 
			
 
				-                case STARPU_ANY_WORKER:
			
 
				-                        return _starpu_config.topology.ncpus+
			
 
				-				_starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda+
			
 
				-                                _starpu_config.topology.nopenclgpus+
			
 
				-                                _starpu_config.topology.nmicdevices+
			
 
				-                                _starpu_config.topology.nmpidevices;
			
 
				-		default:
			
 
				+	if (type != STARPU_ANY_WORKER)
			
 
				+	{
			
 
				+		if (type >= STARPU_NARCH)
			
 
				 			return -EINVAL;
			
 
				+
			
 
				+		unsigned i;
			
 
				+		for (i = 0; i < _starpu_config.topology.ndevices[type]; i++)
			
 
				+			n += _starpu_config.topology.nworker[type][i];
			
 
				+		return n;
			
 
				 	}
			
 
				+
			
 
				+	for (type = 0; type < STARPU_NARCH; type++)
			
 
				+		n += starpu_worker_get_count_by_type(type);
			
 
				+	return n;
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_combined_worker_get_count(void)
			
@@ -2185,17 +2175,17 @@ unsigned starpu_combined_worker_get_count(void)
 
				 
			
 
				 unsigned starpu_cpu_worker_get_count(void)
			
 
				 {
			
 
				-	return _starpu_config.topology.ncpus;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_cuda_worker_get_count(void)
			
 
				 {
			
 
				-	return _starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_opencl_worker_get_count(void)
			
 
				 {
			
 
				-	return _starpu_config.topology.nopenclgpus;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER);
			
 
				 }
			
 
				 
			
 
				 int starpu_asynchronous_copy_disabled(void)
			
@@ -2230,17 +2220,12 @@ int starpu_asynchronous_mpi_ms_copy_disabled(void)
 
				 
			
 
				 unsigned starpu_mic_worker_get_count(void)
			
 
				 {
			
 
				-	int i = 0, count = 0;
			
 
				-
			
 
				-	for (i = 0; i < STARPU_MAXMICDEVS; i++)
			
 
				-		count += _starpu_config.topology.nmiccores[i];
			
 
				-
			
 
				-	return count;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_MIC_WORKER);
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_mpi_ms_worker_get_count(void)
			
 
				 {
			
 
				-        return _starpu_config.topology.nmpidevices;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_MPI_MS_WORKER);
			
 
				 }
			
 
				 
			
 
				 /* When analyzing performance, it is useful to see what is the processing unit
			
@@ -2656,7 +2641,8 @@ const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 
				 const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type)
			
 
				 {
			
 
				 	const char *ret = starpu_driver_info[type].name_var;
			
 
				-	STARPU_ASSERT(ret);
			
 
				+	if (!ret)
			
 
				+		ret = "UNKNOWN";
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -269,66 +269,33 @@ struct _starpu_machine_topology
 
				 	/** custom hwloc tree*/
			
 
				 	struct starpu_tree *tree;
			
 
				 
			
 
				-	/** Total number of CPU cores, as detected by the topology code. May
			
 
				-	 * be different from the actual number of CPU workers.
			
 
				-	 */
			
 
				-	unsigned nhwcpus;
			
 
				-
			
 
				 	/** Total number of PUs (i.e. threads), as detected by the topology code. May
			
 
				-	 * be different from the actual number of PU workers.
			
 
				+	 * be different from the actual number of CPU workers.
			
 
				 	 */
			
 
				 	unsigned nhwpus;
			
 
				 
			
 
				-	/** Total number of CUDA devices, as detected. May be different
			
 
				-	 * from the actual number of CUDA workers.
			
 
				+	/** Total number of devices, as detected. May be different from the
			
 
				+	 * actual number of devices run by StarPU.
			
 
				 	 */
			
 
				-	unsigned nhwcudagpus;
			
 
				-
			
 
				-	/** Total number of OpenCL devices, as detected. May be
			
 
				-	 * different from the actual number of OpenCL workers.
			
 
				+	unsigned nhwdevices[STARPU_NARCH];
			
 
				+	/** Total number of workers for each device, as detected. May be different from the
			
 
				+	 * actual number of workers run by StarPU.
			
 
				 	 */
			
 
				-	unsigned nhwopenclgpus;
			
 
				+	unsigned nhwworker[STARPU_NARCH][STARPU_NMAXDEVS];
			
 
				 
			
 
				-        /* Total number of FPGA devices, as detected. May be different
			
 
				-	 * from the actual number of FPGA workers.
			
 
				+	/** Actual number of devices used by StarPU.
			
 
				 	 */
			
 
				-	unsigned nhwfpgafpgas;
			
 
				+	unsigned ndevices[STARPU_NARCH];
			
 
				 
			
 
				-	/** Total number of MPI nodes, as detected. May be different
			
 
				-	 * from the actual number of node workers.
			
 
				+	/** Number of workers per device
			
 
				 	 */
			
 
				-	unsigned nhwmpi;
			
 
				-
			
 
				-	/** Actual number of CPU workers used by StarPU. */
			
 
				-	unsigned ncpus;
			
 
				+	unsigned nworker[STARPU_NARCH][STARPU_NMAXDEVS];
			
 
				 
			
 
				-	/** Actual number of CUDA GPUs used by StarPU. */
			
 
				-	unsigned ncudagpus;
			
 
				-	unsigned nworkerpercuda;
			
 
				+	/** Whether we should have one thread per stream */
			
 
				 	int cuda_th_per_stream;
			
 
				+	/** Whether we should have one thread per device */
			
 
				 	int cuda_th_per_dev;
			
 
				 
			
 
				-	/** Actual number of OpenCL workers used by StarPU. */
			
 
				-	unsigned nopenclgpus;
			
 
				-
			
 
				-        /* Actual number of Fpga workers used by StarPU. */
			
 
				-	unsigned nfpgafpgas;
			
 
				-
			
 
				-	/** Actual number of MPI workers used by StarPU. */
			
 
				-	unsigned nmpidevices;
			
 
				-        unsigned nhwmpidevices;
			
 
				-
			
 
				-	unsigned nhwmpicores[STARPU_MAXMPIDEVS]; /**< Each MPI node has its set of cores. */
			
 
				-	unsigned nmpicores[STARPU_MAXMPIDEVS];
			
 
				-
			
 
				-	/** Topology of MP nodes (MIC) as well as necessary
			
 
				-	 * objects to communicate with them. */
			
 
				-	unsigned nhwmicdevices;
			
 
				-	unsigned nmicdevices;
			
 
				-
			
 
				-	unsigned nhwmiccores[STARPU_MAXMICDEVS]; /**< Each MIC node has its set of cores. */
			
 
				-	unsigned nmiccores[STARPU_MAXMICDEVS];
			
 
				-
			
 
				 	/** Indicates the successive logical PU identifier that should be used
			
 
				 	 * to bind the workers. It is either filled according to the
			
 
				 	 * user's explicit parameters (from starpu_conf) or according
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -110,7 +110,7 @@ _starpu_cuda_discover_devices (struct _starpu_machine_config *config)
 
				 	/* Discover the number of CUDA devices. Fill the result in CONFIG. */
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-	config->topology.nhwcudagpus = _starpu_simgrid_get_nbhosts("CUDA");
			
 
				+	config->topology.nhwdevices[STARPU_CUDA_WORKER] = _starpu_simgrid_get_nbhosts("CUDA");
			
 
				 #else
			
 
				 	int cnt;
			
 
				 	cudaError_t cures;
			
@@ -118,7 +118,7 @@ _starpu_cuda_discover_devices (struct _starpu_machine_config *config)
 
				 	cures = cudaGetDeviceCount (&cnt);
			
 
				 	if (STARPU_UNLIKELY(cures != cudaSuccess))
			
 
				 		cnt = 0;
			
 
				-	config->topology.nhwcudagpus = cnt;
			
 
				+	config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt;
			
 
				 #ifdef HAVE_LIBNVIDIA_ML
			
 
				 	nvmlInit();
			
 
				 #endif
			
@@ -710,7 +710,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
				 		init_device_context(devid, memnode);
			
 
				 
			
 
				 #ifndef STARPU_SIMGRID
			
 
				-		if (worker->config->topology.nworkerpercuda > 1 && props[devid].concurrentKernels == 0)
			
 
				+		if (worker->config->topology.nworker[STARPU_CUDA_WORKER][devid] > 1 && props[devid].concurrentKernels == 0)
			
 
				 			_STARPU_DISP("Warning: STARPU_NWORKER_PER_CUDA is %u, but CUDA device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid);
			
 
				 #endif /* !STARPU_SIMGRID */
			
 
				 	}
			
@@ -723,7 +723,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
				 		struct _starpu_worker *worker = &worker_set->workers[i];
			
 
				 		unsigned devid = worker->devid;
			
 
				 		unsigned workerid = worker->workerid;
			
 
				-		unsigned subdev = i % _starpu_get_machine_config()->topology.nworkerpercuda;
			
 
				+		unsigned subdev = i % _starpu_get_machine_config()->topology.nworker[STARPU_CUDA_WORKER][devid];
			
 
				 
			
 
				 		float size = (float) global_mem[devid] / (1<<30);
			
 
				 #ifdef STARPU_SIMGRID
			
--- a/src/drivers/cuda/starpu_cublas.c
+++ b/src/drivers/cuda/starpu_cublas.c
@@ -100,9 +100,11 @@ void starpu_cublas_shutdown(void)
 
				 void starpu_cublas_set_stream(void)
			
 
				 {
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				+	unsigned workerid = starpu_worker_get_id_check();
			
 
				+	int devid = starpu_worker_get_devid(workerid);
			
 
				 	if (!_starpu_get_machine_config()->topology.cuda_th_per_dev ||
			
 
				 		(!_starpu_get_machine_config()->topology.cuda_th_per_stream &&
			
 
				-		 _starpu_get_machine_config()->topology.nworkerpercuda > 1))
			
 
				+		 _starpu_get_machine_config()->topology.nworker[STARPU_CUDA_WORKER][devid] > 1))
			
 
				 		cublasSetKernelStream(starpu_cuda_get_local_stream());
			
 
				 #endif
			
 
				 }
			
--- a/src/drivers/max/driver_fpga.c
+++ b/src/drivers/max/driver_fpga.c
@@ -80,7 +80,7 @@ void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
 
				 	n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
			
 
				 	if (n != -1)
			
 
				 	{
			
 
				-		config->topology.nhwfpgafpgas = nfpgafpgas = n;
			
 
				+		config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
			
 
				 		return;
			
 
				 	}
			
 
				 
			
@@ -134,7 +134,7 @@ void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
 
				 
			
 
				         //LMemInterface addLMemInterface()
			
 
				         //// pour récupérer l'accès à la LMem
			
 
				-	config->topology.nhwfpgafpgas = nfpgafpgas = n;
			
 
				+	config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
			
 
				 }
			
 
				 
			
 
				 unsigned _starpu_fpga_get_device_count(void)
			
--- a/src/drivers/mic/driver_mic_source.c
+++ b/src/drivers/mic/driver_mic_source.c
@@ -241,7 +241,7 @@ unsigned starpu_mic_device_get_count(void)
 
				     struct _starpu_machine_config *config = _starpu_get_machine_config ();
			
 
				     struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-    return topology->nmicdevices;
			
 
				+    return topology->ndevices[STARPU_MIC_WORKER];
			
 
				 }
			
 
				 
			
 
				 starpu_mic_kernel_t _starpu_mic_src_get_kernel_from_codelet(struct starpu_codelet *cl, unsigned nimpl)
			
@@ -530,7 +530,7 @@ void *_starpu_mic_src_worker(void *arg)
 
				 	// Current task for a thread managing a worker set has no sense.
			
 
				 	_starpu_set_current_task(NULL);
			
 
				 
			
 
				-	for (i = 0; i < config->topology.nmiccores[devid]; i++)
			
 
				+	for (i = 0; i < config->topology.nworker[STARPU_MIC_WORKER][devid]; i++)
			
 
				 	{
			
 
				 		struct _starpu_worker *worker = &config->workers[baseworkerid+i];
			
 
				 		snprintf(worker->name, sizeof(worker->name), "MIC %u core %u", devid, i);
			
--- a/src/drivers/mpi/driver_mpi_common.c
+++ b/src/drivers/mpi/driver_mpi_common.c
@@ -127,7 +127,7 @@ void _starpu_mpi_common_mp_initialize_src_sink(struct _starpu_mp_node *node)
 
				 {
			
 
				         struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology;
			
 
				 
			
 
				-        node->nb_cores = topology->nhwcpus;
			
 
				+        node->nb_cores = topology->nhwworker[STARPU_CPU_WORKER][0];
			
 
				 }
			
 
				 
			
 
				 int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node)
			
--- a/src/drivers/mpi/driver_mpi_source.c
+++ b/src/drivers/mpi/driver_mpi_source.c
@@ -320,7 +320,7 @@ void *_starpu_mpi_src_worker(void *arg)
 
				                 // Current task for a thread managing a worker set has no sense.
			
 
				                 _starpu_set_current_task(NULL);
			
 
				 
			
 
				-                for (i = 0; i < config->topology.nmpicores[devid]; i++)
			
 
				+                for (i = 0; i < config->topology.nworker[STARPU_MPI_MS_WORKER][devid]; i++)
			
 
				                 {
			
 
				                         struct _starpu_worker *worker = &config->workers[baseworkerid+i];
			
 
				                         snprintf(worker->name, sizeof(worker->name), "MPI_MS %u core %u", devid, i);
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -71,7 +71,7 @@ _starpu_opencl_discover_devices(struct _starpu_machine_config *config)
 
				 	/* As OpenCL must have been initialized before calling this function,
			
 
				 	 * `nb_device' is ensured to be correctly set. */
			
 
				 	STARPU_ASSERT(init_done == 1);
			
 
				-	config->topology.nhwopenclgpus = nb_devices;
			
 
				+	config->topology.nhwdevices[STARPU_OPENCL_WORKER] = nb_devices;
			
 
				 }
			
 
				 
			
 
				 static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid)
			
--- a/src/profiling/bound.c
+++ b/src/profiling/bound.c
@@ -231,12 +231,9 @@ static double** initialize_arch_duration(int maxdevid, unsigned* maxncore_table)
 
				 static void initialize_duration(struct bound_task *task)
			
 
				 {
			
 
				 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				-	task->duration[STARPU_CPU_WORKER] = initialize_arch_duration(1,&conf->topology.nhwcpus);
			
 
				-	task->duration[STARPU_CUDA_WORKER] = initialize_arch_duration(conf->topology.nhwcudagpus,NULL);
			
 
				-	task->duration[STARPU_OPENCL_WORKER] = initialize_arch_duration(conf->topology.nhwopenclgpus,NULL);
			
 
				-	task->duration[STARPU_FPGA_WORKER] = initialize_arch_duration(conf->topology.nhwfpgafpgas,NULL);
			
 
				-	task->duration[STARPU_MIC_WORKER] = initialize_arch_duration(conf->topology.nhwmicdevices,conf->topology.nmiccores);
			
 
				-	task->duration[STARPU_MPI_MS_WORKER] = initialize_arch_duration(conf->topology.nhwmpidevices,conf->topology.nmpicores);
			
 
				+	enum starpu_worker_archtype type;
			
 
				+	for (type = 0; type < STARPU_NARCH; type++)
			
 
				+		task->duration[type] = initialize_arch_duration(conf->topology.nhwdevices[type], conf->topology.nworker[type]);
			
 
				 }
			
 
				 
			
 
				 static struct starpu_perfmodel_device device =
			
--- a/src/profiling/profiling.c
+++ b/src/profiling/profiling.c
@@ -577,7 +577,7 @@ int starpu_bus_get_ngpus(int busid)
 
				 	int ngpus = bus_ngpus[busid];
			
 
				 	if (!ngpus)
			
 
				 		/* Unknown number of GPUs, assume it's shared by all GPUs */
			
 
				-		ngpus = topology->ncudagpus+topology->nopenclgpus;
			
 
				+		ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER];
			
 
				 	return ngpus;
			
 
				 }
			
 
				 
			
--- a/src/util/openmp_runtime_support_omp_api.c
+++ b/src/util/openmp_runtime_support_omp_api.c
@@ -65,7 +65,7 @@ int starpu_omp_get_max_threads()
 
				 
			
 
				 int starpu_omp_get_num_procs (void)
			
 
				 {
			
 
				-	/* starpu_cpu_worker_get_count defined as topology.ncpus */
			
 
				+	/* starpu_cpu_worker_get_count sums topology.nworker[STARPU_CPU_WORKER][] over the CPU devices */
			
 
				 	return starpu_cpu_worker_get_count();
			
 
				 }
			
 
				 
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -17,6 +17,8 @@ include $(top_srcdir)/starpu.mk
 
				 
			
 
				 AM_CFLAGS += -Wno-unused
			
 
				 AM_CXXFLAGS += -Wno-unused
			
 
				+AM_FFLAGS += -Wno-unused
			
 
				+AM_FCFLAGS += -Wno-unused
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
			
 
				 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
			
 
				 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)