Bläddra i källkod

consider the hyperthreading when constructing the list of workers
add the STARPU_NTHREADS_PER_CORE env var to specify the number of hyperthreads to use (to add to the doc too)

Andra Hugo 11 år sedan
förälder
incheckning
8c75841bf9

+ 1 - 2
src/core/perfmodel/perfmodel_bus.c

@@ -678,7 +678,6 @@ static void benchmark_all_gpu_devices(void)
 #endif
 
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
-	ncpus = _starpu_topology_get_nhwcpu(config);
 
 #ifdef STARPU_USE_CUDA
 	ncuda = _starpu_get_cuda_device_count();
@@ -1564,7 +1563,7 @@ static void check_bus_config_file(void)
                 fclose(f);
 
                 // Loading current configuration
-                ncpus = _starpu_topology_get_nhwcpu(config);
+                ncpus = _starpu_topology_get_nhwpu(config);
 #ifdef STARPU_USE_CUDA
 		ncuda = _starpu_get_cuda_device_count();
 #endif

+ 102 - 17
src/core/topology.c

@@ -404,6 +404,7 @@ _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
 #endif
 
 
+
 static void
 _starpu_init_topology (struct _starpu_machine_config *config)
 {
@@ -417,7 +418,7 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 	if (topology_is_initialized)
 		return;
 
-	topology->nhwcpus = 0;
+	topology->nhwpus = 0;
 
 #ifndef STARPU_SIMGRID
 #ifdef STARPU_HAVE_HWLOC
@@ -426,7 +427,50 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 #endif
 #endif
 
-	_starpu_cpu_discover_devices(config);
+#ifdef STARPU_SIMGRID
+	config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
+#elif defined(STARPU_HAVE_HWLOC)
+	/* Discover the CPUs relying on the hwloc interface and fills CONFIG
+	 * accordingly. */
+
+
+	config->cpu_depth = hwloc_get_type_depth (topology->hwtopology,
+						  HWLOC_OBJ_CORE);
+	config->pu_depth = hwloc_get_type_depth (topology->hwtopology,
+					     HWLOC_OBJ_PU);
+
+	/* Would be very odd */
+	STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
+
+	if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
+		/* unknown, using logical procesors as fallback */
+		_STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
+		config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
+							 HWLOC_OBJ_PU);
+	}
+
+	topology->nhwcpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
+						       config->cpu_depth);
+	topology->nhwpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
+						       config->pu_depth);
+
+#elif defined(HAVE_SYSCONF)
+	/* Discover the CPUs relying on the sysconf(3) function and fills
+	 * CONFIG accordingly. */
+
+	config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+#elif defined(__MINGW32__) || defined(__CYGWIN__)
+	/* Discover the CPUs on Cygwin and MinGW systems. */
+
+	SYSTEM_INFO sysinfo;
+	GetSystemInfo(&sysinfo);
+	config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
+#else
+#warning no way to know number of cores, assuming 1
+	config->topology.nhwpus = 1;
+#endif
+
 	_starpu_cuda_discover_devices(config);
 	_starpu_opencl_discover_devices(config);
 #ifdef STARPU_USE_SCC
@@ -478,7 +522,7 @@ _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
 				if (endptr != strval)
 				{
 					topology->workers_bindid[i] =
-						(unsigned)(val % topology->nhwcpus);
+						(unsigned)(val % topology->nhwpus);
 					strval = endptr;
 				}
 				else
@@ -511,10 +555,38 @@ _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
 	}
 	else
 	{
-		/* by default, we take a round robin policy */
-		for (i = 0; i < STARPU_NMAXWORKERS; i++)
-			topology->workers_bindid[i] =
-				(unsigned)(i % topology->nhwcpus);
+		int nth_per_core = starpu_get_env_number("STARPU_NTHREADS_PER_CORE");
+		nth_per_core = nth_per_core == -1 ? 1 : nth_per_core;
+		int k, l;
+		int nbindids=0;
+		int nhyperthreads = topology->nhwpus / topology->nhwcpus;
+		STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads");
+		while(nbindids < STARPU_NMAXWORKERS)
+		{
+			k=0;
+			l= nhyperthreads-nth_per_core;
+			/* by default, we take a round robin policy */
+			for (i = 0; i < STARPU_NMAXWORKERS; i++)
+			{
+				if(k >= nth_per_core)
+				{
+					if(l == 0)
+					{
+						k=0;
+						l= nhyperthreads-nth_per_core;
+					}
+					else
+						l--;
+				}
+		       
+				if(k < nth_per_core || nhyperthreads == nth_per_core)
+				{
+					topology->workers_bindid[nbindids++] =
+						(unsigned)(i % topology->nhwpus);
+					k++;
+				}
+			}
+		}
 	}
 }
 
@@ -543,7 +615,7 @@ _starpu_get_next_bindid (struct _starpu_machine_config *config,
 		/* can we bind the worker on the requested cpu ? */
 		unsigned ind;
 		for (ind = config->current_bindid;
-		     ind < topology->nhwcpus;
+		     ind < topology->nhwpus;
 		     ind++)
 		{
 			if (topology->workers_bindid[ind] == requested_cpu)
@@ -582,6 +654,20 @@ _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
 	return config->topology.nhwcpus;
 }
 
+unsigned
+_starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
+{
+#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
+	_starpu_opencl_init();
+#endif
+#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
+	_starpu_init_cuda();
+#endif
+	_starpu_init_topology(config);
+
+	return config->topology.nhwpus;
+}
+
 #ifdef STARPU_USE_MIC
 static void
 _starpu_init_mic_config (struct _starpu_machine_config *config,
@@ -950,7 +1036,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 			unsigned already_busy_cpus = mic_busy_cpus + topology->ncudagpus
 				+ topology->nopenclgpus + topology->nsccdevices;
 
-			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
+			long avail_cpus = (long) topology->nhwpus - (long) already_busy_cpus;
 			if (avail_cpus < 0)
 				avail_cpus = 0;
 			ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
@@ -991,7 +1077,6 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
                 _STARPU_DEBUG("No worker found, aborting ...\n");
 		return -ENODEV;
 	}
-
 	return 0;
 }
 
@@ -1025,7 +1110,7 @@ _starpu_bind_thread_on_cpu (
 	{
 		hwloc_obj_t obj =
 			hwloc_get_obj_by_depth (config->topology.hwtopology,
-						config->cpu_depth, cpuid);
+						config->pu_depth, cpuid);
 		hwloc_bitmap_t set = obj->cpuset;
 		int ret;
 
@@ -1200,7 +1285,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				{
 					/* StarPU is allowed to bind threads automatically */
 					preferred_binding = _starpu_get_cuda_affinity_vector(devid);
-					npreferred = config->topology.nhwcpus;
+					npreferred = config->topology.nhwpus;
 				}
 #endif /* SIMGRID */
 				if (cuda_init[devid])
@@ -1251,7 +1336,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				{
 					/* StarPU is allowed to bind threads automatically */
 					preferred_binding = _starpu_get_opencl_affinity_vector(devid);
-					npreferred = config->topology.nhwcpus;
+					npreferred = config->topology.nhwpus;
 				}
 #endif /* SIMGRID */
 				if (opencl_init[devid])
@@ -1296,7 +1381,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 					//{
 					//	/* StarPU is allowed to bind threads automatically */
 						//	preferred_binding = _starpu_get_mic_affinity_vector(devid);
-					//	npreferred = config->topology.nhwcpus;
+					//	npreferred = config->topology.nhwpus;
 					//}
 					mic_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
 #endif /* SIMGRID */
@@ -1350,7 +1435,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 			/* Put the worker descriptor in the userdata field of the
 			 * hwloc object describing the CPU */
 			hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
-									config->cpu_depth,
+									config->pu_depth,
 									workerarg->bindid);
 			if (worker_obj->userdata == NULL)
 			{
@@ -1448,7 +1533,7 @@ _starpu_destroy_topology (
 		if (workerarg->bindid != -1)
 		{
 			hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
-									config->cpu_depth,
+									config->pu_depth,
 									workerarg->bindid);
 			if (worker_obj->userdata)
 			{
@@ -1488,7 +1573,7 @@ starpu_topology_print (FILE *output)
 	unsigned nworkers = starpu_worker_get_count();
 	unsigned ncombinedworkers = topology->ncombinedworkers;
 
-	for (core = 0; core < topology->nhwcpus; core++)
+	for (core = 0; core < topology->nhwpus; core++)
 	{
 		fprintf(output, "core %u\t", core);
 		for (worker = 0;

+ 3 - 0
src/core/topology.h

@@ -35,6 +35,9 @@ void _starpu_destroy_topology(struct _starpu_machine_config *config);
 /* returns the number of physical cpus */
 unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config);
 
+/* returns the number of logical cpus */
+unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config);
+
 /* Bind the current thread on the CPU logically identified by "cpuid". The
  * logical ordering of the processors is either that of hwloc (if available),
  * or the ordering exposed by the OS. */

+ 6 - 0
src/core/workers.h

@@ -181,6 +181,11 @@ struct _starpu_machine_topology
 	 */
 	unsigned nhwcpus;
 
+	/* Total number of PUs, as detected by the topology code. May
+	 * be different from the actual number of PU workers.
+	 */
+	unsigned nhwpus;
+
 	/* Total number of CUDA devices, as detected. May be different
 	 * from the actual number of CUDA workers.
 	 */
@@ -265,6 +270,7 @@ struct _starpu_machine_config
 
 #ifdef STARPU_HAVE_HWLOC
 	int cpu_depth;
+	int pu_depth;
 #endif
 
 	/* Where to bind workers ? */

+ 0 - 65
src/drivers/cpu/driver_cpu.c

@@ -42,71 +42,6 @@
 #include <windows.h>
 #endif
 
-#ifdef STARPU_SIMGRID
-#include <core/simgrid.h>
-#endif
-
-#ifdef STARPU_SIMGRID
-void
-_starpu_cpu_discover_devices(struct _starpu_machine_config *config)
-{
-	config->topology.nhwcpus = _starpu_simgrid_get_nbhosts("CPU");
-}
-#elif defined(STARPU_HAVE_HWLOC)
-void
-_starpu_cpu_discover_devices(struct _starpu_machine_config *config)
-{
-	/* Discover the CPUs relying on the hwloc interface and fills CONFIG
-	 * accordingly. */
-
-	struct _starpu_machine_topology *topology = &config->topology;
-
-	config->cpu_depth = hwloc_get_type_depth (topology->hwtopology,
-						  HWLOC_OBJ_CORE);
-
-	/* Would be very odd */
-	STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
-
-	if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
-		/* unknown, using logical procesors as fallback */
-		_STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
-		config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
-							 HWLOC_OBJ_PU);
-	}
-
-	topology->nhwcpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
-						       config->cpu_depth);
-}
-
-#elif defined(HAVE_SYSCONF)
-void
-_starpu_cpu_discover_devices(struct _starpu_machine_config *config)
-{
-	/* Discover the CPUs relying on the sysconf(3) function and fills
-	 * CONFIG accordingly. */
-
-	config->topology.nhwcpus = sysconf(_SC_NPROCESSORS_ONLN);
-}
-
-#elif defined(__MINGW32__) || defined(__CYGWIN__)
-void
-_starpu_cpu_discover_devices(struct _starpu_machine_config *config)
-{
-	/* Discover the CPUs on Cygwin and MinGW systems. */
-
-	SYSTEM_INFO sysinfo;
-	GetSystemInfo(&sysinfo);
-	config->topology.nhwcpus = sysinfo.dwNumberOfProcessors;
-}
-#else
-#warning no way to know number of cores, assuming 1
-void
-_starpu_cpu_discover_devices(struct _starpu_machine_config *config)
-{
-	config->topology.nhwcpus = 1;
-}
-#endif
-
 
 /* Actually launch the job on a cpu worker.
  * Handle binding CPUs on cores.

+ 0 - 5
src/drivers/cpu/driver_cpu.h

@@ -36,11 +36,6 @@ int _starpu_run_cpu(struct _starpu_worker *);
 int _starpu_cpu_driver_init(struct _starpu_worker *);
 int _starpu_cpu_driver_run_once(struct _starpu_worker *);
 int _starpu_cpu_driver_deinit(struct _starpu_worker *);
-void _starpu_cpu_discover_devices(struct _starpu_machine_config *config);
-#else
-#define _starpu_cpu_discover_devices(config) do { \
-	(config)->topology.nhwcpus = 1; \
-} while (0)
 #endif /* !STARPU_USE_CPU */
 
 #endif //  __DRIVER_CPU_H__