Browse Source

Fix thread binding on knf in accelerator mode

Samuel Thibault 8 years ago
parent
commit
cdbf49888a

+ 4 - 4
doc/doxygen/chapters/501_environment_variables.doxy

@@ -106,11 +106,11 @@ MIC equivalent of the environment variable \ref STARPU_NCUDA, i.e. the number of
 MIC devices to use.
 </dd>
 
-<dt>STARPU_NMICCORES</dt>
+<dt>STARPU_NMICTHREADS</dt>
 <dd>
-\anchor STARPU_NMICCORES
-\addindex __env__STARPU_NMICCORES
-Number of cores to use on the MIC devices.
+\anchor STARPU_NMICTHREADS
+\addindex __env__STARPU_NMICTHREADS
+Number of threads to use on the MIC devices.
 </dd>
 
 <dt>STARPU_NSCC</dt>

+ 1 - 1
src/core/topology.c

@@ -777,7 +777,7 @@ _starpu_init_mic_config (struct _starpu_machine_config *config,
 	_starpu_init_mic_topology (config, mic_idx);
 
 	int nmiccores;
-	nmiccores = starpu_get_env_number("STARPU_NMICCORES");
+	nmiccores = starpu_get_env_number("STARPU_NMICTHREADS");
 
 	if (nmiccores == -1)
 	{

+ 22 - 3
src/drivers/mic/driver_mic_sink.c

@@ -28,19 +28,22 @@
 #include "driver_mic_common.h"
 #include "driver_mic_sink.h"
 
+static int mic_core_to_thread[240];
 /* Initialize the MIC sink, initializing connection to the source
  * and to the other devices (not implemented yet).
  */
 void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 {
+#ifdef __KNC__
 	starpu_pthread_t self;
 	cpu_set_t cpuset;
-
+	/* We reserve one core for the communications */
 	/*Bind on the first core*/
 	self = pthread_self();
 	CPU_ZERO(&cpuset);
-	CPU_SET(241,&cpuset);
+	CPU_SET(0,&cpuset);
 	pthread_setaffinity_np(self,sizeof(cpu_set_t),&cpuset);
+#endif
 
 
 	/* Initialize connection with the source */
@@ -53,6 +56,19 @@ void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 	node->nb_cores = COISysGetHardwareThreadCount() - COISysGetHardwareThreadCount() / COISysGetCoreCount();
 	node->thread_table = malloc(sizeof(starpu_pthread_t)*node->nb_cores);
 
+#ifdef STARPU_DEVEL
+#warning rather use hwloc
+#endif
+#ifdef __KNC__
+	unsigned core,thread;
+	/* Round-robin between cores. Take care of the odd numbering of threads on the KNC */
+	for (core = 0; core < 60; core++)
+		for (thread = 0; thread < 4; thread++)
+			mic_core_to_thread[core + thread * 60] = core * 4 + thread + 1;
+#elif defined(__KNF__)
+#error need to check the numbering
+#endif
+
 	//node->sink_sink_dt_connections = malloc(node->nb_mp_sinks * sizeof(union _starpu_mp_connection));
 
 	//for (i = 0; i < (unsigned int)node->devid; ++i)
@@ -187,6 +203,9 @@ void _starpu_mic_sink_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUT
 
 /* bind the thread to a core
  */
+#ifdef STARPU_DEVEL
+#warning Use hwloc, the numbering is *really* odd on the MIC
+#endif
 void _starpu_mic_sink_bind_thread(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, int coreid, int * core_table, int nb_core)
 {
 	cpu_set_t cpuset;
@@ -197,7 +216,7 @@ void _starpu_mic_sink_bind_thread(const struct _starpu_mp_node *mp_node STARPU_A
 
 	//adding the core to the set
 	for(i=0;i<nb_core;i++)
-		CPU_SET(core_table[i],&cpuset);
+		CPU_SET(mic_core_to_thread[core_table[i]],&cpuset);
 
 	pthread_setaffinity_np(((starpu_pthread_t*)mp_node->thread_table)[coreid],sizeof(cpu_set_t),&cpuset);
 }

+ 2 - 0
src/drivers/mp_common/sink_common.c

@@ -527,6 +527,8 @@ void* _starpu_sink_thread(void * thread_arg)
 
 	struct _starpu_worker *worker = &_starpu_get_machine_config()->workers[node->baseworkerid + coreid];
 
+	node->bind_thread(node, coreid, &coreid, 1);
+
 	_starpu_set_local_worker_key(worker);
 	while(node->is_running)
 	{