Browse Source

- Implement _starpu_sched_find_worker_combinations with hwloc.
In order to still get some parallel workers when there are GPUs and the
topology is rather flat: for instance, when there are 6 cores in a processor
and a GPU controlled by one of those cores, we create a combined worker of
size 5 alongside the GPU worker.
- Store a pointer to the worker structure in the userdata field of the hwloc
object corresponding to each worker.

Cédric Augonnet 14 years ago
parent
commit
bdd2f07666
3 changed files with 104 additions and 4 deletions
  1. 10 1
      src/core/topology.c
  2. 0 1
      src/core/workers.c
  3. 94 2
      src/sched_policies/detect_combined_workers.c

+ 10 - 1
src/core/topology.c

@@ -272,11 +272,14 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 	int explicitval __attribute__((unused));
 	int explicitval __attribute__((unused));
 	unsigned use_accelerator = 0;
 	unsigned use_accelerator = 0;
 
 
+	int i;
+	for (i = 0; i < STARPU_NMAXWORKERS; i++)
+		config->workers[i].workerid = i;
+
 	struct starpu_machine_topology_s *topology = &config->topology;
 	struct starpu_machine_topology_s *topology = &config->topology;
 
 
 	topology->nworkers = 0;
 	topology->nworkers = 0;
 	topology->ncombinedworkers = 0;
 	topology->ncombinedworkers = 0;
-
 	_starpu_init_topology(config);
 	_starpu_init_topology(config);
 
 
 	_starpu_initialize_workers_bindid(config);
 	_starpu_initialize_workers_bindid(config);
@@ -733,6 +736,12 @@ static void _starpu_init_workers_binding(struct starpu_machine_config_s *config)
 		hwloc_bitmap_only(workerarg->initial_hwloc_cpu_set, workerarg->bindid);
 		hwloc_bitmap_only(workerarg->initial_hwloc_cpu_set, workerarg->bindid);
 		workerarg->current_hwloc_cpu_set = hwloc_bitmap_alloc();
 		workerarg->current_hwloc_cpu_set = hwloc_bitmap_alloc();
 		hwloc_bitmap_only(workerarg->current_hwloc_cpu_set, workerarg->bindid);
 		hwloc_bitmap_only(workerarg->current_hwloc_cpu_set, workerarg->bindid);
+
+		/* Put the worker descriptor in the userdata field of the hwloc object describing the CPU */
+		hwloc_obj_t worker_obj;
+		worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
+					config->cpu_depth, workerarg->bindid);
+		worker_obj->userdata = &config->workers[worker];
 #endif
 #endif
 	}
 	}
 }
 }

+ 0 - 1
src/core/workers.c

@@ -147,7 +147,6 @@ static void _starpu_launch_drivers(struct starpu_machine_config_s *config)
 		PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
 		PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
 		PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
 		PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
 
 
-		workerarg->workerid = (int)worker;
 		workerarg->worker_size = 1;
 		workerarg->worker_size = 1;
 		workerarg->combined_workerid = workerarg->workerid;
 		workerarg->combined_workerid = workerarg->workerid;
 		workerarg->current_rank = 0;
 		workerarg->current_rank = 0;

+ 94 - 2
src/sched_policies/detect_combined_workers.c

@@ -19,9 +19,91 @@
 #include <common/utils.h>
 #include <common/utils.h>
 #include <core/workers.h>
 #include <core/workers.h>
 
 
-void _starpu_sched_find_worker_combinations(struct starpu_machine_topology_s *topology)
+#ifdef STARPU_HAVE_HWLOC
+#include <hwloc.h>
+#endif
+
+#ifdef STARPU_HAVE_HWLOC
+/* Recursively explore the hwloc subtree rooted at obj and register a combined
+ * worker for every node that has at least two children whose subtrees only
+ * contain CPU workers.
+ *
+ * obj:          root of the subtree to explore.
+ * worker_array: output array. A leaf appends the id of its CPU worker to it;
+ *               an inner node appends the ids gathered below it only when all
+ *               of its children are valid.
+ * worker_cnt:   in/out number of entries already used in worker_array.
+ *
+ * Returns 1 if the subtree induced by obj only contains CPU workers (leaves
+ * with no worker attached are accepted too), otherwise 0 is returned. */
+static int find_combinations_with_hwloc_rec(hwloc_obj_t obj, int *worker_array, int *worker_cnt)
+{
+	struct starpu_machine_config_s *config = _starpu_get_machine_config();
+
+	/* Is this a leaf ? (eg. a PU for hwloc). cpu_depth is a depth in the
+	 * hwloc tree, so depths are compared directly: hwloc_compare_types()
+	 * expects object types, not depths. */
+	int is_leaf = ((int) obj->depth == (int) config->cpu_depth);
+
+	if (is_leaf)
+	{
+		struct starpu_worker_s *worker = obj->userdata;
+
+		/* If this is a CPU worker, append its id at the end of the
+		 * list */
+		if (worker && worker->arch == STARPU_CPU_WORKER)
+		{
+			worker_array[*worker_cnt] = worker->workerid;
+			*worker_cnt = *worker_cnt + 1;
+		}
+
+		/* The leaf is valid unless a non-CPU worker is attached to
+		 * it: such a leaf cannot be part of a combined worker. */
+		return (!worker || worker->arch == STARPU_CPU_WORKER);
+	}
+
+	/* If there is only one child, we go to the next level directly */
+	if (obj->arity == 1)
+		return find_combinations_with_hwloc_rec(obj->children[0], worker_array, worker_cnt);
+
+	/* We recursively go from the root to the leaves of the tree to find
+	 * subtrees that only have CPUs as leaves. */
+	unsigned cpu_children_cnt = 0;
+
+	/* Ids gathered below this node. This is a local buffer so that the
+	 * caller's array is left untouched when this subtree is not valid. */
+	int worker_array_rec[STARPU_NMAXWORKERS];
+	int worker_cnt_rec = 0;
+	memset(worker_array_rec, 0, sizeof(worker_array_rec));
+
+	unsigned i;
+	for (i = 0; i < obj->arity; i++)
+	{
+		int valid_subtree = find_combinations_with_hwloc_rec(obj->children[i],
+						worker_array_rec, &worker_cnt_rec);
+		if (valid_subtree)
+			cpu_children_cnt++;
+	}
+
+	int child;
+
+	/* Only a fully valid subtree exports its workers to the caller. */
+	if (cpu_children_cnt == obj->arity)
+	{
+		for (child = 0; child < worker_cnt_rec; child++)
+		{
+			worker_array[*worker_cnt] = worker_array_rec[child];
+			*worker_cnt = *worker_cnt + 1;
+		}
+	}
+
+	/* If at least 2 children are valid, we combine the workers found
+	 * below them. */
+	if (cpu_children_cnt > 1)
+		starpu_combined_worker_assign_workerid(worker_cnt_rec, worker_array_rec);
+
+	return (cpu_children_cnt == obj->arity);
+}
+
+/* Register the combined workers that can be deduced from the hwloc topology
+ * stored in topology->hwtopology, by exploring the tree from its root. */
+static void find_combinations_with_hwloc(struct starpu_machine_topology_s *topology)
+{
+	/* Scratch output of the recursion: the ids gathered at the root level
+	 * are not used afterwards. */
+	int worker_array[STARPU_NMAXWORKERS];
+	int worker_cnt = 0;
+
+	/* We recursively go from the root to the leaves of the tree to find
+	 * subtrees that only have CPUs as leaves. The root is obtained with
+	 * the dedicated accessor rather than by passing an object type where
+	 * a depth is expected. */
+	hwloc_obj_t root = hwloc_get_root_obj(topology->hwtopology);
+	find_combinations_with_hwloc_rec(root, worker_array, &worker_cnt);
+}
+#else
+static void find_combinations_without_hwloc(struct starpu_machine_topology_s *topology)
 {
 {
-#warning TODO: use hwloc instead
 	struct starpu_machine_config_s *config = _starpu_get_machine_config();
 	struct starpu_machine_config_s *config = _starpu_get_machine_config();
 
 
 	/* We put the id of all CPU workers in this array */
 	/* We put the id of all CPU workers in this array */
@@ -56,3 +138,13 @@ void _starpu_sched_find_worker_combinations(struct starpu_machine_topology_s *to
 		}
 		}
 	}
 	}
 }
 }
+#endif
+
+/* Entry point: detect and register the combined workers, using the
+ * hwloc-based detection when StarPU is built with hwloc and the fallback
+ * detection otherwise. */
+void _starpu_sched_find_worker_combinations(struct starpu_machine_topology_s *topology)
+{
+#ifndef STARPU_HAVE_HWLOC
+	find_combinations_without_hwloc(topology);
+#else
+	find_combinations_with_hwloc(topology);
+#endif
+}