Bläddra i källkod

Reduce cost of tree worker iterator by caching the set of workers using the same bindid

Samuel Thibault 9 år sedan
förälder
incheckning
c14e34df69

+ 1 - 1
include/starpu_worker.h

@@ -123,7 +123,7 @@ unsigned starpu_worker_is_slave_somewhere(int workerid);
 
 char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
 
-int starpu_worker_get_workerids(int bindid, int *workerids);
+int starpu_bindid_get_workerids(int bindid, int **workerids);
 
 #ifdef __cplusplus
 }

+ 28 - 0
src/core/topology.c

@@ -1454,6 +1454,14 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 	unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
 	unsigned mic_bindid[STARPU_MAXMICDEVS];
 #endif
+	unsigned bindid;
+
+	for (bindid = 0; bindid < config->nbindid; bindid++)
+	{
+		free(config->bindid_workers[bindid].workerids);
+		config->bindid_workers[bindid].workerids = NULL;
+		config->bindid_workers[bindid].nworkers = 0;
+	}
 
 	unsigned worker;
 	for (worker = 0; worker < config->topology.nworkers; worker++)
@@ -1690,6 +1698,26 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 			workerarg->hwloc_cpu_set = hwloc_bitmap_dup (worker_obj->cpuset);
 		}
 #endif
+		if (workerarg->bindid != -1)
+		{
+			bindid = workerarg->bindid;
+			unsigned old_nbindid = config->nbindid;
+			if (bindid >= old_nbindid)
+			{
+				/* More room needed */
+				if (!old_nbindid)
+					config->nbindid = STARPU_NMAXWORKERS;
+				else
+					config->nbindid = 2 * old_nbindid;
+				config->bindid_workers = realloc(config->bindid_workers, config->nbindid * sizeof(config->bindid_workers[0]));
+				memset(&config->bindid_workers[old_nbindid], 0, (config->nbindid - old_nbindid) * sizeof(config->bindid_workers[0]));
+			}
+			/* Add slot for this worker */
+			/* Don't care about amortizing the cost, there are usually very few workers sharing the same bindid */
+			config->bindid_workers[bindid].nworkers++;
+			config->bindid_workers[bindid].workerids = realloc(config->bindid_workers[bindid].workerids, config->bindid_workers[bindid].nworkers * sizeof(config->bindid_workers[bindid].workerids[0]));
+			config->bindid_workers[bindid].workerids[config->bindid_workers[bindid].nworkers-1] = worker;
+		}
 	}
 }
 

+ 6 - 6
src/core/tree.c

@@ -22,8 +22,8 @@ void starpu_tree_reset_visited(struct starpu_tree *tree, char *visited)
 {
 	if(tree->arity == 0)
 	{
-		int workerids[STARPU_NMAXWORKERS];
-		int nworkers = starpu_worker_get_workerids(tree->id, workerids);
+		int *workerids;
+		int nworkers = starpu_bindid_get_workerids(tree->id, &workerids);
 		int w;
 		for(w = 0; w < nworkers; w++)
 		{
@@ -81,8 +81,8 @@ static struct starpu_tree* _get_down_to_leaves(struct starpu_tree *node, char *v
 		{
 			if(node->nodes[i]->is_pu)
 			{
-				int workerids[STARPU_NMAXWORKERS];
-				int nworkers = starpu_worker_get_workerids(node->nodes[i]->id, workerids);
+				int *workerids;
+				int nworkers = starpu_bindid_get_workerids(node->nodes[i]->id, &workerids);
 				int w;
 				for(w = 0; w < nworkers; w++)
 				{
@@ -122,8 +122,8 @@ struct starpu_tree* starpu_tree_get_neighbour(struct starpu_tree *tree, struct s
 			{
 				if(father->nodes[i]->is_pu)
 				{
-					int workerids[STARPU_NMAXWORKERS];
-					int nworkers = starpu_worker_get_workerids(father->nodes[i]->id, workerids);
+					int *workerids;
+					int nworkers = starpu_bindid_get_workerids(father->nodes[i]->id, &workerids);
 					int w;
 					for(w = 0; w < nworkers; w++)
 					{

+ 4 - 8
src/core/workers.c

@@ -1829,15 +1829,11 @@ int starpu_worker_get_bindid(int workerid)
 	return _starpu_config.workers[workerid].bindid;
 }
 
-int starpu_worker_get_workerids(int bindid, int *workerids)
+int starpu_bindid_get_workerids(int bindid, int **workerids)
 {
-	unsigned nworkers = starpu_worker_get_count();
-	int nw = 0;
-	unsigned id;
-	for (id = 0; id < nworkers; id++)
-		if (_starpu_config.workers[id].bindid == bindid)
-			workerids[nw++] = id;
-	return nw;
+	STARPU_ASSERT(bindid < _starpu_config.nbindid);
+	*workerids = _starpu_config.bindid_workers[bindid].workerids;
+	return _starpu_config.bindid_workers[bindid].nworkers;
 }
 
 void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond)

+ 7 - 0
src/core/workers.h

@@ -323,6 +323,13 @@ struct _starpu_machine_config
 	 * that can run parallel tasks together. */
 	struct _starpu_combined_worker combined_workers[STARPU_NMAX_COMBINEDWORKERS];
 
+	/* Translation table from bindid to worker IDs */
+	struct {
+		int *workerids;
+		unsigned nworkers; /* size of workerids */
+	} *bindid_workers;
+	unsigned nbindid; /* size of bindid_workers */
+
 	/* This bitmask indicates which kinds of worker are available. For
 	 * instance it is possible to test if there is a CUDA worker with
 	 * the result of (worker_mask & STARPU_CUDA). */

+ 2 - 2
src/sched_policies/work_stealing_policy.c

@@ -504,8 +504,8 @@ static void lws_add_workers(unsigned sched_ctx_id, int *workerids,
 		for(;;)
 		{
 			neighbour = (struct starpu_tree*)it.value;
-			int neigh_workerids[STARPU_NMAXWORKERS];
-			int neigh_nworkers = starpu_worker_get_workerids(neighbour->id, neigh_workerids);
+			int *neigh_workerids;
+			int neigh_nworkers = starpu_bindid_get_workerids(neighbour->id, &neigh_workerids);
 			int w;
 			for(w = 0; w < neigh_nworkers; w++)
 			{

+ 12 - 12
src/worker_collection/worker_tree.c

@@ -38,8 +38,8 @@ static unsigned tree_has_next_unblocked_worker(struct starpu_worker_collection *
 		return 0;
 	}
 	int id = -1;
-	int workerids[STARPU_NMAXWORKERS];
-	int nworkers = starpu_worker_get_workerids(neighbour->id, workerids);
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
 	int w;
 	for(w = 0; w < nworkers; w++)
 	{
@@ -83,8 +83,8 @@ static int tree_get_next_unblocked_worker(struct starpu_worker_collection *worke
 	STARPU_ASSERT_MSG(neighbour, "no element anymore");
 
 
-	int workerids[STARPU_NMAXWORKERS];
-	int nworkers = starpu_worker_get_workerids(neighbour->id, workerids);
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
 	int w;
 	for(w = 0; w < nworkers; w++)
 	{
@@ -117,8 +117,8 @@ static unsigned tree_has_next_master(struct starpu_worker_collection *workers, s
 		return 0;
 	}
 	int id = -1;
-	int workerids[STARPU_NMAXWORKERS];
-	int nworkers = starpu_worker_get_workerids(neighbour->id, workerids);
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
 	int w;
 	for(w = 0; w < nworkers; w++)
 	{
@@ -152,8 +152,8 @@ static int tree_get_next_master(struct starpu_worker_collection *workers, struct
 	STARPU_ASSERT_MSG(neighbour, "no element anymore");
 
 
-	int workerids[STARPU_NMAXWORKERS];
-	int nworkers = starpu_worker_get_workerids(neighbour->id, workerids);
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
 	int w;
 	for(w = 0; w < nworkers; w++)
 	{
@@ -192,8 +192,8 @@ static unsigned tree_has_next(struct starpu_worker_collection *workers, struct s
 		return 0;
 	}
 	int id = -1;
-	int workerids[STARPU_NMAXWORKERS];
-	int nworkers = starpu_worker_get_workerids(neighbour->id, workerids);
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
 	int w;
 	for(w = 0; w < nworkers; w++)
 	{
@@ -232,8 +232,8 @@ static int tree_get_next(struct starpu_worker_collection *workers, struct starpu
 	STARPU_ASSERT_MSG(neighbour, "no element anymore");
 
 
-	int workerids[STARPU_NMAXWORKERS];
-	int nworkers = starpu_worker_get_workerids(neighbour->id, workerids);
+	int *workerids;
+	int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids);
 	int w;
 	for(w = 0; w < nworkers; w++)
 	{