瀏覽代碼

Support STARPU_MIN_WORKERSIZE and STARPU_MAX_WORKERSIZE for the non-single combined workers too

Samuel Thibault 12 年之前
父節點
當前提交
da222e0b6a
共有 1 個文件被更改,包括 37 次插入12 次删除
  1. 37 12
      src/sched_policies/detect_combined_workers.c

+ 37 - 12
src/sched_policies/detect_combined_workers.c

@@ -50,7 +50,7 @@ static void find_workers(hwloc_obj_t obj, int cpu_workers[STARPU_NMAXWORKERS], u
 	}
 }
 
-static void synthesize_intermediate_workers(hwloc_obj_t *children, unsigned arity, unsigned n, unsigned synthesize_arity)
+static void synthesize_intermediate_workers(hwloc_obj_t *children, unsigned min, unsigned max, unsigned arity, unsigned n, unsigned synthesize_arity)
 {
 	unsigned nworkers, i, j;
 	unsigned chunk_size = (n + synthesize_arity-1) / synthesize_arity;
@@ -80,11 +80,20 @@ static void synthesize_intermediate_workers(hwloc_obj_t *children, unsigned arit
 		/* Completed a chunk, or last bit (but not if it's just 1 subobject) */
 		if (j == chunk_size || (i == arity-1 && j > 1))
 		{
-			_STARPU_DEBUG("Adding it\n");
-			ret = starpu_combined_worker_assign_workerid(nworkers, cpu_workers);
-			STARPU_ASSERT(ret >= 0);
+			if (nworkers >= min && nworkers <= max)
+			{
+				unsigned sched_ctx_id  = starpu_task_get_context();
+				if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
+					sched_ctx_id = 0;
+				struct starpu_sched_ctx_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+
+				_STARPU_DEBUG("Adding it\n");
+				ret = starpu_combined_worker_assign_workerid(nworkers, cpu_workers);
+				STARPU_ASSERT(ret >= 0);
+				workers->add(workers,ret);
+			}
 			/* Recurse there */
-			synthesize_intermediate_workers(children+chunk_start, i - chunk_start, n, synthesize_arity);
+			synthesize_intermediate_workers(children+chunk_start, min, max, i - chunk_start, n, synthesize_arity);
 			/* And restart another one */
 			n = 0;
 			j = 0;
@@ -94,7 +103,7 @@ static void synthesize_intermediate_workers(hwloc_obj_t *children, unsigned arit
 	}
 }
 
-static void find_and_assign_combinations(hwloc_obj_t obj, unsigned synthesize_arity)
+static void find_and_assign_combinations(hwloc_obj_t obj, unsigned min, unsigned max, unsigned synthesize_arity)
 {
 	char name[64];
 	unsigned i, n, nworkers;
@@ -114,7 +123,7 @@ static void find_and_assign_combinations(hwloc_obj_t obj, unsigned synthesize_ar
 	if (n == 1)
 	{
 		/* If there is only one child, we go to the next level right away */
-		find_and_assign_combinations(obj->children[0], synthesize_arity);
+		find_and_assign_combinations(obj->children[0], min, max, synthesize_arity);
 		return;
 	}
 
@@ -122,7 +131,7 @@ static void find_and_assign_combinations(hwloc_obj_t obj, unsigned synthesize_ar
 	nworkers = 0;
 	find_workers(obj, cpu_workers, &nworkers);
 
-	if (nworkers > 1)
+	if (nworkers >= min && nworkers <= max)
 	{
 		_STARPU_DEBUG("Adding it\n");
 		unsigned sched_ctx_id  = starpu_task_get_context();
@@ -137,12 +146,12 @@ static void find_and_assign_combinations(hwloc_obj_t obj, unsigned synthesize_ar
 	}
 
 	/* Add artificial intermediate objects recursively */
-	synthesize_intermediate_workers(obj->children, obj->arity, n, synthesize_arity);
+	synthesize_intermediate_workers(obj->children, min, max, obj->arity, n, synthesize_arity);
 
 	/* And recurse */
 	for (i = 0; i < obj->arity; i++)
 		if (obj->children[i]->userdata == (void*) -1)
-			find_and_assign_combinations(obj->children[i], synthesize_arity);
+			find_and_assign_combinations(obj->children[i], min, max, synthesize_arity);
 }
 
 static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers)
@@ -151,6 +160,13 @@ static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers
 	struct starpu_machine_topology *topology = &config->topology;
 	int synthesize_arity = starpu_get_env_number("STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER");
 
+	int min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
+	if (min < 2)
+		min = 2;
+	int max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
+	if (max == -1)
+		max = INT_MAX;
+
 	if (synthesize_arity == -1)
 		synthesize_arity = 2;
 
@@ -171,7 +187,7 @@ static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers
 			}
 		}
 	}
-	find_and_assign_combinations(hwloc_get_root_obj(topology->hwtopology), synthesize_arity);
+	find_and_assign_combinations(hwloc_get_root_obj(topology->hwtopology), min, max, synthesize_arity);
 }
 
 #else /* STARPU_HAVE_HWLOC */
@@ -181,6 +197,8 @@ static void find_and_assign_combinations_without_hwloc(int *workerids, int nwork
 	unsigned sched_ctx_id  = starpu_task_get_context();
 	if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
 		sched_ctx_id = 0;
+	int min;
+	int max;
 
 	struct starpu_sched_ctx_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
@@ -198,8 +216,15 @@ static void find_and_assign_combinations_without_hwloc(int *workerids, int nwork
 			cpu_workers[ncpus++] = i;
 	}
 
+	min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
+	if (min < 2)
+		min = 2;
+	max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
+	if (max == -1 || max > (int) ncpus)
+		max = ncpus;
+
 	int size;
-	for (size = 2; size <= ncpus; size *= 2)
+	for (size = min; size <= max; size *= 2)
 	{
 		unsigned first_cpu;
 		for (first_cpu = 0; first_cpu < ncpus; first_cpu += size)