
Add a dumb heuristic to graph_test, to decide between CPU or GPU execution

Samuel Thibault 9 years ago
parent
commit
a43e6c7246
2 changed files with 78 additions and 9 deletions
  1. doc/doxygen/chapters/08scheduling.doxy (+6 -2)
  2. src/sched_policies/graph_test_policy.c (+72 -7)

+ 6 - 2
doc/doxygen/chapters/08scheduling.doxy

@@ -206,7 +206,11 @@ to a bag of tasks. When the application is finished with submitting tasks,
 it calls starpu_do_schedule (or starpu_task_wait_for_all, which calls
 starpu_do_schedule), and the starpu_sched_policy::do_schedule method of the
 scheduler is called. This method calls _starpu_graph_compute_depths to compute
-the bottom-up ranks, and then uses these rank to set priorities over tasks. It
-can then let tasks go to a priority queue, from which workers can then pop.
+the bottom-up ranks, and then uses these ranks to set priorities over tasks.
+
+It then has two priority queues, one for CPUs and one for GPUs, and uses a dumb
+heuristic based on the expected duration of the task on CPUs and on GPUs to
+decide between the two queues. CPU workers can then pop from the CPU priority
+queue, and GPU workers from the GPU priority queue.
 
 */
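
For context, here is a minimal sketch (not part of this patch) of an application driving this policy: tasks are submitted as a bag, and starpu_task_wait_for_all() ends up calling starpu_do_schedule(), at which point the ranks are computed and each task is pushed to the CPU or GPU priority queue. The kernels, codelet, perfmodel symbol and task count below are made up for illustration; it assumes the policy is selected through the STARPU_SCHED environment variable (e.g. STARPU_SCHED=graph_test) and that performance models have been calibrated, since the heuristic needs timings to compare.

#include <starpu.h>

/* Hypothetical no-op CPU implementation, just to have something to submit. */
static void cpu_kernel(void *buffers[], void *cl_arg)
{
	(void)buffers;
	(void)cl_arg;
}

/* Hypothetical CUDA implementation (would normally launch a kernel). */
static void cuda_kernel(void *buffers[], void *cl_arg)
{
	(void)buffers;
	(void)cl_arg;
}

static struct starpu_perfmodel dummy_model =
{
	.type = STARPU_HISTORY_BASED,
	.symbol = "dummy",
};

static struct starpu_codelet cl =
{
	.cpu_funcs = { cpu_kernel },
	.cuda_funcs = { cuda_kernel },
	.nbuffers = 0,
	.model = &dummy_model,
};

int main(void)
{
	if (starpu_init(NULL) != 0)
		return 1;

	/* Submit a bag of tasks; with this policy nothing is dispatched yet. */
	for (int i = 0; i < 100; i++)
	{
		struct starpu_task *task = starpu_task_create();
		task->cl = &cl;
		if (starpu_task_submit(task) != 0)
			break;
	}

	/* Calls starpu_do_schedule() internally: bottom-up ranks become task
	 * priorities, and each task goes to the CPU or GPU priority queue. */
	starpu_task_wait_for_all();

	starpu_shutdown();
	return 0;
}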

+ 72 - 7
src/sched_policies/graph_test_policy.c

@@ -38,7 +38,8 @@
 struct _starpu_graph_test_policy_data
 {
 	struct _starpu_fifo_taskq *fifo;	/* Bag of tasks which are ready before do_schedule is called */
-	struct _starpu_prio_deque prio;
+	struct _starpu_prio_deque prio_cpu;
+	struct _starpu_prio_deque prio_gpu;
 	starpu_pthread_mutex_t policy_mutex;
 	struct starpu_bitmap *waiters;
 	unsigned computed;
@@ -50,7 +51,8 @@ static void initialize_graph_test_policy(unsigned sched_ctx_id)
 
 	/* there is only a single queue in that trivial design */
 	data->fifo =  _starpu_create_fifo();
-	 _starpu_prio_deque_init(&data->prio );
+	 _starpu_prio_deque_init(&data->prio_cpu);
+	 _starpu_prio_deque_init(&data->prio_gpu);
 	data->waiters = starpu_bitmap_create();
 	data->computed = 0;
 
@@ -80,6 +82,64 @@ static void deinitialize_graph_test_policy(unsigned sched_ctx_id)
 	free(data);
 }
 
+/* Push the given task on CPU or GPU prio list, using a dumb heuristic */
+static void push_task_to_prio(unsigned sched_ctx_id, struct _starpu_graph_test_policy_data *data, struct starpu_task *task)
+{
+	int cpu_can = 0, gpu_can = 0;
+	double cpu_speed = 0.;
+	double gpu_speed = 0.;
+
+	/* Compute how fast CPUs can compute it, and how fast GPUs can compute it */
+	unsigned worker;
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_sched_ctx_iterator it;
+	workers->init_iterator(workers, &it);
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		if (!starpu_worker_can_execute_task(worker, task, 0))
+			/* This worker cannot execute this task, don't count it */
+			continue;
+
+		if (starpu_worker_get_type(worker) == STARPU_CPU_WORKER)
+			/* At least one CPU can run it */
+			cpu_can = 1;
+		else
+			/* At least one GPU can run it */
+			gpu_can = 1;
+
+		/* Get expected task duration for this worker */
+		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id);
+		double length = starpu_task_expected_length(task, perf_arch, 0);
+		double power;
+
+		if (isnan(length))
+			/* We don't have an estimation yet */
+			length = 0.;
+		if (length == 0.)
+		{
+			_STARPU_DISP("Warning: graph_test needs performance models for all tasks, including %s\n",
+					_starpu_job_get_task_name(_starpu_get_job_associated_to_task(task)));
+			power = 0.;
+		}
+		else
+			power = 1./length;
+
+		/* Add the computation power to the CPU or GPU pool */
+		if (starpu_worker_get_type(worker) == STARPU_CPU_WORKER)
+			cpu_speed += power;
+		else
+			gpu_speed += power;
+	}
+
+	/* Decide to push on CPUs or GPUs depending on the overall computation power */
+	if (!gpu_can || (cpu_can && cpu_speed > gpu_speed))
+		_starpu_prio_deque_push_back_task(&data->prio_cpu, task);
+	else
+		_starpu_prio_deque_push_back_task(&data->prio_gpu, task);
+
+}
+
 static void set_priority(void *_data STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *job)
 {
 	job->task->priority = job->depth;
@@ -96,8 +156,7 @@ static void do_schedule_graph_test_policy(unsigned sched_ctx_id)
 
 	/* Now that we have priorities, move tasks from bag to priority queue */
 	while(!_starpu_fifo_empty(data->fifo)) {
-		struct starpu_task *task = _starpu_fifo_pop_task(data->fifo, -1);
-		_starpu_prio_deque_push_back_task(&data->prio, task);
+		push_task_to_prio(sched_ctx_id, data, _starpu_fifo_pop_task(data->fifo, -1));
 	}
 
 	/* And unleash the beast! */
@@ -144,7 +203,7 @@ static int push_task_graph_test_policy(struct starpu_task *task)
 	}
 
 	/* Priorities are computed, we can push to execution */
-	_starpu_prio_deque_push_back_task(&data->prio,task);
+	push_task_to_prio(sched_ctx_id, data, task);
 
 	starpu_push_task_end(task);
 
@@ -205,12 +264,18 @@ static struct starpu_task *pop_task_graph_test_policy(unsigned sched_ctx_id)
 	struct starpu_task *chosen_task = NULL;
 	unsigned workerid = starpu_worker_get_id();
 	struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	struct _starpu_prio_deque *prio;
+
+	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
+		prio = &data->prio_cpu;
+	else
+		prio = &data->prio_gpu;
 
 	/* block until some event happens */
 	/* Here helgrind would shout that this is unprotected, this is just an
 	 * integer access, and we hold the sched mutex, so we can not miss any
 	 * wake up. */
-	if (!STARPU_RUNNING_ON_VALGRIND && _starpu_prio_deque_is_empty(&data->prio))
+	if (!STARPU_RUNNING_ON_VALGRIND && _starpu_prio_deque_is_empty(prio))
 		return NULL;
 
 #ifdef STARPU_NON_BLOCKING_DRIVERS
@@ -229,7 +294,7 @@ static struct starpu_task *pop_task_graph_test_policy(unsigned sched_ctx_id)
 		return NULL;
 	}
 
-	chosen_task = _starpu_prio_deque_pop_task_for_worker(&data->prio, workerid);
+	chosen_task = _starpu_prio_deque_pop_task_for_worker(prio, workerid);
 	if (!chosen_task)
 		/* Tell pushers that we are waiting for tasks for us */
 		starpu_bitmap_set(data->waiters, workerid);
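
Side note on the heuristic itself: each worker able to run the task contributes 1/expected_length to its architecture's pool, so the comparison is between aggregate throughputs rather than single-worker timings. Here is a standalone sketch with made-up numbers (4 CPU cores at 2000us and one GPU at 300us for the same task, both hypothetical) showing how the pools are compared.

#include <stdio.h>

int main(void)
{
	double cpu_speed = 0., gpu_speed = 0.;
	int i;

	/* 4 CPU cores, each expected to take 2000us on this task. */
	for (i = 0; i < 4; i++)
		cpu_speed += 1. / 2000.;	/* pool total: 0.002 tasks/us */

	/* 1 GPU expected to take 300us on this task. */
	gpu_speed += 1. / 300.;			/* pool total: ~0.0033 tasks/us */

	/* Same rule as push_task_to_prio(): the larger pool wins, so this
	 * task would be pushed to the GPU priority queue. */
	printf("cpu_speed=%g gpu_speed=%g -> %s queue\n",
	       cpu_speed, gpu_speed, cpu_speed > gpu_speed ? "CPU" : "GPU");
	return 0;
}

With these numbers the GPU queue wins; with, say, sixteen such CPU cores (16/2000 = 0.008 > 0.0033) the task would go to the CPU queue instead.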