Browse Source

Fix counting non-ready data

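Data that a task will not read does not need to be valid beforehand, so only buffers accessed with STARPU_R are now counted as non-ready (and non-loading); buffers that are not yet allocated are counted whatever the access mode.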
Samuel Thibault 4 years ago
parent
commit
32f805366b
3 changed files with 51 additions and 35 deletions
  1. 29 20
      src/sched_policies/fifo_queues.c
  2. 1 1
      src/sched_policies/fifo_queues.h
  3. 21 14
      src/sched_policies/prio_deque.c

+ 29 - 20
src/sched_policies/fifo_queues.c

@@ -355,9 +355,9 @@ int _starpu_normalize_prio(int priority, int num_priorities, unsigned sched_ctx_
 	return ((num_priorities-1)/(max-min)) * (priority - min);
 }
 
-void _starpu_size_non_ready_buffers(struct starpu_task *task, unsigned worker, size_t *non_readyp, size_t *non_allocatedp, size_t *non_loadingp)
+void _starpu_size_non_ready_buffers(struct starpu_task *task, unsigned worker, size_t *non_readyp, size_t *non_loadingp, size_t *non_allocatedp)
 {
-	size_t non_ready = 0, non_allocated = 0, non_loading = 0;
+	size_t non_ready = 0, non_loading = 0, non_allocated = 0;
 	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 	unsigned index;
 
@@ -365,25 +365,28 @@ void _starpu_size_non_ready_buffers(struct starpu_task *task, unsigned worker, s
 	{
 		starpu_data_handle_t handle;
 		unsigned buffer_node = _starpu_task_data_get_node_on_worker(task, index, worker);
+		enum starpu_data_access_mode mode;
 
 		handle = STARPU_TASK_GET_HANDLE(task, index);
+		mode = STARPU_TASK_GET_MODE(task, index);
 
 		int is_allocated, is_valid, is_loading;
 		starpu_data_query_status2(handle, buffer_node, &is_allocated, &is_valid, &is_loading, NULL);
 
-		if (!is_valid)
+		if (!is_allocated)
+			non_allocated+=starpu_data_get_size(handle);
+
+		if (mode & STARPU_R && !is_valid)
 		{
 			non_ready+=starpu_data_get_size(handle);
-			if (!is_allocated)
-				non_allocated+=starpu_data_get_size(handle);
 			if (!is_loading)
 				non_loading+=starpu_data_get_size(handle);
 		}
 	}
 
 	*non_readyp = non_ready;
-	*non_allocatedp = non_allocated;
 	*non_loadingp = non_loading;
+	*non_allocatedp = non_allocated;
 }
 
 int _starpu_count_non_ready_buffers(struct starpu_task *task, unsigned worker)
@@ -427,8 +430,8 @@ struct starpu_task *_starpu_fifo_pop_first_ready_task(struct _starpu_fifo_taskq
 		int first_task_priority = task->priority;
 
 		size_t non_ready_best = SIZE_MAX;
-		size_t non_allocated_best = SIZE_MAX;
 		size_t non_loading_best = SIZE_MAX;
+		size_t non_allocated_best = SIZE_MAX;
 
 		for (current = task; current; current = current->next)
 		{
@@ -436,28 +439,34 @@ struct starpu_task *_starpu_fifo_pop_first_ready_task(struct _starpu_fifo_taskq
 
 			if (priority >= first_task_priority)
 			{
-				size_t non_allocated, non_ready, non_loading;
-				_starpu_size_non_ready_buffers(current, workerid, &non_ready, &non_allocated, &non_loading);
+				size_t non_ready, non_loading, non_allocated;
+				_starpu_size_non_ready_buffers(current, workerid, &non_ready, &non_loading, &non_allocated);
 				if (non_ready < non_ready_best)
 				{
 					non_ready_best = non_ready;
-					non_allocated_best = non_allocated;
 					non_loading_best = non_loading;
+					non_allocated_best = non_allocated;
 					task = current;
 
-					if (non_ready == 0)
+					if (non_ready == 0 && non_allocated == 0)
 						break;
 				}
-				else if (non_ready == non_ready_best && non_allocated < non_allocated_best)
+				else if (non_ready == non_ready_best)
 				{
-					non_allocated_best = non_allocated;
-					non_loading_best = non_loading;
-					task = current;
-				}
-				else if (non_ready == non_ready_best && non_allocated == non_allocated_best && non_loading < non_loading_best)
-				{
-					non_loading_best = non_loading;
-					task = current;
+					if (non_loading < non_loading_best)
+					{
+						non_loading_best = non_loading;
+						non_allocated_best = non_allocated;
+						task = current;
+					}
+					else if (non_loading == non_loading_best)
+					{
+						if (non_allocated < non_allocated_best)
+						{
+							non_allocated_best = non_allocated;
+							task = current;
+						}
+					}
 				}
 			}
 		}

+ 1 - 1
src/sched_policies/fifo_queues.h

@@ -71,7 +71,7 @@ struct starpu_task *_starpu_fifo_pop_local_task(struct _starpu_fifo_taskq *fifo)
 struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo, int workerid);
 int _starpu_normalize_prio(int priority, int num_priorities, unsigned sched_ctx_id);
 int _starpu_count_non_ready_buffers(struct starpu_task *task, unsigned worker);
-void _starpu_size_non_ready_buffers(struct starpu_task *task, unsigned worker, size_t *non_ready, size_t *non_allocated, size_t *non_loading);
+void _starpu_size_non_ready_buffers(struct starpu_task *task, unsigned worker, size_t *non_readyp, size_t *non_loadingp, size_t *non_allocatedp);
 struct starpu_task *_starpu_fifo_pop_first_ready_task(struct _starpu_fifo_taskq *fifo_queue, unsigned workerid, int num_priorities);
 
 #pragma GCC visibility pop

+ 21 - 14
src/sched_policies/prio_deque.c

@@ -94,9 +94,10 @@ struct starpu_task *_starpu_prio_deque_deque_first_ready_task(struct _starpu_pri
 			return NULL;
 
 		int first_task_priority = task->priority;
+
 		size_t non_ready_best = SIZE_MAX;
-		size_t non_allocated_best = SIZE_MAX;
 		size_t non_loading_best = SIZE_MAX;
+		size_t non_allocated_best = SIZE_MAX;
 
 		for (current = starpu_task_prio_list_begin(&pdeque->list);
 		     current != starpu_task_prio_list_end(&pdeque->list);
@@ -106,28 +107,34 @@ struct starpu_task *_starpu_prio_deque_deque_first_ready_task(struct _starpu_pri
 
 			if (priority >= first_task_priority)
 			{
-				size_t non_ready, non_allocated, non_loading;
-				_starpu_size_non_ready_buffers(current, workerid, &non_ready, &non_allocated, &non_loading);
+				size_t non_ready, non_loading, non_allocated;
+				_starpu_size_non_ready_buffers(current, workerid, &non_ready, &non_loading, &non_allocated);
 				if (non_ready < non_ready_best)
 				{
 					non_ready_best = non_ready;
-					non_allocated_best = non_allocated;
 					non_loading_best = non_loading;
+					non_allocated_best = non_allocated;
 					task = current;
 
-					if (non_ready == 0)
+					if (non_ready == 0 && non_allocated == 0)
 						break;
 				}
-				else if (non_ready == non_ready_best && non_allocated < non_allocated_best)
-				{
-					non_allocated_best = non_allocated;
-					non_loading_best = non_loading;
-					task = current;
-				}
-				else if (non_ready == non_ready_best && non_allocated == non_allocated_best && non_loading < non_loading_best)
+				else if (non_ready == non_ready_best)
 				{
-					non_loading_best = non_loading;
-					task = current;
+					if (non_loading < non_loading_best)
+					{
+						non_loading_best = non_loading;
+						non_allocated_best = non_allocated;
+						task = current;
+					}
+					else if (non_loading == non_loading_best)
+					{
+						if (non_allocated < non_allocated_best)
+						{
+							non_allocated_best = non_allocated;
+							task = current;
+						}
+					}
 				}
 			}
 		}