Browse Source

Add experimental support for temporal data locality in ws and lws

Samuel Thibault 9 years ago
parent
commit
dcb41bb207
3 changed files with 203 additions and 15 deletions
  1. 1 0
      ChangeLog
  2. 201 14
      src/sched_policies/work_stealing_policy.c
  3. 1 1
      tests/datawizard/locality.c

+ 1 - 0
ChangeLog

@@ -119,6 +119,7 @@ New features:
   * Add graph inspection facility for schedulers.
   * New STARPU_LOCALITY flag to mark data which should be taken into account
     by schedulers for improving locality.
+  * Experimental support for data locality in ws and lws.
 
 Small features:
   * Tasks can now have a name (via the field const char *name of

+ 201 - 14
src/sched_policies/work_stealing_policy.c

@@ -32,14 +32,58 @@
 /* Experimental (dead) code which needs to be tested, fixed... */
 /* #define USE_OVERLOAD */
 
+/*
+ * Experimental code for improving data cache locality:
+ *
+ * USE_LOCALITY:
+ * - for each data, we record on which worker it was last accessed with the
+ *   locality flag.
+ *
+ * - when pushing a ready task, we choose the worker which has last accessed the
+ *   most data of the task with the locality flag.
+ *
+ * USE_LOCALITY_TASKS:
+ * - for each worker, we record the locality data that the task used last (i.e. a rough
+ *   estimation of what is contained in the innermost caches).
+ *
+ * - for each worker, we have a hash table associating from a data handle to
+ *   all the ready tasks pushed to it that will use it with the locality flag.
+ *
+ * - When fetching a task from a queue, pick a task which has the biggest number
+ *   of data estimated to be contained in the cache.
+ */
+
+//#define USE_LOCALITY
+
+
+//#define USE_LOCALITY_TASKS
+
+/* Maximum number of recorded locality data per task */
+#define MAX_LOCALITY 8
+
+/* Entry for queued_tasks_per_data: records that a queued task is accessing the data with locality flag */
+struct locality_entry {
+	UT_hash_handle hh;
+	starpu_data_handle_t data;
+	struct starpu_task *task;
+};
+
 struct _starpu_work_stealing_data_per_worker
 {
 	struct _starpu_fifo_taskq *queue_array;
 	int *proxlist;
-};
 
-/* Experimental code for improving data cache locality */
-//#define USE_LOCALITY
+#ifdef USE_LOCALITY_TASKS
+	/* This records the same as queue_array, but hashed by data accessed with locality flag.  */
+	/* FIXME: we record only one task per data, assuming that the access is
+	 * RW, and thus only one task is ready to write to it. Do we really need to handle the R case too? */
+	struct locality_entry *queued_tasks_per_data;
+
+	/* This records the last data accessed by the worker */
+	starpu_data_handle_t last_locality[MAX_LOCALITY];
+	int nlast_locality;
+#endif
+};
 
 struct _starpu_work_stealing_data
 {
@@ -137,7 +181,6 @@ static unsigned select_worker_locality(struct starpu_task *task, unsigned sched_
 		return -1;
 
 	unsigned i, n;
-	starpu_data_handle_t locality[nbuffers];
 	unsigned ndata[STARPU_NMAXWORKERS] = { 0 };
 	int best_worker = -1;
 	unsigned best_ndata = 0;
@@ -146,9 +189,10 @@ static unsigned select_worker_locality(struct starpu_task *task, unsigned sched_
 	for (i = 0; i < nbuffers; i++)
 		if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY)
 		{
-			locality[n] = STARPU_TASK_GET_HANDLE(task, i);
-			if (locality[n]->last_locality >= 0)
-				ndata[locality[n]->last_locality]++;
+			starpu_data_handle_t data = STARPU_TASK_GET_HANDLE(task, i);
+			int locality = data->last_locality;
+			if (locality >= 0)
+				ndata[locality]++;
 			n++;
 		}
 
@@ -173,7 +217,8 @@ static unsigned select_worker_locality(struct starpu_task *task, unsigned sched_
 	return best_worker;
 }
 
-static void record_worker_locality(struct starpu_task *task, int workerid)
+/* Record in the data which worker will handle the task with the locality flag */
+static void record_data_locality(struct starpu_task *task, int workerid)
 {
 	/* Record where in locality data where the task went */
 	unsigned i;
@@ -183,9 +228,146 @@ static void record_worker_locality(struct starpu_task *task, int workerid)
 			STARPU_TASK_GET_HANDLE(task, i)->last_locality = workerid;
 		}
 }
+#else
+static void record_data_locality(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED)
+{
+}
+#endif
+
+#ifdef USE_LOCALITY_TASKS
+/* Record in the worker which data it used last with the locality flag */
+static void record_worker_locality(struct starpu_task *task, int workerid, unsigned sched_ctx_id)
+{
+	/* Record where in locality data where the task went */
+	unsigned i;
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid];
+
+	data->nlast_locality = 0;
+	for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++)
+		if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY)
+		{
+			data->last_locality[data->nlast_locality] = STARPU_TASK_GET_HANDLE(task, i);
+			data->nlast_locality++;
+			if (data->nlast_locality == MAX_LOCALITY)
+				break;
+		}
+}
+/* Called when pushing a task to a queue */
+static void locality_pushed_task(struct starpu_task *task, int workerid, unsigned sched_ctx_id)
+{
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid];
+	unsigned i;
+	for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++)
+		if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY)
+		{
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
+			struct locality_entry *entry;
+			HASH_FIND_PTR(data->queued_tasks_per_data, &handle, entry);
+			if (STARPU_LIKELY(!entry))
+			{
+				entry = malloc(sizeof(*entry));
+				entry->data = handle;
+				entry->task = task;
+				HASH_ADD_PTR(data->queued_tasks_per_data, data, entry);
+			}
+		}
+}
+
+/* Pick a task from workerid's queue, for execution on target */
+static struct starpu_task *ws_pick_task(int source, int target, unsigned sched_ctx_id)
+{
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	struct _starpu_work_stealing_data_per_worker *data_source = &ws->per_worker[source];
+	struct _starpu_work_stealing_data_per_worker *data_target = &ws->per_worker[target];
+	unsigned i, j, n = data_target->nlast_locality;
+	struct starpu_task *(tasks[MAX_LOCALITY]) = { NULL }, *best_task;
+	int ntasks[MAX_LOCALITY] = { 0 }, best_n; /* Number of locality data for this worker used by this task */
+	/* Look at the last data accessed by this worker */
+	STARPU_ASSERT(n < MAX_LOCALITY);
+	for (i = 0; i < n; i++)
+	{
+		starpu_data_handle_t handle = data_target->last_locality[i];
+		struct locality_entry *entry;
+		HASH_FIND_PTR(data_source->queued_tasks_per_data, &handle, entry);
+		if (entry)
+		{
+			/* Record task */
+			tasks[i] = entry->task;
+			ntasks[i] = 1;
+
+			/* And increment counter of the same task */
+			for (j = 0; j < i; j++)
+			{
+				if (tasks[j] == tasks[i])
+				{
+					ntasks[j]++;
+					break;
+				}
+			}
+		}
+	}
+	/* Now find the task with most locality data for this worker */
+	best_n = 0;
+	for (i = 0; i < n; i++)
+	{
+		if (ntasks[i] > best_n)
+		{
+			best_task = tasks[i];
+			best_n = ntasks[i];
+		}
+	}
+
+	if (best_n > 0)
+	{
+		/* found an interesting task, try to pick it! */
+		if (_starpu_fifo_pop_this_task(data_source->queue_array, target, best_task))
+			return best_task;
+	}
 
+	/* Didn't find an interesting task, or couldn't run it :( */
+	return _starpu_fifo_pop_task(data_source->queue_array, target);
+}
+
+/* Called when popping a task from a queue */
+static void locality_popped_task(struct starpu_task *task, int workerid, unsigned sched_ctx_id)
+{
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid];
+	unsigned i;
+	for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++)
+		if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY)
+		{
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
+			struct locality_entry *entry;
+			HASH_FIND_PTR(data->queued_tasks_per_data, &handle, entry);
+			if (STARPU_LIKELY(entry))
+			{
+				if (entry->task == task)
+				{
+					HASH_DEL(data->queued_tasks_per_data, entry);
+					free(entry);
+				}
+			}
+		}
+}
 #else
-static void record_worker_locality(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED)
+static void record_worker_locality(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED)
+{
+}
+/* Called when pushing a task to a queue */
+static void locality_pushed_task(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED)
+{
+}
+/* Pick a task from workerid's queue, for execution on target */
+static struct starpu_task *ws_pick_task(int source, int target, unsigned sched_ctx_id)
+{
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	return _starpu_fifo_pop_task(ws->per_worker[source].queue_array, target);
+}
+/* Called when popping a task from a queue */
+static void locality_popped_task(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED)
 {
 }
 #endif
@@ -347,9 +529,10 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 
 	STARPU_ASSERT(workerid != -1);
 
-	task = _starpu_fifo_pop_task(ws->per_worker[workerid].queue_array, workerid);
+	task = ws_pick_task(workerid, workerid, sched_ctx_id);
 	if (task)
 	{
+		locality_popped_task(task, workerid, sched_ctx_id);
 		/* there was a local task */
 		return task;
 	}
@@ -370,11 +553,13 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 	STARPU_PTHREAD_MUTEX_LOCK_SCHED(victim_sched_mutex);
 
 	if (ws->per_worker[victim].queue_array != NULL && ws->per_worker[victim].queue_array->ntasks > 0)
-		task = _starpu_fifo_pop_task(ws->per_worker[victim].queue_array, workerid);
+		task = ws_pick_task(victim, workerid, sched_ctx_id);
 	if (task)
 	{
 		_STARPU_TRACE_WORK_STEALING(workerid, victim);
-		record_worker_locality(task, workerid);
+		record_data_locality(task, workerid);
+		record_worker_locality(task, workerid, sched_ctx_id);
+		locality_popped_task(task, victim, sched_ctx_id);
 	}
 
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(victim_sched_mutex);
@@ -383,9 +568,10 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 	if(!task)
 	{
 		if (ws->per_worker[workerid].queue_array != NULL && ws->per_worker[workerid].queue_array->ntasks > 0)
-			task = _starpu_fifo_pop_task(ws->per_worker[workerid].queue_array, workerid);
+			task = ws_pick_task(workerid, workerid, sched_ctx_id);
 		if (task)
 		{
+			locality_popped_task(task, workerid, sched_ctx_id);
 			/* there was a local task */
 			return task;
 		}
@@ -423,7 +609,7 @@ pick_worker:
 	if (ws->per_worker[workerid].queue_array == NULL)
 		goto pick_worker;
 
-	record_worker_locality(task, workerid);
+	record_data_locality(task, workerid);
 
 #ifdef HAVE_AYUDAME_H
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
@@ -435,6 +621,7 @@ pick_worker:
 #endif
 
 	_starpu_fifo_push_task(ws->per_worker[workerid].queue_array, task);
+	locality_pushed_task(task, workerid, sched_ctx_id);
 
 	starpu_push_task_end(task);
 

+ 1 - 1
tests/datawizard/locality.c

@@ -108,7 +108,7 @@ int main(int argc, char *argv[])
 	{
 		for (i = 1; i < N-1; i++)
 		{
-			printf("%d ", task_worker[i][loop]);
+			printf("%02d ", task_worker[i][loop]);
 		}
 		printf("\n");
 	}