Parcourir la source

add theoretical bound computation infrastructure

Samuel Thibault il y a 14 ans
Parent
commit
66a9b3039a
7 fichiers modifiés avec 262 ajouts et 0 suppressions
  1. 1 0
      Makefile.am
  2. 2 0
      configure.ac
  3. 13 0
      examples/lu/lu_example.c
  4. 2 0
      src/Makefile.am
  5. 4 0
      src/core/task.c
  6. 213 0
      src/profiling/bound.c
  7. 27 0
      src/profiling/bound.h

+ 1 - 0
Makefile.am

@@ -40,6 +40,7 @@ include_HEADERS = 				\
 	include/starpu_opencl.h			\
 	include/starpu_expert.h			\
 	include/starpu_profiling.h		\
+	include/starpu_bound.h			\
 	include/starpu_scheduler.h
 
 noinst_HEADERS = \

+ 2 - 0
configure.ac

@@ -594,6 +594,8 @@ if test x$enable_stats = xyes; then
         AC_DEFINE(STARPU_DATA_STATS, [1], [enable statistics])
 fi
 
+#AC_CHECK_HEADERS([glpk.h])
+#AC_HAVE_LIBRARY([glpk])
 
 ###############################################################################
 #                                                                             #

+ 13 - 0
examples/lu/lu_example.c

@@ -31,6 +31,7 @@ static unsigned check = 0;
 static unsigned pivot = 0;
 static unsigned no_stride = 0;
 static unsigned profile = 0;
+static unsigned bound = 0;
 
 TYPE *A, *A_saved;
 
@@ -66,6 +67,10 @@ static void parse_args(int argc, char **argv)
 		if (strcmp(argv[i], "-profile") == 0) {
 			profile = 1;
 		}
+
+		if (strcmp(argv[i], "-bound") == 0) {
+			bound = 1;
+		}
 	}
 }
 
@@ -261,6 +266,9 @@ int main(int argc, char **argv)
 
 	display_matrix(A, size, size, "A");
 
+	if (bound)
+		starpu_bound_start();
+
 	if (profile)
 		starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
 
@@ -309,6 +317,11 @@ int main(int argc, char **argv)
 		starpu_bus_profiling_helper_display_summary();
 	}
 
+	if (bound) {
+		starpu_bound_stop();
+		starpu_bound_print_lp(stderr);
+	}
+
 	if (check)
 	{
 		if (pivot)

+ 2 - 0
src/Makefile.am

@@ -97,6 +97,7 @@ noinst_HEADERS = 						\
 	drivers/cuda/driver_cuda.h				\
 	drivers/opencl/driver_opencl.h				\
 	drivers/opencl/driver_opencl_utils.h			\
+	profiling/bound.h					\
 	profiling/profiling.h
 
 libstarpu_la_SOURCES = 						\
@@ -168,6 +169,7 @@ libstarpu_la_SOURCES = 						\
 	util/starpu_task_list.c					\
 	debug/latency.c						\
 	profiling/profiling.c					\
+	profiling/bound.c					\
 	profiling/bus_profiling_helpers.c
 
 if STARPU_USE_CPU

+ 4 - 0
src/core/task.c

@@ -22,6 +22,7 @@
 #include <common/config.h>
 #include <common/utils.h>
 #include <profiling/profiling.h>
+#include <profiling/bound.h>
 
 /* XXX this should be reinitialized when StarPU is shutdown (or we should make
  * sure that no task remains !) */
@@ -243,6 +244,9 @@ int starpu_task_submit(struct starpu_task *task)
 		j = (struct starpu_job_s *)task->starpu_private;
 	}
 
+	/* notify bound computation of a new task */
+	_starpu_bound_record(j);
+
 	ret = _starpu_submit_job(j, 0);
 
 	if (is_sync)

+ 213 - 0
src/profiling/bound.c

@@ -0,0 +1,213 @@
+/*
+ * StarPU
+ * Copyright (C) Université Bordeaux 1, CNRS 2010 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * Record which kinds of tasks have been executed, to later on compute an upper
+ * bound of the performance that could have theoretically been achieved
+ */
+
+#include <starpu.h>
+#include <profiling/bound.h>
+#include <core/jobs.h>
+
+/*
+ * One entry per distinct kind of recorded task, i.e. per (codelet, footprint)
+ * pair.  Entries are chained into a singly-linked list.
+ */
+struct task_pool {
+	/* Which codelet has been executed */
+	struct starpu_codelet_t *cl;
+	/* Task footprint key */
+	uint32_t footprint;
+	/* Number of tasks of this kind */
+	unsigned long n;
+	/* Other tasks (next entry of the list, NULL at the end) */
+	struct task_pool *next;
+};
+
+/* Head of the list of recorded task kinds, and a shortcut pointer that
+ * _starpu_bound_record() checks before walking the whole list */
+static struct task_pool *task_pools, *last;
+/* Non-zero while task submissions are being recorded */
+static int recording;
+
+/* Held around every update of the variables above */
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Start recording submitted tasks for the theoretical bound computation.
+ *
+ * Task pools left over from a previous recording session are detached from
+ * the global list while the mutex is held, then released.
+ */
+void starpu_bound_start(void)
+{
+	struct task_pool *tp, *next;
+
+	PTHREAD_MUTEX_LOCK(&mutex);
+	tp = task_pools;
+	task_pools = NULL;
+	last = NULL;
+	recording = 1;
+	PTHREAD_MUTEX_UNLOCK(&mutex);
+
+	/* The old list is no longer reachable through the globals, so it can
+	 * be freed outside the lock.  Fetch the successor before freeing a
+	 * node: tp->next must not be read once tp has been released. */
+	for ( ; tp; tp = next) {
+		next = tp->next;
+		free(tp);
+	}
+}
+
+/*
+ * Account for job `j` in the per-(codelet, footprint) task counts.
+ *
+ * Nothing is recorded when recording is disabled, when the task has no
+ * codelet, when the codelet has no performance model, or when that model is
+ * not history-based.  The job footprint is computed here if it was not
+ * already available.
+ */
+void _starpu_bound_record(starpu_job_t j)
+{
+	struct task_pool *tp;
+
+	if (!recording)
+		return;
+
+	/* No codelet, nothing to measure */
+	if (!j->task->cl)
+		return;
+	/* No performance model, no time duration estimation */
+	if (!j->task->cl->model)
+		return;
+	/* Only support history based */
+	if (j->task->cl->model->type != STARPU_HISTORY_BASED)
+		return;
+
+	PTHREAD_MUTEX_LOCK(&mutex);
+	/* Re-check, this time with mutex held */
+	if (!recording) {
+		PTHREAD_MUTEX_UNLOCK(&mutex);
+		return;
+	}
+
+	if (STARPU_UNLIKELY(!j->footprint_is_computed))
+		_starpu_compute_buffers_footprint(j);
+
+	/* Fast path: same kind of task as the previously recorded one.
+	 * Otherwise walk the list; tp ends up NULL when no pool matches. */
+	if (last && last->cl == j->task->cl && last->footprint == j->footprint)
+		tp = last;
+	else
+		for (tp = task_pools; tp; tp = tp->next)
+			if (tp->cl == j->task->cl && tp->footprint == j->footprint)
+				break;
+
+	if (!tp) {
+		/* First task of this kind: create its pool at the list head */
+		tp = malloc(sizeof(*tp));
+		if (!tp) {
+			/* Out of memory: give up on recording this task
+			 * rather than dereferencing a NULL pointer */
+			PTHREAD_MUTEX_UNLOCK(&mutex);
+			return;
+		}
+		tp->cl = j->task->cl;
+		tp->footprint = j->footprint;
+		tp->n = 0;
+		tp->next = task_pools;
+		task_pools = tp;
+	}
+
+	/* Remember this pool so that the fast path above can hit on the next
+	 * task of the same kind */
+	last = tp;
+
+	/* One more task of this kind */
+	tp->n++;
+
+	PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+/*
+ * Stop recording: tasks submitted from now on are no longer counted.  The
+ * pools recorded so far are left untouched, so they can still be printed.
+ */
+void starpu_bound_stop(void)
+{
+	PTHREAD_MUTEX_LOCK(&mutex);
+	recording = 0;
+	PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+/*
+ * Fill times[w][t] for every worker w in [0, nw) and every recorded task
+ * pool t (in list order) with the history-based expected length of that kind
+ * of task on the worker's architecture, divided by 1000.  The LP output
+ * labels these values as milliseconds.
+ *
+ * Reads the global pool list without locking it; the callers in this file
+ * hold the mutex around the call.
+ */
+static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt]) {
+	int worker;
+
+	for (worker = 0; worker < nw; worker++) {
+		struct task_pool *pool = task_pools;
+		int kind = 0;
+
+		while (pool) {
+			/* Stand-in job that only carries the footprint of
+			 * this kind of task */
+			struct starpu_job_s fake_job = {
+				.footprint = pool->footprint,
+				.footprint_is_computed = 1,
+			};
+			enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(worker);
+
+			times[worker][kind] = _starpu_history_based_job_expected_length(pool->cl->model, arch, &fake_job) / 1000.;
+
+			kind++;
+			pool = pool->next;
+		}
+	}
+}
+
+/*
+ * Write the recorded task mix to `output` as a linear programming problem.
+ *
+ * The problem minimizes tmax.  Variable w<W>t<T>n stands for the number of
+ * tasks of kind T run by worker W: for each worker, the time spent on its
+ * tasks must not exceed tmax, and for each kind of task the per-worker
+ * counts must add up to the number of recorded tasks of that kind.
+ *
+ * The mutex is held for the whole duration of the output.
+ */
+void starpu_bound_print_lp(FILE *output)
+{
+	struct task_pool *tp;
+	int nt; /* Number of different kinds of tasks */
+	int nw; /* Number of different workers */
+	int t, w;
+
+	PTHREAD_MUTEX_LOCK(&mutex);
+
+	nw = starpu_worker_get_count();
+	nt = 0;
+	for (tp = task_pools; tp; tp = tp->next)
+		nt++;
+
+	{
+		/* A variable length array must have strictly positive
+		 * dimensions, so round empty dimensions up to one element */
+		double times[nw > 0 ? nw : 1][nt > 0 ? nt : 1];
+
+		/* Without any worker or any task there is nothing to fill in,
+		 * and the loops below never read the array */
+		if (nw > 0 && nt > 0)
+			_starpu_get_tasks_times(nw, nt, times);
+
+		fprintf(output, "/* StarPU upper bound linear programming problem */\n");
+		fprintf(output, "/* We want to minimize total execution time (ms) */\n");
+		fprintf(output, "min: tmax;\n\n");
+
+		fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
+		for (w = 0; w < nw; w++) {
+			char name[32];
+			starpu_worker_get_name(w, name, sizeof(name));
+			fprintf(output, "/* worker %s */\n", name);
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+				fprintf(output, "\t%+f * w%dt%dn", (float) times[w][t], w, t);
+			fprintf(output, " <= tmax;\n");
+		}
+		fprintf(output, "\n");
+
+		/* And we have to have computed exactly all tasks */
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+			fprintf(output, "/* task %s key %lx */\n", tp->cl->model->symbol, (unsigned long) tp->footprint);
+			for (w = 0; w < nw; w++)
+				fprintf(output, "\t+w%dt%dn", w, t);
+			/* tp->n is an unsigned long */
+			fprintf(output, " = %lu;\n", tp->n);
+			/* Show actual values */
+			/* NOTE(review): %ld assumes per_worker_stats[] holds
+			 * long values; confirm against the codelet definition */
+			fprintf(output, "/*");
+			for (w = 0; w < nw; w++)
+				fprintf(output, "\t+%ld", tp->cl->per_worker_stats[w]);
+			fprintf(output, "\t*/\n\n");
+		}
+	}
+
+	PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+/*
+ * Write the recorded task mix to `output` in MPS format.
+ *
+ * Not implemented yet: only the NAME and ROWS records followed by a TODO
+ * marker are emitted, one per line.  The task times are already gathered and
+ * the loop skeleton is in place for the rows still to be written.
+ *
+ * The mutex is held for the whole duration of the output.
+ */
+void starpu_bound_print_mps(FILE *output)
+{
+	struct task_pool * tp;
+	int nt; /* Number of different kinds of tasks */
+	int nw; /* Number of different workers */
+	int t, w;
+
+	PTHREAD_MUTEX_LOCK(&mutex);
+
+	nw = starpu_worker_get_count();
+	nt = 0;
+	for (tp = task_pools; tp; tp = tp->next)
+		nt++;
+
+	{
+		/* A variable length array must have strictly positive
+		 * dimensions, so round empty dimensions up to one element */
+		double times[nw > 0 ? nw : 1][nt > 0 ? nt : 1];
+
+		/* Without any worker or any task there is nothing to fill in */
+		if (nw > 0 && nt > 0)
+			_starpu_get_tasks_times(nw, nt, times);
+
+		/* MPS is a line-oriented format: each record goes on its own
+		 * line */
+		fprintf(output, "NAME           StarPU theoretical bound\n");
+		fprintf(output, "ROWS\n");
+		fprintf(output, "TODO\n");
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+			for (w = 0; w < nw; w++)
+				;
+		}
+	}
+
+	PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+/*
+ * Placeholder for the direct computation of the bound: for now this only
+ * writes a reminder to `output`, without a trailing newline.
+ */
+void starpu_bound_print(FILE *output)
+{
+	static const char reminder[] = "TODO: use glpk";
+
+	fprintf(output, "%s", reminder);
+}

+ 27 - 0
src/profiling/bound.h

@@ -0,0 +1,27 @@
+/*
+ * StarPU
+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __BOUND_H__
+#define __BOUND_H__
+
+#include <starpu.h>
+#include <starpu_bound.h>
+#include <core/jobs.h>
+
+/* Record task for bound computation: counts job j among the tasks of its
+ * kind.  Called at task submission time; does nothing unless recording has
+ * been enabled with starpu_bound_start(). */
+extern void _starpu_bound_record(starpu_job_t j);
+
+#endif // __BOUND_H__