Explorar o código

Add task scalability benchmark

Samuel Thibault hai 12 anos
pai
achega
81cfe0c214

+ 1 - 0
tests/Makefile.am

@@ -216,6 +216,7 @@ noinst_PROGRAMS =				\
 	microbenchs/async_tasks_overhead	\
 	microbenchs/sync_tasks_overhead		\
 	microbenchs/tasks_overhead		\
+	microbenchs/tasks_size_overhead		\
 	microbenchs/prefetch_data_on_node 	\
 	microbenchs/redundant_buffer		\
 	microbenchs/local_pingpong		\

+ 209 - 0
tests/microbenchs/tasks_size_overhead.c

@@ -0,0 +1,209 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* This benchmark submits batches of tasks (a thousand by default) of the same
+ * (small) duration, for varying numbers of CPUs and varying task durations.
+ *
+ * Use ./tasks_size_overhead.sh to generate a plot of the result.
+ *
+ * Thanks Martin Tillenius for the idea.
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <starpu.h>
+#include "../helper.h"
+
+/* Task sizes (iteration counts handed to func()) start at START and are
+ * multiplied by FACTOR at each step for as long as they stay below STOP. */
+#define START 1000
+#define STOP 1000000
+#define FACTOR 4
+
+/* StarPU handles and the host buffers registered behind them; at most 8
+ * buffers can be attached to the tasks. */
+static starpu_data_handle_t data_handles[8];
+static float *buffers[8];
+
+/* Number of tasks submitted per measurement (-i) and number of buffers
+ * attached to each task (-b). */
+static unsigned ntasks = 1000;
+static unsigned nbuffers = 0;
+
+/* Array of ntasks task structures, allocated in main() and re-initialized
+ * before every measurement. */
+static struct starpu_task *tasks;
+
+/* CPU kernel: count up to the iteration number carried in ARG and do nothing
+ * else.  The data descriptors are ignored.  The counter is volatile,
+ * presumably so that the compiler cannot drop the otherwise empty loop. */
+static void func(void *descr[] __attribute__ ((unused)), void *arg)
+{
+	const unsigned iterations = (uintptr_t)arg;
+	volatile unsigned spin = 0;
+
+	while (spin < iterations)
+		spin++;
+}
+
+/* Codelet used by every task of the benchmark: CPU only, running func().
+ * The buffer count starts at zero and is overwritten by parse_args() when -b
+ * is given; all eight access-mode slots are pre-filled with STARPU_RW. */
+static struct starpu_codelet codelet =
+{
+	.modes =
+	{
+		STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW,
+		STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW
+	},
+	.nbuffers = 0,
+	.cpu_funcs = {func, NULL},
+	.where = STARPU_CPU
+};
+
+/* Parse TEXT as a decimal count in [0, MAX] and store it in *OUT.
+ * Returns 0 on success, -1 if TEXT is empty, not a number, negative, followed
+ * by trailing characters, or out of range. */
+static int parse_count(const char *text, unsigned long max, unsigned *out)
+{
+	char *end;
+	long value;
+
+	errno = 0;
+	value = strtol(text, &end, 10);
+	if (end == text || *end != '\0' || errno == ERANGE)
+		return -1;
+	if (value < 0 || (unsigned long) value > max)
+		return -1;
+
+	*out = (unsigned) value;
+	return 0;
+}
+
+/* Print the command-line synopsis on STREAM. */
+static void print_usage(FILE *stream, const char *progname)
+{
+	fprintf(stream, "Usage: %s [-i ntasks] [-b nbuffers] [-h]\n", progname);
+}
+
+/* Parse the command line.
+ *
+ *   -i ntasks    number of tasks submitted per measurement
+ *   -b nbuffers  number of buffers attached to each task; also stored in
+ *                codelet.nbuffers, and limited to the capacity of
+ *                data_handles[]
+ *   -h           print the usage message and exit successfully
+ *
+ * An invalid value or an unknown option terminates the program with
+ * EXIT_FAILURE instead of being silently accepted. */
+static void parse_args(int argc, char **argv)
+{
+	/* -b must never exceed the fixed-size handle/buffer arrays */
+	const unsigned long max_buffers = sizeof(data_handles)/sizeof(data_handles[0]);
+	int c;
+	while ((c = getopt(argc, argv, "i:b:h")) != -1)
+	switch(c)
+	{
+		case 'i':
+			if (parse_count(optarg, (unsigned) -1, &ntasks) < 0)
+			{
+				fprintf(stderr, "%s: invalid number of tasks '%s'\n", argv[0], optarg);
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'b':
+			if (parse_count(optarg, max_buffers, &nbuffers) < 0)
+			{
+				fprintf(stderr, "%s: invalid number of buffers '%s' (at most %lu)\n", argv[0], optarg, max_buffers);
+				exit(EXIT_FAILURE);
+			}
+			codelet.nbuffers = nbuffers;
+			break;
+		case 'h':
+			print_usage(stderr, argv[0]);
+			exit(EXIT_SUCCESS);
+		default:
+			/* getopt() has already reported the offending option */
+			print_usage(stderr, argv[0]);
+			exit(EXIT_FAILURE);
+	}
+}
+
+/* Append one "<bench_id>\t<seconds>" record to
+ * $STARPU_BENCH_DIR/tasks_size_overhead_total.dat.  Does nothing unless both
+ * STARPU_BENCH_DIR and STARPU_BENCH_ID are set.  Failures are reported on
+ * stderr but are not fatal. */
+static void record_total_time(double seconds)
+{
+	const char *output_dir = getenv("STARPU_BENCH_DIR");
+	const char *bench_id = getenv("STARPU_BENCH_ID");
+	char file[1024];
+	FILE *f;
+	int len;
+
+	if (!output_dir || !bench_id)
+		return;
+
+	/* The directory comes from the environment: bound the copy */
+	len = snprintf(file, sizeof(file), "%s/tasks_size_overhead_total.dat", output_dir);
+	if (len < 0 || (size_t) len >= sizeof(file))
+	{
+		fprintf(stderr, "STARPU_BENCH_DIR is too long, timing not recorded\n");
+		return;
+	}
+
+	f = fopen(file, "a");
+	if (!f)
+	{
+		perror(file);
+		return;
+	}
+	fprintf(f, "%s\t%f\n", bench_id, seconds);
+	fclose(f);
+}
+
+/* Benchmark entry point.
+ *
+ * Starts StarPU once to count the CPU workers, prints a header and the raw
+ * duration of func() for each task size, then for every CPU count from 1 to
+ * the total restarts StarPU with that many CPUs and, for each task size,
+ * times the submission and completion of ntasks tasks.  Each measurement
+ * prints (time * ncpus) / ntasks in microseconds and the total time in
+ * seconds.
+ *
+ * Returns EXIT_SUCCESS on completion, STARPU_TEST_SKIPPED when StarPU reports
+ * -ENODEV, and EXIT_FAILURE on allocation failure or too many buffers. */
+int main(int argc, char **argv)
+{
+	const unsigned max_buffers = sizeof(buffers)/sizeof(buffers[0]);
+	int ret;
+	int status = EXIT_SUCCESS;
+	unsigned i;
+	unsigned size;
+	unsigned totcpus, ncpus;
+
+	double timing;
+	struct timeval start;
+	struct timeval end;
+
+	struct starpu_conf conf;
+
+	unsigned buffer;
+
+	parse_args(argc, argv);
+
+	/* buffers[] and data_handles[] are fixed-size arrays */
+	if (nbuffers > max_buffers)
+	{
+		fprintf(stderr, "%s: at most %u buffers are supported\n", argv[0], max_buffers);
+		return EXIT_FAILURE;
+	}
+
+	/* Get number of CPUs */
+	starpu_conf_init(&conf);
+	conf.ncuda = 0;
+	conf.nopencl = 0;
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	totcpus = starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
+
+	starpu_shutdown();
+
+	/* Allocate data */
+	for (buffer = 0; buffer < nbuffers; buffer++)
+	{
+		buffers[buffer] = malloc(16*sizeof(float));
+		if (!buffers[buffer])
+		{
+			perror("malloc");
+			status = EXIT_FAILURE;
+			goto out;
+		}
+	}
+
+	/* calloc(n, size) checks the multiplication for overflow */
+	tasks = calloc(ntasks, sizeof(*tasks));
+	if (!tasks && ntasks > 0)
+	{
+		perror("calloc");
+		status = EXIT_FAILURE;
+		goto out;
+	}
+
+	/* Emit headers and compute raw tasks speed */
+	FPRINTF(stdout, "# tasks : %u buffers : %u\n", ntasks, nbuffers);
+	FPRINTF(stdout, "# ncpus\t");
+	for (size = START; size < STOP; size *= FACTOR)
+		FPRINTF(stdout, "%u iters(us)\ttotal(s)\t", size);
+	FPRINTF(stdout, "\n");
+	FPRINTF(stdout, "\"\"\t");
+	for (size = START; size < STOP; size *= FACTOR) {
+		double raw_start, raw_end;
+		raw_start = starpu_timing_now();
+		func(NULL, (void*) (uintptr_t) size);
+		raw_end = starpu_timing_now();
+		FPRINTF(stdout, "%.0f       \t\"        \"\t", raw_end-raw_start);
+	}
+	FPRINTF(stdout, "\n");
+	fflush(stdout);
+
+	/* For each number of cpus, benchmark */
+	for (ncpus= 1; ncpus <= totcpus; ncpus++) {
+		FPRINTF(stdout, "%u\t", ncpus);
+		fflush(stdout);
+
+		conf.ncpus = ncpus;
+		ret = starpu_init(&conf);
+		if (ret == -ENODEV)
+		{
+			status = STARPU_TEST_SKIPPED;
+			goto out;
+		}
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+		for (buffer = 0; buffer < nbuffers; buffer++)
+			starpu_vector_data_register(&data_handles[buffer], 0, (uintptr_t)buffers[buffer], 16, sizeof(float));
+
+		for (size = START; size < STOP; size *= FACTOR)
+		{
+			/* submit tasks */
+			gettimeofday(&start, NULL);
+			for (i = 0; i < ntasks; i++)
+			{
+				starpu_task_init(&tasks[i]);
+				tasks[i].callback_func = NULL;
+				tasks[i].cl = &codelet;
+				tasks[i].cl_arg = (void*) (uintptr_t) size;
+				tasks[i].synchronous = 0;
+
+				/* we have 8 buffers at most */
+				for (buffer = 0; buffer < nbuffers; buffer++)
+				{
+					tasks[i].handles[buffer] = data_handles[buffer];
+				}
+
+				ret = starpu_task_submit(&tasks[i]);
+				if (ret == -ENODEV) goto enodev;
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task");
+			}
+			ret = starpu_task_wait_for_all();
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+			gettimeofday(&end, NULL);
+
+			/* Elapsed microseconds, computed in floating point so that
+			 * the seconds part cannot overflow an integer type */
+			timing = (double)(end.tv_sec - start.tv_sec)*1000000 + (double)(end.tv_usec - start.tv_usec);
+
+			FPRINTF(stdout, "%f\t%f\t", (timing*ncpus)/ntasks, timing/1000000);
+			fflush(stdout);
+
+			record_total_time(timing/1000000);
+		}
+
+		for (buffer = 0; buffer < nbuffers; buffer++)
+		{
+			starpu_data_unregister(data_handles[buffer]);
+		}
+
+		starpu_shutdown();
+
+		FPRINTF(stdout, "\n");
+	}
+
+	goto out;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	status = STARPU_TEST_SKIPPED;
+
+out:
+	/* buffers[] is zero-initialized, so entries never allocated are NULL */
+	for (buffer = 0; buffer < nbuffers; buffer++)
+		free(buffers[buffer]);
+	free(tasks);
+	return status;
+}

+ 24 - 0
tests/microbenchs/tasks_size_overhead.gp

@@ -0,0 +1,24 @@
+#!/bin/sh
+OUTPUT=tasks_size_overhead.output
+VALS=$(sed -n -e '4p' < $OUTPUT)
+VAL1=$(echo "$VALS" | cut -d '	' -f 3)
+VAL2=$(echo "$VALS" | cut -d '	' -f 5)
+VAL3=$(echo "$VALS" | cut -d '	' -f 7)
+VAL4=$(echo "$VALS" | cut -d '	' -f 9)
+VAL5=$(echo "$VALS" | cut -d '	' -f 11)
+VAL6=$(echo "$VALS" | cut -d '	' -f 13)
+VAL7=$(echo "$VALS" | cut -d '	' -f 15)
+gnuplot << EOF
+set terminal eps
+set output "tasks_size_overhead.eps"
+set key top left
+plot \
+	"$OUTPUT" using 1:($VAL1)/(\$3) with linespoints title columnheader(2), \
+	"$OUTPUT" using 1:($VAL2)/(\$5) with linespoints title columnheader(4), \
+	"$OUTPUT" using 1:($VAL3)/(\$7) with linespoints title columnheader(6), \
+	"$OUTPUT" using 1:($VAL4)/(\$9) with linespoints title columnheader(8), \
+	"$OUTPUT" using 1:($VAL5)/(\$11) with linespoints title columnheader(10), \
+	"$OUTPUT" using 1:($VAL6)/(\$13) with linespoints title columnheader(12), \
+	"$OUTPUT" using 1:($VAL7)/(\$15) with linespoints title columnheader(14), \
+	x
+EOF

+ 4 - 0
tests/microbenchs/tasks_size_overhead.sh

@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run the benchmark, plot its output and display the resulting figure.
+# Stop at the first failing step so that a failed or skipped benchmark run
+# does not get plotted from incomplete output.
+set -e
+./tasks_size_overhead > tasks_size_overhead.output
+./tasks_size_overhead.gp
+gv tasks_size_overhead.eps