| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239 | /* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2013  Université de Bordeaux 1 * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. *//* This benchmark creates a thousand tasks of the same (small) duration, with * various number of cpus and various durations. * * Use ./tasks_size_overhead.sh to generate a plot of the result. * * Thanks Martin Tillenius for the idea. */#include <sys/time.h>#include <stdio.h>#include <unistd.h>#include <starpu.h>#include "../helper.h"#define START 4#define STOP 4096#ifdef STARPU_QUICK_CHECK#define FACTOR 8#else#define FACTOR 2#endifstarpu_data_handle_t data_handles[8];float *buffers[8];#ifdef STARPU_QUICK_CHECKstatic unsigned ntasks = 10;#elsestatic unsigned ntasks = 1000;#endifstatic unsigned nbuffers = 0;struct starpu_task *tasks;static void func(void *descr[] __attribute__ ((unused)), void *arg){	struct timeval tv1, tv2;	unsigned n = (uintptr_t)arg;	long usec = 0;	gettimeofday(&tv1, NULL);	do	{		gettimeofday(&tv2, NULL);		if (tv2.tv_usec < tv1.tv_usec)		{			tv2.tv_usec += 1000000;			tv2.tv_sec--;		}		usec = (tv2.tv_sec-tv1.tv_sec)*1000000			+ (tv2.tv_usec - tv1.tv_usec);	}	while (usec < n);}static struct starpu_codelet codelet ={	.cpu_funcs = {func, NULL},	.nbuffers = 0,	.modes = {STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R}};static void parse_args(int argc, char **argv){	int c;	while ((c = getopt(argc, argv, "i:b:h")) != -1)	switch(c)	{		case 'i':			ntasks = atoi(optarg);			break;		case 'b':			nbuffers = atoi(optarg);			codelet.nbuffers = nbuffers;			break;		case 'h':			fprintf(stderr, "Usage: %s [-i ntasks] [-b nbuffers] [-h]\n", argv[0]);			break;	}}int main(int argc, char **argv){	int ret;	unsigned i;	unsigned size;	unsigned totcpus, ncpus;	double timing;	struct timeval start;	struct timeval end;	struct starpu_conf conf;	unsigned buffer;	parse_args(argc, argv);	/* Get number of CPUs */	starpu_conf_init(&conf);	conf.ncuda = 0;	conf.nopencl = 0;	ret = starpu_init(&conf);	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");	totcpus = starpu_worker_get_count_by_type(STARPU_CPU_WORKER);	starpu_shutdown();	/* Allocate data */	for (buffer = 0; buffer < nbuffers; buffer++)		buffers[buffer] = (float *) malloc(16*sizeof(float));	tasks = (struct starpu_task *) calloc(1, ntasks*sizeof(struct starpu_task));	/* Emit headers and compute raw tasks speed */	FPRINTF(stdout, "# tasks : %u buffers : %u\n", ntasks, nbuffers);	FPRINTF(stdout, "# ncpus\t");	for (size = START; size <= STOP; size *= FACTOR)		FPRINTF(stdout, "%u iters(us)\ttotal(s)\t", size);	FPRINTF(stdout, "\n");	FPRINTF(stdout, "\"seq\"\t");	for (size = START; size <= STOP; size *= FACTOR)	{		double dstart, dend;		dstart = starpu_timing_now();		for (i = 0; i < ntasks; i++)			func(NULL, (void*) (uintptr_t) size);		dend = starpu_timing_now();		FPRINTF(stdout, "%.0f       \t%f\t", (dend-dstart)/ntasks, (dend-dstart)/1000000);	}	FPRINTF(stdout, "\n");	fflush(stdout);	/* For each number of cpus, benchmark */	for (ncpus= 1; ncpus <= totcpus; ncpus++)	{		FPRINTF(stdout, "%u\t", ncpus);		fflush(stdout);		conf.ncpus = ncpus;		ret = starpu_init(&conf);		if (ret == -ENODEV) return STARPU_TEST_SKIPPED;		STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");		for (buffer = 0; buffer < nbuffers; buffer++)			starpu_vector_data_register(&data_handles[buffer], 0, (uintptr_t)buffers[buffer], 16, sizeof(float));		for (size = START; size <= STOP; size *= FACTOR)		{			/* submit tasks */			gettimeofday(&start, NULL);			for (i = 0; i < ntasks; i++)			{				starpu_task_init(&tasks[i]);				tasks[i].callback_func = NULL;				tasks[i].cl = &codelet;				tasks[i].cl_arg = (void*) (uintptr_t) size;				tasks[i].synchronous = 0;				/* we have 8 buffers at most */				for (buffer = 0; buffer < nbuffers; buffer++)				{					tasks[i].handles[buffer] = data_handles[buffer];				}				ret = starpu_task_submit(&tasks[i]);				if (ret == -ENODEV) goto enodev;				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task");			}			ret = starpu_task_wait_for_all();			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");			gettimeofday(&end, NULL);			for (i = 0; i < ntasks; i++)				starpu_task_clean(&tasks[i]);			timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));			FPRINTF(stdout, "%u\t%f\t", size, timing/1000000);			fflush(stdout);			{				char *output_dir = getenv("STARPU_BENCH_DIR");				char *bench_id = getenv("STARPU_BENCH_ID");				if (output_dir && bench_id)				{					char file[1024];					FILE *f;					sprintf(file, "%s/tasks_size_overhead_total.dat", output_dir);					f = fopen(file, "a");					fprintf(f, "%s\t%f\n", bench_id, timing/1000000);					fclose(f);				}			}		}		for (buffer = 0; buffer < nbuffers; buffer++)		{			starpu_data_unregister(data_handles[buffer]);		}		starpu_shutdown();		FPRINTF(stdout, "\n");	}	free(tasks);	return EXIT_SUCCESS;enodev:	fprintf(stderr, "WARNING: No one can execute this task\n");	/* yes, we do not perform the computation but we did detect that no one 	 * could perform the kernel, so this is not an error from StarPU */	starpu_shutdown();	free(tasks);	return STARPU_TEST_SKIPPED;}
 |