Selaa lähdekoodia

Add support for directly drawing GFlops curves

Samuel Thibault 12 vuotta sitten
vanhempi
commit
438cb42e66

+ 1 - 0
ChangeLog

@@ -103,6 +103,7 @@ Small features:
     the profile of a codelet.
   * File STARPU-REVISION --- containing the SVN revision number from which
     StarPU was compiled --- is installed in the share/doc/starpu directory
+  * starpu_perfmodel_plot can now directly draw GFlops curves.
 
 Changes:
   * Fix the block filter functions.

+ 7 - 1
doc/chapters/advanced-api.texi

@@ -38,7 +38,7 @@ The arguments following the codelets can be of the following types:
 @item
 the specific values @code{STARPU_VALUE}, @code{STARPU_CALLBACK},
 @code{STARPU_CALLBACK_ARG}, @code{STARPU_CALLBACK_WITH_ARG},
-@code{STARPU_PRIORITY}, @code{STARPU_TAG}, followed by the appropriated objects
+@code{STARPU_PRIORITY}, @code{STARPU_TAG}, @code{STARPU_FLOPS}, followed by the appropriate objects
 as defined below.
 @end itemize
 
@@ -85,6 +85,12 @@ this macro is used when calling @code{starpu_insert_task}, and must be
 followed by a tag.
 @end defmac
 
+@defmac STARPU_FLOPS
+this macro is used when calling @code{starpu_insert_task}, and must be followed
+by an amount of floating point operations, as a double. The user may have to
+explicitly cast into double, otherwise parameter passing will not work.
+@end defmac
+
 @deftypefun void starpu_codelet_pack_args ({char **}@var{arg_buffer}, {size_t *}@var{arg_buffer_size}, ...)
 Pack arguments of type @code{STARPU_VALUE} into a buffer which can be
 given to a codelet and later unpacked with the function

+ 6 - 0
doc/chapters/basic-api.texi

@@ -1849,6 +1849,11 @@ A pointer to the next task. This should only be used by StarPU.
 This is only used for tasks that use multiformat handle. This should only be
 used by StarPU.
 
+@item @code{double flops}
+This can be set to the number of floating point operations that the task
+will have to perform. This is useful for easily getting GFlops curves from
+@code{starpu_perfmodel_plot}, and for the hypervisor load balancing.
+
 @item @code{void *starpu_private}
 This is private to StarPU, do not modify. If the task is allocated by hand
 (without starpu_task_create), this field should be set to NULL.
@@ -1857,6 +1862,7 @@ This is private to StarPU, do not modify. If the task is allocated by hand
 This field is set when initializing a task. It prevents a task from being
 submitted if it has not been properly initialized.
 @end table
+
 @end deftp
 
 @deftypefun void starpu_task_init ({struct starpu_task} *@var{task})

+ 17 - 7
doc/chapters/perf-feedback.texi

@@ -411,7 +411,7 @@ display the regression formula, and in the case of non-linear regression, the
 same performance log as for history-based performance models:
 
 @example
-$ starpu_perfmodel_display -s non_linear_memset_regression_based.type
+$ starpu_perfmodel_display -s non_linear_memset_regression_based
 performance model for cpu_impl_0
 	Regression : #sample = 1400
 	Linear: y = alpha size ^ beta
@@ -429,15 +429,25 @@ a3d3725e	4096           	4.763200e+00   	7.650928e-01   	100
 ...
 @end example
 
-The @code{starpu_perfmodel_plot} tool can be used to draw performance models.
-It writes a @code{.gp} file in the current directory, to be run in the
-@code{gnuplot} tool, which shows the corresponding curve.
-
 The same can also be achieved by using StarPU's library API, see
 @ref{Performance Model API} and notably the @code{starpu_perfmodel_load_symbol}
 function. The source code of the @code{starpu_perfmodel_display} tool can be a
 useful example.
 
+The @code{starpu_perfmodel_plot} tool can be used to draw performance models.
+It writes a @code{.gp} file in the current directory, to be run in the
+@code{gnuplot} tool, which shows the corresponding curve.
+
+When the @code{flops} field of tasks is set, @code{starpu_perfmodel_plot} can
+directly draw a GFlops curve, by simply adding the @code{-f} option:
+
+@example
+$ starpu_perfmodel_plot -f -s chol_model_11
+@end example
+
+This will however disable displaying the regression model, for which we can not
+compute GFlops.
+
 When the FxT trace file @code{filename} has been generated, it is possible to
 get a profiling of each codelet by calling:
 @example
@@ -453,10 +463,10 @@ This is also available in the @code{starpu_perfmodel_plot} tool, by passing it
 the fxt trace:
 
 @example
-$ starpu_perfmodel_display -s non_linear_memset_regression_based.type -i /tmp/prof_file_foo_0
+$ starpu_perfmodel_plot -s non_linear_memset_regression_based -i /tmp/prof_file_foo_0
 @end example
 
-It willd produce a @code{.gp} file which contains both the performance model
+It will produce a @code{.gp} file which contains both the performance model
 curves, and the profiling measurements.
 
 If you have the R statistical tool installed, you can additionally use

+ 1 - 1
doc/chapters/sched_ctx_hypervisor.texi

@@ -200,7 +200,7 @@ or
 @smallexample
 starpu_insert_task(&codelet,
                     ...,
-                    STARPU_FLOPS, 100,
+                    STARPU_FLOPS, (double) 100,
                     0);
 @end smallexample
 @end cartouche

+ 58 - 0
examples/cholesky/cholesky.h

@@ -55,6 +55,64 @@
 #define BLAS3_FLOP(n1,n2,n3)    \
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
 
+/* This is from magma
+
+  -- Innovative Computing Laboratory
+  -- Electrical Engineering and Computer Science Department
+  -- University of Tennessee
+  -- (C) Copyright 2009
+
+  Redistribution  and  use  in  source and binary forms, with or without
+  modification,  are  permitted  provided  that the following conditions
+  are met:
+
+  * Redistributions  of  source  code  must  retain  the above copyright
+    notice,  this  list  of  conditions  and  the  following  disclaimer.
+  * Redistributions  in  binary  form must reproduce the above copyright
+    notice,  this list of conditions and the following disclaimer in the
+    documentation  and/or other materials provided with the distribution.
+  * Neither  the  name of the University of Tennessee, Knoxville nor the
+    names of its contributors may be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+  THIS  SOFTWARE  IS  PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  ``AS IS''  AND  ANY  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A  PARTICULAR  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL  DAMAGES  (INCLUDING,  BUT NOT
+  LIMITED  TO,  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA,  OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY  OF  LIABILITY,  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF  THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  */
+
+#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.)))
+#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)      ) * (double)(__n) - (1. / 6.)))
+
+#define FLOPS_SPOTRF(__n) (     FMULS_POTRF((__n)) +       FADDS_POTRF((__n)) )
+
+#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.))
+#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.))
+
+#define FMULS_TRMM(__m, __n) ( /*( (__side) == PlasmaLeft ) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m)) )
+#define FADDS_TRMM(__m, __n) ( /*( (__side) == PlasmaLeft ) ? FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m)) )
+
+/* TRSM reuses the TRMM operation counts: multiplications map to FMULS_TRMM
+ * and additions map to FADDS_TRMM (not FMULS_TRMM, which would count the
+ * multiplications twice in FLOPS_STRSM). */
+#define FMULS_TRSM FMULS_TRMM
+#define FADDS_TRSM FADDS_TRMM
+
+#define FLOPS_STRSM(__m, __n) (     FMULS_TRSM((__m), (__n)) +       FADDS_TRSM((__m), (__n)) )
+
+
+#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
+#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
+
+#define FLOPS_SGEMM(__m, __n, __k) (     FMULS_GEMM((__m), (__n), (__k)) +       FADDS_GEMM((__m), (__n), (__k)) )
+
+/* End of magma code */
+
 static unsigned size = 4*1024;
 static unsigned nblocks = 16;
 static unsigned nbigblocks = 8;

+ 10 - 1
examples/cholesky/cholesky_grain_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -68,6 +68,9 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 		starpu_tag_declare_deps(TAG11_AUX(k, reclevel), 1, TAG22_AUX(k-1, k, k, reclevel));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SPOTRF(n);
+
 	return task;
 }
 
@@ -110,6 +113,9 @@ static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, un
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 1, TAG11_AUX(k, reclevel));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_STRSM(n, n);
+
 	ret = starpu_task_submit(task);
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	return ret;
@@ -157,6 +163,9 @@ static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, un
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 2, TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SGEMM(n, n, n);
+
 	ret = starpu_task_submit(task);
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	return ret;

+ 6 - 2
examples/cholesky/cholesky_implicit.c

@@ -85,6 +85,8 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	double end;
 
 	unsigned i,j,k;
+	unsigned long n = starpu_matrix_get_nx(dataA);
+	unsigned long nn = n/nblocks;
 
 	int prio_level = noprio?STARPU_DEFAULT_PRIO:STARPU_MAX_PRIO;
 
@@ -101,6 +103,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 					 STARPU_PRIORITY, prio_level,
 					 STARPU_RW, sdatakk,
 					 STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL,
+					 STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 					 0);
 		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
@@ -113,6 +116,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 						 STARPU_PRIORITY, (j == k+1)?prio_level:STARPU_DEFAULT_PRIO,
 						 STARPU_R, sdatakk,
 						 STARPU_RW, sdatakj,
+						 STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 						 0);
 			if (ret == -ENODEV) return 77;
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
@@ -129,6 +133,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 								 STARPU_R, sdataki,
 								 STARPU_R, sdatakj,
 								 STARPU_RW, sdataij,
+								 STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 								 0);
 					if (ret == -ENODEV) return 77;
 					STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
@@ -144,9 +149,8 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	end = starpu_timing_now();
 
 	double timing = end - start;
-	unsigned long n = starpu_matrix_get_nx(dataA);
 
-	double flop = (1.0f*n*n*n)/3.0f;
+	double flop = FLOPS_SPOTRF(n);
 
 	if(with_ctxs || with_noctxs || chole1 || chole2)
 		update_sched_ctx_timing_results((flop/timing/1000.0f), (timing/1000000.0f));

+ 10 - 1
examples/cholesky/cholesky_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -69,6 +69,9 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SPOTRF(n);
+
 	return task;
 }
 
@@ -109,6 +112,9 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_STRSM(n, n);
+
 	int ret = starpu_task_submit(task);
         if (STARPU_UNLIKELY(ret == -ENODEV))
 	{
@@ -158,6 +164,9 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SGEMM(n, n, n);
+
 	int ret = starpu_task_submit(task);
         if (STARPU_UNLIKELY(ret == -ENODEV))
 	{

+ 10 - 1
examples/cholesky/cholesky_tile_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -71,6 +71,9 @@ static struct starpu_task * create_task_11(unsigned k, unsigned nblocks)
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SPOTRF(n);
+
 	return task;
 }
 
@@ -113,6 +116,9 @@ static int create_task_21(unsigned k, unsigned j)
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_STRSM(n, n);
+
 	ret = starpu_task_submit(task);
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	return ret;
@@ -160,6 +166,9 @@ static int create_task_22(unsigned k, unsigned i, unsigned j)
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 	}
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SGEMM(n, n, n);
+
 	ret = starpu_task_submit(task);
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	return ret;

+ 3 - 1
include/starpu_perfmodel.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
@@ -102,6 +102,8 @@ struct starpu_perfmodel_history_entry
 #else
 	size_t size; /* in bytes */
 #endif
+
+	double flops; /* Provided by the application */
 };
 
 struct starpu_perfmodel_history_list

+ 2 - 2
include/starpu_task_util.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -45,7 +45,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_DATA_ARRAY       (1<<11) /* Array of data handles */
 #define STARPU_TAG       (1<<12) /* Tag */
 #define STARPU_HYPERVISOR_TAG	(1<<13)	/* Used to tag a task after whose execution we'll execute  a code */
-#define STARPU_HYPERVISOR_FLOPS	(1<<14)	/* Used to specify the number of flops needed to be executed by a task */
+#define STARPU_FLOPS	(1<<14)	/* Used to specify the number of flops needed to be executed by a task */
 
 /* Wrapper to create a task. */
 int starpu_insert_task(struct starpu_codelet *cl, ...);

+ 30 - 13
src/core/perfmodel/perfmodel_history.c

@@ -180,7 +180,7 @@ static void scan_reg_model(FILE *f, struct starpu_perfmodel_regression_model *re
 
 static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
 {
-	fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
+	fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
 }
 
 static void scan_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
@@ -192,28 +192,36 @@ static void scan_history_entry(FILE *f, struct starpu_perfmodel_history_entry *e
 	/* In case entry is NULL, we just drop these values */
 	unsigned nsample;
 	uint32_t footprint;
-#ifdef STARPU_HAVE_WINDOWS
-	unsigned size; /* in bytes */
-#else
-	size_t size; /* in bytes */
-#endif
+	unsigned long size; /* in bytes */
+	double flops;
 	double mean;
 	double deviation;
 	double sum;
 	double sum2;
 
+	char line[256];
+	char *ret;
+
+	ret = fgets(line, sizeof(line), f);
+	STARPU_ASSERT(ret);
+	STARPU_ASSERT(strchr(line, '\n'));
+
 	/* Read the values from the file */
-	res = fscanf(f, "%x\t%"
-#ifndef STARPU_HAVE_WINDOWS
-	"z"
-#endif
-	"u\t%le\t%le\t%le\t%le\t%u\n", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
-	STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file");
+	res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &flops, &mean, &deviation, &sum, &sum2, &nsample);
+
+	if (res != 8)
+	{
+		flops = 0.;
+		/* Read the values from the file */
+		res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
+		STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file");
+	}
 
 	if (entry)
 	{
 		entry->footprint = footprint;
 		entry->size = size;
+		entry->flops = flops;
 		entry->mean = mean;
 		entry->deviation = deviation;
 		entry->sum = sum;
@@ -393,7 +401,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 	/* Dump the history into the model file in case it is necessary */
 	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
 	{
-		fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
+		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
 		ptr = per_arch_model->list;
 		while (ptr)
 		{
@@ -1152,6 +1160,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 				entry->sum2 = measured*measured;
 
 				entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);
+				entry->flops = j->task->flops;
 
 				entry->footprint = key;
 				entry->nsample = 1;
@@ -1168,6 +1177,14 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 				unsigned n = entry->nsample;
 				entry->mean = entry->sum / n;
 				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
+				if (j->task->flops != 0.)
+				{
+					if (entry->flops == 0.)
+						entry->flops = j->task->flops;
+					else if (entry->flops != j->task->flops)
+						/* Incoherent flops! forget about trying to record flops */
+						entry->flops = NAN;
+				}
 			}
 
 			STARPU_ASSERT(entry);

+ 3 - 3
src/core/perfmodel/perfmodel_print.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011  Université de Bordeaux 1
+ * Copyright (C) 2011, 2013  Université de Bordeaux 1
  * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
@@ -38,8 +38,8 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 			if (!parameter)
 			{
 				/* There isn't a parameter that is explicitely requested, so we display all parameters */
-				printf("%08x\t%-15lu\t%-15le\t%-15le\t%u\n", entry->footprint,
-					(unsigned long) entry->size, entry->mean, entry->deviation, entry->nsample);
+				printf("%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint,
+					(unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->nsample);
 			}
 			else
 			{

+ 2 - 2
src/util/starpu_insert_task_utils.c

@@ -287,9 +287,9 @@ int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_si
 			int hypervisor_tag = va_arg(varg_list, int);
 			(*task)->hypervisor_tag = hypervisor_tag;
 		}
-		else if (arg_type==STARPU_HYPERVISOR_FLOPS)
+		else if (arg_type==STARPU_FLOPS)
 		{
-			int flops = va_arg(varg_list, int);
+			double flops = va_arg(varg_list, double);
 			(*task)->flops = flops;
 		}
 

+ 22 - 5
tools/starpu_perfmodel_plot.c

@@ -43,6 +43,7 @@ static char *symbol = NULL;
 static char *archname = NULL;
 /* Unless a FxT file is specified, we just display the model */
 static int no_fxt_file = 1;
+static int gflops = 0;
 
 #ifdef STARPU_USE_FXT
 static struct starpu_fxt_codelet_event *dumped_codelets;
@@ -67,6 +68,7 @@ given perfmodel\n");
         fprintf(stderr, "Options:\n");
         fprintf(stderr, "   -l                  display all available models\n");
         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
+	fprintf(stderr, "   -f                  draw GFlops instead of time\n");
 	fprintf(stderr, "   -i <Fxt files>      input FxT files generated by StarPU\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:x, cuda, cuda_d, opencl, opencl_d)\n");
 	fprintf(stderr, "   -h, --help          display this help and exit\n");
@@ -119,6 +121,12 @@ static void parse_args(int argc, char **argv)
 			continue;
 		}
 
+		if (strcmp(argv[i], "-f") == 0)
+		{
+			gflops = 1;
+			continue;
+		}
+
 		if (strcmp(argv[i], "-a") == 0)
 		{
 			archname = argv[++i];
@@ -184,7 +192,7 @@ static void display_perf_model(FILE *gnuplot_file, struct starpu_perfmodel *mode
 		fprintf(stderr,"Arch: %s\n", arch_name);
 
 #ifdef STARPU_USE_FXT
-	if (!no_fxt_file && archtype_is_found[arch] && nimpl == 0)
+	if (!gflops && !no_fxt_file && archtype_is_found[arch] && nimpl == 0)
 	{
 		print_comma(gnuplot_file, first);
 		fprintf(gnuplot_file, "\"< grep -w \\^%d %s\" using 2:3 title \"Profiling %s\"", arch, data_file_name, arch_name);
@@ -192,7 +200,7 @@ static void display_perf_model(FILE *gnuplot_file, struct starpu_perfmodel *mode
 #endif
 
 	/* Only display the regression model if we could actually build a model */
-	if (arch_model->regression.valid && !arch_model->regression.nl_valid)
+	if (!gflops && arch_model->regression.valid && !arch_model->regression.nl_valid)
 	{
 		print_comma(gnuplot_file, first);
 
@@ -204,7 +212,7 @@ static void display_perf_model(FILE *gnuplot_file, struct starpu_perfmodel *mode
 			arch_model->regression.alpha, arch_model->regression.beta, arch_name);
 	}
 
-	if (arch_model->regression.nl_valid)
+	if (!gflops && arch_model->regression.nl_valid)
 	{
 		print_comma(gnuplot_file, first);
 
@@ -286,7 +294,13 @@ static void display_history_based_perf_models(FILE *gnuplot_file, struct starpu_
 					struct starpu_perfmodel_history_entry *entry = ptr->entry;
 					if (entry->size == minimum)
 					{
-						fprintf(datafile, "\t%-15le\t%-15le", 0.001*entry->mean, 0.001*entry->deviation);
+						if (gflops)
+							fprintf(datafile, "\t%-15le\t%-15le", entry->flops / (entry->mean * 1000),
+									entry->flops / ((entry->mean + entry->deviation) * 1000) -
+									entry->flops / (entry->mean * 1000)
+									);
+						else
+							fprintf(datafile, "\t%-15le\t%-15le", 0.001*entry->mean, 0.001*entry->deviation);
 						break;
 					}
 				}
@@ -346,7 +360,10 @@ static void display_selected_models(FILE *gnuplot_file, struct starpu_perfmodel
 	fprintf(gnuplot_file, "set output \"starpu_%s.eps\"\n", symbol);
 	fprintf(gnuplot_file, "set title \"Model for codelet %s\"\n", symbol);
 	fprintf(gnuplot_file, "set xlabel \"Total data size\"\n");
-	fprintf(gnuplot_file, "set ylabel \"Time (ms)\"\n");
+	if (gflops)
+		fprintf(gnuplot_file, "set ylabel \"GFlops\"\n");
+	else
+		fprintf(gnuplot_file, "set ylabel \"Time (ms)\"\n");
 	fprintf(gnuplot_file, "\n");
 	fprintf(gnuplot_file, "set key top left\n");
 	fprintf(gnuplot_file, "set logscale x\n");