Browse Source

Add support for directly drawing GFlops curves

Samuel Thibault 12 years ago
parent
commit
438cb42e66

+ 1 - 0
ChangeLog

@@ -103,6 +103,7 @@ Small features:
     the profile of a codelet.
     the profile of a codelet.
   * File STARPU-REVISION --- containing the SVN revision number from which
   * File STARPU-REVISION --- containing the SVN revision number from which
     StarPU was compiled --- is installed in the share/doc/starpu directory
     StarPU was compiled --- is installed in the share/doc/starpu directory
+  * starpu_perfmodel_plot can now directly draw GFlops curves.
 
 
 Changes:
 Changes:
   * Fix the block filter functions.
   * Fix the block filter functions.

+ 7 - 1
doc/chapters/advanced-api.texi

@@ -38,7 +38,7 @@ The arguments following the codelets can be of the following types:
 @item
 @item
 the specific values @code{STARPU_VALUE}, @code{STARPU_CALLBACK},
 the specific values @code{STARPU_VALUE}, @code{STARPU_CALLBACK},
 @code{STARPU_CALLBACK_ARG}, @code{STARPU_CALLBACK_WITH_ARG},
 @code{STARPU_CALLBACK_ARG}, @code{STARPU_CALLBACK_WITH_ARG},
-@code{STARPU_PRIORITY}, @code{STARPU_TAG}, followed by the appropriated objects
+@code{STARPU_PRIORITY}, @code{STARPU_TAG}, @code{STARPU_FLOPS}, followed by the appropriate objects
 as defined below.
 as defined below.
 @end itemize
 @end itemize
 
 
@@ -85,6 +85,12 @@ this macro is used when calling @code{starpu_insert_task}, and must be
 followed by a tag.
 followed by a tag.
 @end defmac
 @end defmac
 
 
+@defmac STARPU_FLOPS
+this macro is used when calling @code{starpu_insert_task}, and must be followed
+by the number of floating-point operations, as a double. Non-double values must
+be explicitly cast to double, otherwise parameter passing will not work.
+@end defmac
+
 @deftypefun void starpu_codelet_pack_args ({char **}@var{arg_buffer}, {size_t *}@var{arg_buffer_size}, ...)
 @deftypefun void starpu_codelet_pack_args ({char **}@var{arg_buffer}, {size_t *}@var{arg_buffer_size}, ...)
 Pack arguments of type @code{STARPU_VALUE} into a buffer which can be
 Pack arguments of type @code{STARPU_VALUE} into a buffer which can be
 given to a codelet and later unpacked with the function
 given to a codelet and later unpacked with the function

+ 6 - 0
doc/chapters/basic-api.texi

@@ -1849,6 +1849,11 @@ A pointer to the next task. This should only be used by StarPU.
 This is only used for tasks that use multiformat handle. This should only be
 This is only used for tasks that use multiformat handle. This should only be
 used by StarPU.
 used by StarPU.
 
 
+@item @code{double flops}
+This can be set to the number of floating-point operations that the task
+will perform. This is useful for easily getting GFlops curves from
+@code{starpu_perfmodel_plot}, and for the hypervisor load balancing.
+
 @item @code{void *starpu_private}
 @item @code{void *starpu_private}
 This is private to StarPU, do not modify. If the task is allocated by hand
 This is private to StarPU, do not modify. If the task is allocated by hand
 (without starpu_task_create), this field should be set to NULL.
 (without starpu_task_create), this field should be set to NULL.
@@ -1857,6 +1862,7 @@ This is private to StarPU, do not modify. If the task is allocated by hand
 This field is set when initializing a task. It prevents a task from being
 This field is set when initializing a task. It prevents a task from being
 submitted if it has not been properly initialized.
 submitted if it has not been properly initialized.
 @end table
 @end table
+
 @end deftp
 @end deftp
 
 
 @deftypefun void starpu_task_init ({struct starpu_task} *@var{task})
 @deftypefun void starpu_task_init ({struct starpu_task} *@var{task})

+ 17 - 7
doc/chapters/perf-feedback.texi

@@ -411,7 +411,7 @@ display the regression formula, and in the case of non-linear regression, the
 same performance log as for history-based performance models:
 same performance log as for history-based performance models:
 
 
 @example
 @example
-$ starpu_perfmodel_display -s non_linear_memset_regression_based.type
+$ starpu_perfmodel_display -s non_linear_memset_regression_based
 performance model for cpu_impl_0
 performance model for cpu_impl_0
 	Regression : #sample = 1400
 	Regression : #sample = 1400
 	Linear: y = alpha size ^ beta
 	Linear: y = alpha size ^ beta
@@ -429,15 +429,25 @@ a3d3725e	4096           	4.763200e+00   	7.650928e-01   	100
 ...
 ...
 @end example
 @end example
 
 
-The @code{starpu_perfmodel_plot} tool can be used to draw performance models.
-It writes a @code{.gp} file in the current directory, to be run in the
-@code{gnuplot} tool, which shows the corresponding curve.
-
 The same can also be achieved by using StarPU's library API, see
 The same can also be achieved by using StarPU's library API, see
 @ref{Performance Model API} and notably the @code{starpu_perfmodel_load_symbol}
 @ref{Performance Model API} and notably the @code{starpu_perfmodel_load_symbol}
 function. The source code of the @code{starpu_perfmodel_display} tool can be a
 function. The source code of the @code{starpu_perfmodel_display} tool can be a
 useful example.
 useful example.
 
 
+The @code{starpu_perfmodel_plot} tool can be used to draw performance models.
+It writes a @code{.gp} file in the current directory, to be run in the
+@code{gnuplot} tool, which shows the corresponding curve.
+
+When the @code{flops} field of tasks is set, @code{starpu_perfmodel_plot} can
+directly draw a GFlops curve, by simply adding the @code{-f} option:
+
+@example
+$ starpu_perfmodel_plot -f -s chol_model_11
+@end example
+
+This will however disable displaying the regression model, for which we cannot
+compute GFlops.
+
 When the FxT trace file @code{filename} has been generated, it is possible to
 When the FxT trace file @code{filename} has been generated, it is possible to
 get a profiling of each codelet by calling:
 get a profiling of each codelet by calling:
 @example
 @example
@@ -453,10 +463,10 @@ This is also available in the @code{starpu_perfmodel_plot} tool, by passing it
 the fxt trace:
 the fxt trace:
 
 
 @example
 @example
-$ starpu_perfmodel_display -s non_linear_memset_regression_based.type -i /tmp/prof_file_foo_0
+$ starpu_perfmodel_plot -s non_linear_memset_regression_based -i /tmp/prof_file_foo_0
 @end example
 @end example
 
 
-It willd produce a @code{.gp} file which contains both the performance model
+It will produce a @code{.gp} file which contains both the performance model
 curves, and the profiling measurements.
 curves, and the profiling measurements.
 
 
 If you have the R statistical tool installed, you can additionally use
 If you have the R statistical tool installed, you can additionally use

+ 1 - 1
doc/chapters/sched_ctx_hypervisor.texi

@@ -200,7 +200,7 @@ or
 @smallexample
 @smallexample
 starpu_insert_task(&codelet,
 starpu_insert_task(&codelet,
                     ...,
                     ...,
-                    STARPU_FLOPS, 100,
+                    STARPU_FLOPS, (double) 100,
                     0);
                     0);
 @end smallexample
 @end smallexample
 @end cartouche
 @end cartouche

+ 58 - 0
examples/cholesky/cholesky.h

@@ -55,6 +55,64 @@
 #define BLAS3_FLOP(n1,n2,n3)    \
 #define BLAS3_FLOP(n1,n2,n3)    \
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
 
 
+/* This is from magma
+
+  -- Innovative Computing Laboratory
+  -- Electrical Engineering and Computer Science Department
+  -- University of Tennessee
+  -- (C) Copyright 2009
+
+  Redistribution  and  use  in  source and binary forms, with or without
+  modification,  are  permitted  provided  that the following conditions
+  are met:
+
+  * Redistributions  of  source  code  must  retain  the above copyright
+    notice,  this  list  of  conditions  and  the  following  disclaimer.
+  * Redistributions  in  binary  form must reproduce the above copyright
+    notice,  this list of conditions and the following disclaimer in the
+    documentation  and/or other materials provided with the distribution.
+  * Neither  the  name of the University of Tennessee, Knoxville nor the
+    names of its contributors may be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+  THIS  SOFTWARE  IS  PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  ``AS IS''  AND  ANY  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A  PARTICULAR  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL  DAMAGES  (INCLUDING,  BUT NOT
+  LIMITED  TO,  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA,  OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY  OF  LIABILITY,  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF  THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  */
+
+#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.)))
+#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)      ) * (double)(__n) - (1. / 6.)))
+
+#define FLOPS_SPOTRF(__n) (     FMULS_POTRF((__n)) +       FADDS_POTRF((__n)) )
+
+#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.))
+#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.))
+
+#define FMULS_TRMM(__m, __n) ( /*( (__side) == PlasmaLeft ) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m)) )
+#define FADDS_TRMM(__m, __n) ( /*( (__side) == PlasmaLeft ) ? FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m)) )
+
+#define FMULS_TRSM FMULS_TRMM
+#define FADDS_TRSM FADDS_TRMM
+
+#define FLOPS_STRSM(__m, __n) (     FMULS_TRSM((__m), (__n)) +       FADDS_TRSM((__m), (__n)) )
+
+
+#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
+#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
+
+#define FLOPS_SGEMM(__m, __n, __k) (     FMULS_GEMM((__m), (__n), (__k)) +       FADDS_GEMM((__m), (__n), (__k)) )
+
+/* End of magma code */
+
 static unsigned size = 4*1024;
 static unsigned size = 4*1024;
 static unsigned nblocks = 16;
 static unsigned nblocks = 16;
 static unsigned nbigblocks = 8;
 static unsigned nbigblocks = 8;

+ 10 - 1
examples/cholesky/cholesky_grain_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
@@ -68,6 +68,9 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 		starpu_tag_declare_deps(TAG11_AUX(k, reclevel), 1, TAG22_AUX(k-1, k, k, reclevel));
 		starpu_tag_declare_deps(TAG11_AUX(k, reclevel), 1, TAG22_AUX(k-1, k, k, reclevel));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SPOTRF(n);
+
 	return task;
 	return task;
 }
 }
 
 
@@ -110,6 +113,9 @@ static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, un
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 1, TAG11_AUX(k, reclevel));
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 1, TAG11_AUX(k, reclevel));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_STRSM(n, n);
+
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	return ret;
 	return ret;
@@ -157,6 +163,9 @@ static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, un
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 2, TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 2, TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SGEMM(n, n, n);
+
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	return ret;
 	return ret;

+ 6 - 2
examples/cholesky/cholesky_implicit.c

@@ -85,6 +85,8 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	double end;
 	double end;
 
 
 	unsigned i,j,k;
 	unsigned i,j,k;
+	unsigned long n = starpu_matrix_get_nx(dataA);
+	unsigned long nn = n/nblocks;
 
 
 	int prio_level = noprio?STARPU_DEFAULT_PRIO:STARPU_MAX_PRIO;
 	int prio_level = noprio?STARPU_DEFAULT_PRIO:STARPU_MAX_PRIO;
 
 
@@ -101,6 +103,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 					 STARPU_PRIORITY, prio_level,
 					 STARPU_PRIORITY, prio_level,
 					 STARPU_RW, sdatakk,
 					 STARPU_RW, sdatakk,
 					 STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL,
 					 STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL,
+					 STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 					 0);
 					 0);
 		if (ret == -ENODEV) return 77;
 		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
@@ -113,6 +116,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 						 STARPU_PRIORITY, (j == k+1)?prio_level:STARPU_DEFAULT_PRIO,
 						 STARPU_PRIORITY, (j == k+1)?prio_level:STARPU_DEFAULT_PRIO,
 						 STARPU_R, sdatakk,
 						 STARPU_R, sdatakk,
 						 STARPU_RW, sdatakj,
 						 STARPU_RW, sdatakj,
+						 STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 						 0);
 						 0);
 			if (ret == -ENODEV) return 77;
 			if (ret == -ENODEV) return 77;
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
@@ -129,6 +133,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 								 STARPU_R, sdataki,
 								 STARPU_R, sdataki,
 								 STARPU_R, sdatakj,
 								 STARPU_R, sdatakj,
 								 STARPU_RW, sdataij,
 								 STARPU_RW, sdataij,
+								 STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 								 0);
 								 0);
 					if (ret == -ENODEV) return 77;
 					if (ret == -ENODEV) return 77;
 					STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 					STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
@@ -144,9 +149,8 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	end = starpu_timing_now();
 	end = starpu_timing_now();
 
 
 	double timing = end - start;
 	double timing = end - start;
-	unsigned long n = starpu_matrix_get_nx(dataA);
 
 
-	double flop = (1.0f*n*n*n)/3.0f;
+	double flop = FLOPS_SPOTRF(n);
 
 
 	if(with_ctxs || with_noctxs || chole1 || chole2)
 	if(with_ctxs || with_noctxs || chole1 || chole2)
 		update_sched_ctx_timing_results((flop/timing/1000.0f), (timing/1000000.0f));
 		update_sched_ctx_timing_results((flop/timing/1000.0f), (timing/1000000.0f));

+ 10 - 1
examples/cholesky/cholesky_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
@@ -69,6 +69,9 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SPOTRF(n);
+
 	return task;
 	return task;
 }
 }
 
 
@@ -109,6 +112,9 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_STRSM(n, n);
+
 	int ret = starpu_task_submit(task);
 	int ret = starpu_task_submit(task);
         if (STARPU_UNLIKELY(ret == -ENODEV))
         if (STARPU_UNLIKELY(ret == -ENODEV))
 	{
 	{
@@ -158,6 +164,9 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SGEMM(n, n, n);
+
 	int ret = starpu_task_submit(task);
 	int ret = starpu_task_submit(task);
         if (STARPU_UNLIKELY(ret == -ENODEV))
         if (STARPU_UNLIKELY(ret == -ENODEV))
 	{
 	{

+ 10 - 1
examples/cholesky/cholesky_tile_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -71,6 +71,9 @@ static struct starpu_task * create_task_11(unsigned k, unsigned nblocks)
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SPOTRF(n);
+
 	return task;
 	return task;
 }
 }
 
 
@@ -113,6 +116,9 @@ static int create_task_21(unsigned k, unsigned j)
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_STRSM(n, n);
+
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	return ret;
 	return ret;
@@ -160,6 +166,9 @@ static int create_task_22(unsigned k, unsigned i, unsigned j)
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 	}
 	}
 
 
+	int n = starpu_matrix_get_nx(task->handles[0]);
+	task->flops = FLOPS_SGEMM(n, n, n);
+
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	return ret;
 	return ret;

+ 3 - 1
include/starpu_perfmodel.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
@@ -102,6 +102,8 @@ struct starpu_perfmodel_history_entry
 #else
 #else
 	size_t size; /* in bytes */
 	size_t size; /* in bytes */
 #endif
 #endif
+
+	double flops; /* Provided by the application */
 };
 };
 
 
 struct starpu_perfmodel_history_list
 struct starpu_perfmodel_history_list

+ 2 - 2
include/starpu_task_util.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -45,7 +45,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_DATA_ARRAY       (1<<11) /* Array of data handles */
 #define STARPU_DATA_ARRAY       (1<<11) /* Array of data handles */
 #define STARPU_TAG       (1<<12) /* Tag */
 #define STARPU_TAG       (1<<12) /* Tag */
 #define STARPU_HYPERVISOR_TAG	(1<<13)	/* Used to tag a task after whose execution we'll execute  a code */
 #define STARPU_HYPERVISOR_TAG	(1<<13)	/* Used to tag a task after whose execution we'll execute  a code */
-#define STARPU_HYPERVISOR_FLOPS	(1<<14)	/* Used to specify the number of flops needed to be executed by a task */
+#define STARPU_FLOPS	(1<<14)	/* Used to specify the number of flops needed to be executed by a task */
 
 
 /* Wrapper to create a task. */
 /* Wrapper to create a task. */
 int starpu_insert_task(struct starpu_codelet *cl, ...);
 int starpu_insert_task(struct starpu_codelet *cl, ...);

+ 30 - 13
src/core/perfmodel/perfmodel_history.c

@@ -180,7 +180,7 @@ static void scan_reg_model(FILE *f, struct starpu_perfmodel_regression_model *re
 
 
 static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
 static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
 {
 {
-	fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
+	fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
 }
 }
 
 
 static void scan_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
 static void scan_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
@@ -192,28 +192,36 @@ static void scan_history_entry(FILE *f, struct starpu_perfmodel_history_entry *e
 	/* In case entry is NULL, we just drop these values */
 	/* In case entry is NULL, we just drop these values */
 	unsigned nsample;
 	unsigned nsample;
 	uint32_t footprint;
 	uint32_t footprint;
-#ifdef STARPU_HAVE_WINDOWS
-	unsigned size; /* in bytes */
-#else
-	size_t size; /* in bytes */
-#endif
+	unsigned long size; /* in bytes */
+	double flops;
 	double mean;
 	double mean;
 	double deviation;
 	double deviation;
 	double sum;
 	double sum;
 	double sum2;
 	double sum2;
 
 
+	char line[256];
+	char *ret;
+
+	ret = fgets(line, sizeof(line), f);
+	STARPU_ASSERT(ret);
+	STARPU_ASSERT(strchr(line, '\n'));
+
 	/* Read the values from the file */
 	/* Read the values from the file */
-	res = fscanf(f, "%x\t%"
-#ifndef STARPU_HAVE_WINDOWS
-	"z"
-#endif
-	"u\t%le\t%le\t%le\t%le\t%u\n", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
-	STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file");
+	res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &flops, &mean, &deviation, &sum, &sum2, &nsample);
+
+	if (res != 8)
+	{
+		flops = 0.;
+		/* Read the values from the file */
+		res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
+		STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file");
+	}
 
 
 	if (entry)
 	if (entry)
 	{
 	{
 		entry->footprint = footprint;
 		entry->footprint = footprint;
 		entry->size = size;
 		entry->size = size;
+		entry->flops = flops;
 		entry->mean = mean;
 		entry->mean = mean;
 		entry->deviation = deviation;
 		entry->deviation = deviation;
 		entry->sum = sum;
 		entry->sum = sum;
@@ -393,7 +401,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 	/* Dump the history into the model file in case it is necessary */
 	/* Dump the history into the model file in case it is necessary */
 	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
 	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
 	{
 	{
-		fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
+		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
 		ptr = per_arch_model->list;
 		ptr = per_arch_model->list;
 		while (ptr)
 		while (ptr)
 		{
 		{
@@ -1152,6 +1160,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 				entry->sum2 = measured*measured;
 				entry->sum2 = measured*measured;
 
 
 				entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);
 				entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);
+				entry->flops = j->task->flops;
 
 
 				entry->footprint = key;
 				entry->footprint = key;
 				entry->nsample = 1;
 				entry->nsample = 1;
@@ -1168,6 +1177,14 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 				unsigned n = entry->nsample;
 				unsigned n = entry->nsample;
 				entry->mean = entry->sum / n;
 				entry->mean = entry->sum / n;
 				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
 				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
+				if (j->task->flops != 0.)
+				{
+					if (entry->flops == 0.)
+						entry->flops = j->task->flops;
+					else if (entry->flops != j->task->flops)
+						/* Incoherent flops! forget about trying to record flops */
+						entry->flops = NAN;
+				}
 			}
 			}
 
 
 			STARPU_ASSERT(entry);
 			STARPU_ASSERT(entry);

+ 3 - 3
src/core/perfmodel/perfmodel_print.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011  Université de Bordeaux 1
+ * Copyright (C) 2011, 2013  Université de Bordeaux 1
  * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
@@ -38,8 +38,8 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 			if (!parameter)
 			if (!parameter)
 			{
 			{
 				/* There isn't a parameter that is explicitely requested, so we display all parameters */
 				/* There isn't a parameter that is explicitely requested, so we display all parameters */
-				printf("%08x\t%-15lu\t%-15le\t%-15le\t%u\n", entry->footprint,
-					(unsigned long) entry->size, entry->mean, entry->deviation, entry->nsample);
+				printf("%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint,
+					(unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->nsample);
 			}
 			}
 			else
 			else
 			{
 			{

+ 2 - 2
src/util/starpu_insert_task_utils.c

@@ -287,9 +287,9 @@ int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_si
 			int hypervisor_tag = va_arg(varg_list, int);
 			int hypervisor_tag = va_arg(varg_list, int);
 			(*task)->hypervisor_tag = hypervisor_tag;
 			(*task)->hypervisor_tag = hypervisor_tag;
 		}
 		}
-		else if (arg_type==STARPU_HYPERVISOR_FLOPS)
+		else if (arg_type==STARPU_FLOPS)
 		{
 		{
-			int flops = va_arg(varg_list, int);
+			double flops = va_arg(varg_list, double);
 			(*task)->flops = flops;
 			(*task)->flops = flops;
 		}
 		}
 
 

+ 22 - 5
tools/starpu_perfmodel_plot.c

@@ -43,6 +43,7 @@ static char *symbol = NULL;
 static char *archname = NULL;
 static char *archname = NULL;
 /* Unless a FxT file is specified, we just display the model */
 /* Unless a FxT file is specified, we just display the model */
 static int no_fxt_file = 1;
 static int no_fxt_file = 1;
+static int gflops = 0;
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 static struct starpu_fxt_codelet_event *dumped_codelets;
 static struct starpu_fxt_codelet_event *dumped_codelets;
@@ -67,6 +68,7 @@ given perfmodel\n");
         fprintf(stderr, "Options:\n");
         fprintf(stderr, "Options:\n");
         fprintf(stderr, "   -l                  display all available models\n");
         fprintf(stderr, "   -l                  display all available models\n");
         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
+	fprintf(stderr, "   -f                  draw GFlops instead of time\n");
 	fprintf(stderr, "   -i <Fxt files>      input FxT files generated by StarPU\n");
 	fprintf(stderr, "   -i <Fxt files>      input FxT files generated by StarPU\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:x, cuda, cuda_d, opencl, opencl_d)\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:x, cuda, cuda_d, opencl, opencl_d)\n");
 	fprintf(stderr, "   -h, --help          display this help and exit\n");
 	fprintf(stderr, "   -h, --help          display this help and exit\n");
@@ -119,6 +121,12 @@ static void parse_args(int argc, char **argv)
 			continue;
 			continue;
 		}
 		}
 
 
+		if (strcmp(argv[i], "-f") == 0)
+		{
+			gflops = 1;
+			continue;
+		}
+
 		if (strcmp(argv[i], "-a") == 0)
 		if (strcmp(argv[i], "-a") == 0)
 		{
 		{
 			archname = argv[++i];
 			archname = argv[++i];
@@ -184,7 +192,7 @@ static void display_perf_model(FILE *gnuplot_file, struct starpu_perfmodel *mode
 		fprintf(stderr,"Arch: %s\n", arch_name);
 		fprintf(stderr,"Arch: %s\n", arch_name);
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-	if (!no_fxt_file && archtype_is_found[arch] && nimpl == 0)
+	if (!gflops && !no_fxt_file && archtype_is_found[arch] && nimpl == 0)
 	{
 	{
 		print_comma(gnuplot_file, first);
 		print_comma(gnuplot_file, first);
 		fprintf(gnuplot_file, "\"< grep -w \\^%d %s\" using 2:3 title \"Profiling %s\"", arch, data_file_name, arch_name);
 		fprintf(gnuplot_file, "\"< grep -w \\^%d %s\" using 2:3 title \"Profiling %s\"", arch, data_file_name, arch_name);
@@ -192,7 +200,7 @@ static void display_perf_model(FILE *gnuplot_file, struct starpu_perfmodel *mode
 #endif
 #endif
 
 
 	/* Only display the regression model if we could actually build a model */
 	/* Only display the regression model if we could actually build a model */
-	if (arch_model->regression.valid && !arch_model->regression.nl_valid)
+	if (!gflops && arch_model->regression.valid && !arch_model->regression.nl_valid)
 	{
 	{
 		print_comma(gnuplot_file, first);
 		print_comma(gnuplot_file, first);
 
 
@@ -204,7 +212,7 @@ static void display_perf_model(FILE *gnuplot_file, struct starpu_perfmodel *mode
 			arch_model->regression.alpha, arch_model->regression.beta, arch_name);
 			arch_model->regression.alpha, arch_model->regression.beta, arch_name);
 	}
 	}
 
 
-	if (arch_model->regression.nl_valid)
+	if (!gflops && arch_model->regression.nl_valid)
 	{
 	{
 		print_comma(gnuplot_file, first);
 		print_comma(gnuplot_file, first);
 
 
@@ -286,7 +294,13 @@ static void display_history_based_perf_models(FILE *gnuplot_file, struct starpu_
 					struct starpu_perfmodel_history_entry *entry = ptr->entry;
 					struct starpu_perfmodel_history_entry *entry = ptr->entry;
 					if (entry->size == minimum)
 					if (entry->size == minimum)
 					{
 					{
-						fprintf(datafile, "\t%-15le\t%-15le", 0.001*entry->mean, 0.001*entry->deviation);
+						if (gflops)
+							fprintf(datafile, "\t%-15le\t%-15le", entry->flops / (entry->mean * 1000),
+									entry->flops / ((entry->mean + entry->deviation) * 1000) -
+									entry->flops / (entry->mean * 1000)
+									);
+						else
+							fprintf(datafile, "\t%-15le\t%-15le", 0.001*entry->mean, 0.001*entry->deviation);
 						break;
 						break;
 					}
 					}
 				}
 				}
@@ -346,7 +360,10 @@ static void display_selected_models(FILE *gnuplot_file, struct starpu_perfmodel
 	fprintf(gnuplot_file, "set output \"starpu_%s.eps\"\n", symbol);
 	fprintf(gnuplot_file, "set output \"starpu_%s.eps\"\n", symbol);
 	fprintf(gnuplot_file, "set title \"Model for codelet %s\"\n", symbol);
 	fprintf(gnuplot_file, "set title \"Model for codelet %s\"\n", symbol);
 	fprintf(gnuplot_file, "set xlabel \"Total data size\"\n");
 	fprintf(gnuplot_file, "set xlabel \"Total data size\"\n");
-	fprintf(gnuplot_file, "set ylabel \"Time (ms)\"\n");
+	if (gflops)
+		fprintf(gnuplot_file, "set ylabel \"GFlops\"\n");
+	else
+		fprintf(gnuplot_file, "set ylabel \"Time (ms)\"\n");
 	fprintf(gnuplot_file, "\n");
 	fprintf(gnuplot_file, "\n");
 	fprintf(gnuplot_file, "set key top left\n");
 	fprintf(gnuplot_file, "set key top left\n");
 	fprintf(gnuplot_file, "set logscale x\n");
 	fprintf(gnuplot_file, "set logscale x\n");