Browse Source

Let the application provide the task footprint itself

Samuel Thibault 11 years ago
parent
commit
09974968a8

+ 21 - 10
doc/doxygen/chapters/12online_performance_tools.doxy

@@ -363,17 +363,28 @@ of the task in micro-seconds, one per architecture.
 </ul>
 </ul>
 
 
 For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and
 For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and
-::STARPU_NL_REGRESSION_BASED, the total size of task data (both input
-and output) is used as an index by default. The field
-starpu_perfmodel::size_base however permits the application to
-override that, when for instance some of the data do not matter for
-task cost (e.g. mere reference table), or when using sparse
+::STARPU_NL_REGRESSION_BASED, the dimensions of task data (both input
+and output) are used as an index by default. ::STARPU_HISTORY_BASED uses a CRC
+hash of the dimensions as an index to distinguish histories, and
+::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED use the total
+size as an index for the regression.
+
+The starpu_perfmodel::size_base and starpu_perfmodel::footprint fields however
+permit the application to override that, when for instance some of the data
+do not matter for task cost (e.g. mere reference table), or when using sparse
 structures (in which case it is the number of non-zeros which matter), or when
 structures (in which case it is the number of non-zeros which matter), or when
-there is some hidden parameter such as the number of iterations, or when the application
-actually has a very good idea of the complexity of the algorithm, and just not
-the speed of the processor, etc.
-The example in the directory <c>examples/pi</c> uses this to include
-the number of iterations in the base.
+there is some hidden parameter such as the number of iterations, or when the
+application actually has a very good idea of the complexity of the algorithm,
+and just not the speed of the processor, etc.  The example in the directory
+<c>examples/pi</c> uses this to include the number of iterations in the base
+size. starpu_perfmodel::size_base should be used when the variance of the actual
+performance is known (i.e. a bigger returned value means a longer execution
+time), and is thus particularly useful for ::STARPU_REGRESSION_BASED or
+::STARPU_NL_REGRESSION_BASED. starpu_perfmodel::footprint can be used when the
+variance of the actual performance is unknown (irregular performance behavior,
+etc.), and is thus only useful for ::STARPU_HISTORY_BASED.
+starpu_task_data_footprint() can be used as a base and combined with other
+parameters through starpu_hash_crc32c_be() for instance.
 
 
 StarPU will automatically determine when the performance model is calibrated,
 StarPU will automatically determine when the performance model is calibrated,
 or rather, it will assume the performance model is calibrated until the
 or rather, it will assume the performance model is calibrated until the

+ 6 - 2
doc/doxygen/chapters/api/performance_model.doxy

@@ -103,8 +103,12 @@ must return a task duration estimation in micro-seconds.
 \var starpu_perfmodel::size_base
 \var starpu_perfmodel::size_base
 Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
 Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
 ::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and
 ::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and
-implementation number, and returns the size to be used as index for
-history and regression.
+implementation number, and returns the size to be used as index to distinguish
+histories and as a base for regressions.
+\var starpu_perfmodel::footprint
+Used by ::STARPU_HISTORY_BASED. If not NULL, takes a task and returns the
+footprint to be used as index to distinguish histories. The default is to use
+the starpu_task_data_footprint function.
 \var starpu_perfmodel::per_arch
 \var starpu_perfmodel::per_arch
 Used by ::STARPU_PER_ARCH: array of structures starpu_per_arch_perfmodel
 Used by ::STARPU_PER_ARCH: array of structures starpu_per_arch_perfmodel
 \var starpu_perfmodel::is_loaded
 \var starpu_perfmodel::is_loaded

+ 6 - 1
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -128,7 +128,12 @@ otherwise the task may fail to execute.
 
 
 \fn uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
 \fn uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
 \ingroup API_Scheduling_Policy
 \ingroup API_Scheduling_Policy
-Returns the footprint for a given task
+Returns the footprint for a given task, taking into account user-provided
+perfmodel footprint or size_base functions.
+
+\fn uint32_t starpu_task_data_footprint(struct starpu_task *task)
+\ingroup API_Scheduling_Policy
+Returns the raw footprint for the data of a given task (without taking into account user-provided functions).
 
 
 \fn double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
 \fn double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
 \ingroup API_Scheduling_Policy
 \ingroup API_Scheduling_Policy

+ 1 - 0
include/starpu_perfmodel.h

@@ -122,6 +122,7 @@ struct starpu_perfmodel
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 
 
 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
+	uint32_t (*footprint)(struct starpu_task *);
 
 
 	struct starpu_perfmodel_per_arch**** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/
 	struct starpu_perfmodel_per_arch**** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/
 
 

+ 1 - 0
include/starpu_scheduler.h

@@ -69,6 +69,7 @@ int starpu_get_prefetch_flag(void);
 int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node);
 int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node);
 
 
 uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
+uint32_t starpu_task_data_footprint(struct starpu_task *task);
 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
 double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
 double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task);
 double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task);

+ 24 - 11
src/datawizard/footprint.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2013-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -20,17 +20,37 @@
 #include <core/task.h>
 #include <core/task.h>
 #include <starpu_scheduler.h>
 #include <starpu_scheduler.h>
 
 
+/* Compute the raw data footprint of a task: the footprints of all the data
+ * handles of the task are chained, in buffer order, through
+ * starpu_hash_crc32c_be(), starting from a zero seed. */
+uint32_t starpu_task_data_footprint(struct starpu_task *task)
+{
+	uint32_t hash = 0;
+	unsigned i = 0;
+
+	while (i < task->cl->nbuffers)
+	{
+		/* Fold the footprint of the i-th handle into the running hash. */
+		hash = starpu_hash_crc32c_be(_starpu_data_get_footprint(STARPU_TASK_GET_HANDLE(task, i)), hash);
+		i++;
+	}
+
+	return hash;
+}
+
 uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned nimpl, struct _starpu_job *j)
 uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned nimpl, struct _starpu_job *j)
 {
 {
 	if (j->footprint_is_computed)
 	if (j->footprint_is_computed)
 		return j->footprint;
 		return j->footprint;
 
 
 	uint32_t footprint = 0;
 	uint32_t footprint = 0;
-	unsigned buffer;
 
 
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 
 
-	if (model != NULL && 
+	if (model != NULL && model->footprint != NULL)
+	{
+		footprint = model->footprint(task);
+	}
+	else if (model != NULL && 
 			model->per_arch[arch->type] != NULL &&
 			model->per_arch[arch->type] != NULL &&
 			model->per_arch[arch->type][arch->devid] != NULL &&
 			model->per_arch[arch->type][arch->devid] != NULL &&
 			model->per_arch[arch->type][arch->devid][arch->ncore] != NULL &&
 			model->per_arch[arch->type][arch->devid][arch->ncore] != NULL &&
@@ -46,14 +66,7 @@ uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, struc
 	}
 	}
 	else
 	else
 	{
 	{
-		for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
-		{
-			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
-
-			uint32_t handle_footprint = _starpu_data_get_footprint(handle);
-
-			footprint = starpu_hash_crc32c_be(handle_footprint, footprint);
-		}
+		footprint = starpu_task_data_footprint(task);
 	}
 	}
 
 
 	j->footprint = footprint;
 	j->footprint = footprint;

+ 1 - 0
tests/Makefile.am

@@ -228,6 +228,7 @@ noinst_PROGRAMS =				\
 	perfmodels/regression_based		\
 	perfmodels/regression_based		\
 	perfmodels/non_linear_regression_based	\
 	perfmodels/non_linear_regression_based	\
 	perfmodels/feed				\
 	perfmodels/feed				\
+	perfmodels/user_base			\
 	perfmodels/valid_model			\
 	perfmodels/valid_model			\
 	perfmodels/value_nan			\
 	perfmodels/value_nan			\
 	sched_policies/data_locality            \
 	sched_policies/data_locality            \

+ 129 - 0
tests/perfmodels/user_base.c

@@ -0,0 +1,129 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014  Université Bordeaux
+ * Copyright (C) 2012, 2013  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <config.h>
+#include <starpu.h>
+#include <starpu_scheduler.h>
+#include "../helper.h"
+
+/* Empty kernel used for every implementation of the test codelet: the test
+ * only needs tasks to be executed, not to compute anything. */
+void func(void *descr[], void *arg)
+{
+}
+
+/* size_base callback of the test models: reports the same size (3) for every
+ * task and implementation, whatever data the task actually accesses. */
+size_t get_size_base(struct starpu_task *task, unsigned nimpl)
+{
+	return 3;
+}
+
+/* footprint callback of the history-based test model: builds a custom
+ * footprint by combining the constant 42 with the raw data footprint of the
+ * task. The return type is uint32_t so that the function matches the
+ * starpu_perfmodel::footprint function pointer type. */
+uint32_t get_footprint(struct starpu_task *task)
+{
+	uint32_t orig = starpu_task_data_footprint(task);
+	return starpu_hash_crc32c_be(42, orig);
+}
+
+/* Linear regression model whose size comes from get_size_base. */
+static struct starpu_perfmodel rb_model =
+{
+	.type = STARPU_REGRESSION_BASED,
+	.symbol = "valid_model_regression_based",
+	.size_base = get_size_base,
+};
+
+/* Non-linear regression model whose size comes from get_size_base. */
+static struct starpu_perfmodel nlrb_model =
+{
+	.type = STARPU_NL_REGRESSION_BASED,
+	.symbol = "valid_model_non_linear_regression_based",
+	.size_base = get_size_base,
+};
+
+/* History-based model using the user-provided size_base callback. */
+static struct starpu_perfmodel hb_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "valid_model_history_based",
+	.size_base = get_size_base,
+};
+
+/* History-based model using the user-provided footprint callback. */
+static struct starpu_perfmodel hb_model_foot =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "valid_model_history_based_footprint",
+	.footprint = get_footprint,
+};
+
+/* Codelet under test: the same empty kernel for CUDA, OpenCL and CPU, with a
+ * single buffer accessed in STARPU_W mode. Its perfmodel is not set here;
+ * submit() assigns it before inserting tasks. */
+static struct starpu_codelet mycodelet =
+{
+	.cuda_funcs = {func, NULL},
+	.opencl_funcs = {func, NULL},
+	.cpu_funcs = {func, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+/* Run one calibration session for the given perfmodel: initialize StarPU
+ * with the "eager" scheduler and calibration enabled, attach the model to
+ * the codelet, insert nloops tasks writing to a 100-int vector, then
+ * unregister the data and shut StarPU down.
+ *
+ * Returns 0 on success, or STARPU_TEST_SKIPPED when StarPU reports -ENODEV
+ * at initialization or at task insertion. */
+static int submit(struct starpu_codelet *codelet, struct starpu_perfmodel *model)
+{
+	int nloops = 123;
+	int loop;
+	starpu_data_handle_t handle;
+	int ret;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	conf.sched_policy_name = "eager";
+	conf.calibrate = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	codelet->model = model;
+
+	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, 100, sizeof(int));
+	for (loop = 0; loop < nloops; loop++)
+	{
+		ret = starpu_task_insert(codelet, STARPU_W, handle, 0);
+		if (ret == -ENODEV)
+		{
+			/* Release the handle and the runtime before skipping the test. */
+			starpu_data_unregister(handle);
+			starpu_shutdown();
+			return STARPU_TEST_SKIPPED;
+		}
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	}
+	starpu_data_unregister(handle);
+	starpu_shutdown();
+
+	/* main() treats any non-zero value as a reason to stop, so report
+	 * success explicitly instead of falling off the end of the function. */
+	return 0;
+}
+
+/* Exercise each perfmodel in turn (linear regression, non-linear regression,
+ * history with size_base, history with a custom footprint) and stop at the
+ * first non-zero status returned by submit(). */
+int main(int argc, char **argv)
+{
+	struct starpu_perfmodel *models[] =
+	{
+		&rb_model,	/* linear regression */
+		&nlrb_model,	/* non-linear regression */
+		&hb_model,	/* history */
+		&hb_model_foot,	/* history with footprints */
+	};
+	unsigned i;
+
+	for (i = 0; i < sizeof(models) / sizeof(models[0]); i++)
+	{
+		int status = submit(&mycodelet, models[i]);
+		if (status)
+			return status;
+	}
+
+	return EXIT_SUCCESS;
+}