
Add starpu_perfmodel_update_history, which permits applications to feed performance models with existing measurements

Samuel Thibault, 13 years ago
commit 39c06b2141

+ 2 - 0
ChangeLog

@@ -21,6 +21,8 @@ New features:
   * OpenGL interoperability support.
   * Capability to store compiled OpenCL kernels on the file system
   * Capability to load compiled OpenCL kernels
+  * Performance models measurements can now be provided explicitly by
+    applications.
 
 
 Changes:
   * The FxT code can now be used on systems other than Linux.

+ 7 - 2
doc/chapters/advanced-examples.texi

@@ -323,7 +323,10 @@ for further executions, and can be observed by using the
 the @code{starpu_perfmodel_plot} (@pxref{Performance model calibration}).  The
 models are indexed by machine name. To
 share the models between machines (e.g. for a homogeneous cluster), use
-@code{export STARPU_HOSTNAME=some_global_name}. Measurements are only done when using a task scheduler which makes use of it, such as @code{heft} or @code{dmda}.
+@code{export STARPU_HOSTNAME=some_global_name}. Measurements are only done
+when using a task scheduler which makes use of it, such as @code{heft} or
+@code{dmda}. Measurements can also be provided explicitly by the application, by
+using the @code{starpu_perfmodel_update_history} function.
 
 
 The following is a small code example.

@@ -369,7 +372,9 @@ trust the regression unless there is at least 10% difference between the minimum
 and maximum observed input size. It can be useful to set the
 @code{STARPU_CALIBRATE} environment variable to @code{1} and run the application
 on varying input sizes, so as to feed the performance model for a variety of
-inputs. The @code{starpu_perfmodel_display} and @code{starpu_perfmodel_plot}
+inputs, or to provide the measurements explicitly by using
+@code{starpu_perfmodel_update_history}. The @code{starpu_perfmodel_display} and
+@code{starpu_perfmodel_plot}
 tools can be used to observe how much the performance model is calibrated (@pxref{Performance model calibration}); when
 their output looks good, @code{STARPU_CALIBRATE} can be reset to @code{0} to let
 StarPU use the resulting performance model without recording new measures. If

+ 8 - 0
doc/chapters/basic-api.texi

@@ -1962,6 +1962,14 @@ prints a matrix of bus bandwidths on @var{f}.
 prints the affinity devices on @var{f}.
 @end deftypefun
 
 
+@deftypefun void starpu_perfmodel_update_history ({struct starpu_perfmodel *}@var{model}, {struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{cpuid}, unsigned @var{nimpl}, double @var{measured});
+This feeds the performance model @var{model} with an explicit measurement
+@var{measured}, in addition to measurements done by StarPU itself. This can be
+useful when the application already has an existing set of measurements taken
+under good conditions, which StarPU can then use instead of performing on-line
+measurements. An example of use can be seen in @ref{Performance model example}.
+@end deftypefun
+
 @node Profiling API
 @section Profiling API
 
 

+ 51 - 0
doc/chapters/perf-optimization.texi

@@ -210,6 +210,46 @@ disables data transfer / computation overlapping, and should thus not be used
 for eventual benchmarks. Note 2: history-based performance models get calibrated
 only if a performance-model-based scheduler is chosen.
 
 
+The history-based performance models can also be filled explicitly by the
+application, without executing any task, when the application already has a
+series of measurements. This can be done with
+@code{starpu_perfmodel_update_history}, for instance:
+
+@example
+static struct starpu_perfmodel perf_model = @{
+    .type = STARPU_HISTORY_BASED,
+    .symbol = "my_perfmodel",
+@};
+
+struct starpu_codelet cl = @{
+    .where = STARPU_CUDA,
+    .cuda_funcs = @{ cuda_func1, cuda_func2, NULL @},
+    .nbuffers = 1,
+    .modes = @{STARPU_W@},
+    .model = &perf_model
+@};
+
+void feed(void) @{
+    struct my_measure *measure;
+    struct starpu_task task;
+    starpu_task_init(&task);
+
+    task.cl = &cl;
+
+    for (measure = &measures[0]; measure < &measures[last]; measure++) @{
+        starpu_data_handle_t handle;
+        starpu_vector_data_register(&handle, -1, 0, measure->size, sizeof(float));
+        task.handles[0] = handle;
+        starpu_perfmodel_update_history(&perf_model, &task, STARPU_CUDA_DEFAULT + measure->cudadev, 0, measure->implementation, measure->time);
+        starpu_task_deinit(&task);
+        starpu_data_unregister(handle);
+    @}
+@}
+@end example
+
+Measurements have to be provided in milliseconds for the completion time models,
+and in Joules for the energy consumption models.
+
 @node Task distribution vs Data transfer
 @section Task distribution vs Data transfer
 
 
@@ -268,6 +308,17 @@ be obtained from the machine power supplier.
 The power actually consumed by the total execution can be displayed by setting
 @code{export STARPU_PROFILING=1 STARPU_WORKER_STATS=1} .
 
 
+On-line task consumption measurement is currently only supported through the
+@code{CL_PROFILING_POWER_CONSUMED} OpenCL extension, implemented in the MoviSim
+simulator. Applications can however provide explicit measurements by using the
+@code{starpu_perfmodel_update_history} function (exemplified in @ref{Performance
+model example} with the @code{power_model} performance model). Fine-grain
+measurement is often not feasible with the feedback provided by the hardware, so
+the user can for instance run a given task a thousand times, measure the global
+consumption for that series of tasks, divide it by a thousand, repeat for
+varying kinds of tasks and task sizes, and eventually feed StarPU
+with these manual measurements through @code{starpu_perfmodel_update_history}.
+
 @node Profiling
 @section Profiling
 
 

+ 3 - 1
include/starpu_perfmodel.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
@@ -209,6 +209,8 @@ void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, enum starpu_
 void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen, unsigned nimpl);
 int starpu_list_models(FILE *output);
 
 
+void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *, enum starpu_perf_archtype arch, unsigned cpuid, unsigned nimpl, double measured);
+
 void starpu_force_bus_sampling(void);
 void starpu_bus_print_bandwidth(FILE *f);
 void starpu_bus_print_affinity(FILE *f);

+ 10 - 0
src/core/perfmodel/perfmodel_history.c

@@ -1146,3 +1146,13 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 	}
 }
+
+void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, enum starpu_perf_archtype arch, unsigned cpuid, unsigned nimpl, double measured) {
+	struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
+
+	_starpu_load_perfmodel(model);
+	/* Record measurement */
+	_starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl);
+	/* and save perfmodel on termination */
+	_starpu_set_calibrate_flag(1);
+}

+ 5 - 1
tests/Makefile.am

@@ -218,7 +218,8 @@ noinst_PROGRAMS =				\
 	parallel_tasks/parallel_kernels		\
 	parallel_tasks/parallel_kernels_spmd	\
 	perfmodels/regression_based		\
-	perfmodels/non_linear_regression_based  \
+	perfmodels/non_linear_regression_based	\
+	perfmodels/feed				\
 	sched_policies/data_locality            \
 	sched_policies/execute_all_tasks        \
 	sched_policies/simple_deps              \
@@ -550,6 +551,9 @@ perfmodels_non_linear_regression_based_SOURCES+=\
 	perfmodels/opencl_memset.c
 endif
 
 
+perfmodels_feed_SOURCES=\
+	perfmodels/feed.c
+
 sched_policies_execute_all_tasks_LDFLAGS = -lm

 showcheck:

+ 85 - 0
tests/perfmodels/feed.c

@@ -0,0 +1,85 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * Test the starpu_perfmodel_update_history function
+ */
+
+#include <config.h>
+#include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
+#include "../helper.h"
+
+static struct starpu_perfmodel model =
+{
+	.type = STARPU_REGRESSION_BASED,
+	.symbol = "feed"
+};
+
+static struct starpu_perfmodel nl_model =
+{
+	.type = STARPU_NL_REGRESSION_BASED,
+	.symbol = "nlfeed"
+};
+
+static struct starpu_codelet cl =
+{
+	.model = &model,
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+int main(int argc, char **argv)
+{
+	struct starpu_task task;
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_task_init(&task);
+	task.cl = &cl;
+
+	int size;
+	for (size = 1024; size < 16777216; size *= 2)
+	{
+		float measured_fast, measured_slow;
+		starpu_data_handle_t handle;
+		starpu_vector_data_register(&handle, -1, 0, size, sizeof(float));
+		task.handles[0] = handle;
+
+		/* Simulate a fast GPU. In a real application these values
+		 * would be fetched from actual measurements. */
+		measured_fast = 0.002+size*0.00000001;
+		measured_slow = 0.001+size*0.0000001;
+
+		/* Simulate Fast GPU */
+		starpu_perfmodel_update_history(&model, &task, STARPU_CUDA_DEFAULT, 0, 0, measured_fast);
+		starpu_perfmodel_update_history(&nl_model, &task, STARPU_CUDA_DEFAULT, 0, 0, measured_fast);
+		/* Simulate Slow GPU */
+		starpu_perfmodel_update_history(&model, &task, STARPU_CUDA_DEFAULT + 1, 0, 0, measured_slow);
+		starpu_perfmodel_update_history(&nl_model, &task, STARPU_CUDA_DEFAULT + 1, 0, 0, measured_slow);
+		starpu_task_deinit(&task);
+		starpu_data_unregister(handle);
+	}
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}