Browse source code

Add starpu_perfmodel_update_history, which permits applications to feed performance models with existing measurements

Samuel Thibault 13 years ago
parent
commit
39c06b2141

+ 2 - 0
ChangeLog

@@ -21,6 +21,8 @@ New features:
   * OpenGL interoperability support.
   * Capability to store compiled OpenCL kernels on the file system
   * Capability to load compiled OpenCL kernels
+  * Performance models measurements can now be provided explicitly by
+    applications.
 
 Changes:
   * The FxT code can now be used on systems other than Linux.

+ 7 - 2
doc/chapters/advanced-examples.texi

@@ -323,7 +323,10 @@ for further executions, and can be observed by using the
 the @code{starpu_perfmodel_plot} (@pxref{Performance model calibration}).  The
 models are indexed by machine name. To
 share the models between machines (e.g. for a homogeneous cluster), use
-@code{export STARPU_HOSTNAME=some_global_name}. Measurements are only done when using a task scheduler which makes use of it, such as @code{heft} or @code{dmda}.
+@code{export STARPU_HOSTNAME=some_global_name}. Measurements are only done
+when using a task scheduler which makes use of it, such as @code{heft} or
+@code{dmda}. Measurements can also be provided explicitly by the application, by
+using the @code{starpu_perfmodel_update_history} function.
 
 The following is a small code example.
 
@@ -369,7 +372,9 @@ trust the regression unless there is at least 10% difference between the minimum
 and maximum observed input size. It can be useful to set the
 @code{STARPU_CALIBRATE} environment variable to @code{1} and run the application
 on varying input sizes, so as to feed the performance model for a variety of
-inputs. The @code{starpu_perfmodel_display} and @code{starpu_perfmodel_plot}
+inputs, or to provide the measurements explicitly by using
+@code{starpu_perfmodel_update_history}. The @code{starpu_perfmodel_display} and
+@code{starpu_perfmodel_plot}
 tools can be used to observe how much the performance model is calibrated (@pxref{Performance model calibration}); when
 their output looks good, @code{STARPU_CALIBRATE} can be reset to @code{0} to let
 StarPU use the resulting performance model without recording new measures. If

+ 8 - 0
doc/chapters/basic-api.texi

@@ -1962,6 +1962,14 @@ prints a matrix of bus bandwidths on @var{f}.
 prints the affinity devices on @var{f}.
 @end deftypefun
 
+@deftypefun void starpu_perfmodel_update_history ({struct starpu_perfmodel *}@var{model}, {struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{cpuid}, unsigned @var{nimpl}, double @var{measured});
+This feeds the performance model @var{model} with an explicit measurement
+@var{measured}, in addition to measurements done by StarPU itself. This can be
+useful when the application already has an existing set of measurements done
+in good conditions, that StarPU could benefit from instead of doing on-line
+measurements. An example of use can be seen in @ref{Performance model example}.
+@end deftypefun
+
 @node Profiling API
 @section Profiling API
 

+ 51 - 0
doc/chapters/perf-optimization.texi

@@ -210,6 +210,46 @@ disables data transfer / computation overlapping, and should thus not be used
 for eventual benchmarks. Note 2: history-based performance models get calibrated
 only if a performance-model-based scheduler is chosen.
 
+The history-based performance models can also be explicitly filled by the
+application without executing tasks, e.g. when the application already has a series of
+measurements. This can be done by using @code{starpu_perfmodel_update_history},
+for instance:
+
+@example
+static struct starpu_perfmodel perf_model = @{
+    .type = STARPU_HISTORY_BASED,
+    .symbol = "my_perfmodel",
+@};
+
+struct starpu_codelet cl = @{
+    .where = STARPU_CUDA,
+    .cuda_funcs = @{ cuda_func1, cuda_func2, NULL @},
+    .nbuffers = 1,
+    .modes = @{STARPU_W@},
+    .model = &perf_model
+@};
+
+void feed(void) @{
+    struct my_measure *measure;
+    struct starpu_task task;
+    starpu_task_init(&task);
+
+    task.cl = &cl;
+
+    for (measure = &measures[0]; measure < &measures[last]; measure++) @{
+        starpu_data_handle_t handle;
+        starpu_vector_data_register(&handle, -1, 0, measure->size, sizeof(float));
+        task.handles[0] = handle;
+        starpu_perfmodel_update_history(&perf_model, &task, STARPU_CUDA_DEFAULT + measure->cudadev, 0, measure->implementation, measure->time);
+        starpu_task_deinit(&task);
+        starpu_data_unregister(handle);
+    @}
+@}
+@end example
+
+Measurements have to be provided in milliseconds for the completion time models,
+and in Joules for the energy consumption models.
+
 @node Task distribution vs Data transfer
 @section Task distribution vs Data transfer
 
@@ -268,6 +308,17 @@ be obtained from the machine power supplier.
 The power actually consumed by the total execution can be displayed by setting
 @code{export STARPU_PROFILING=1 STARPU_WORKER_STATS=1} .
 
+On-line task consumption measurement is currently only supported through the
+@code{CL_PROFILING_POWER_CONSUMED} OpenCL extension, implemented in the MoviSim
+simulator. Applications can however provide explicit measurements by using the
+@code{starpu_perfmodel_update_history} function (exemplified in @ref{Performance
+model example} with the @code{power_model} performance model). Fine-grain
+measurement is often not feasible with the feedback provided by the hardware, so
+the user can for instance run a given task a thousand times, measure the global
+consumption for that series of tasks, divide it by a thousand, repeat for
+varying kinds of tasks and task sizes, and eventually feed StarPU
+with these manual measurements through @code{starpu_perfmodel_update_history}.
+
 @node Profiling
 @section Profiling
 

+ 3 - 1
include/starpu_perfmodel.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
@@ -209,6 +209,8 @@ void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, enum starpu_
 void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen, unsigned nimpl);
 int starpu_list_models(FILE *output);
 
+void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *, enum starpu_perf_archtype arch, unsigned cpuid, unsigned nimpl, double measured);
+
 void starpu_force_bus_sampling(void);
 void starpu_bus_print_bandwidth(FILE *f);
 void starpu_bus_print_affinity(FILE *f);

+ 10 - 0
src/core/perfmodel/perfmodel_history.c

@@ -1146,3 +1146,13 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 	}
 }
+
+void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, enum starpu_perf_archtype arch, unsigned cpuid, unsigned nimpl, double measured) {
+	struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
+
+	_starpu_load_perfmodel(model);
+	/* Record measurement */
+	_starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl);
+	/* and save perfmodel on termination */
+	_starpu_set_calibrate_flag(1);
+}

+ 5 - 1
tests/Makefile.am

@@ -218,7 +218,8 @@ noinst_PROGRAMS =				\
 	parallel_tasks/parallel_kernels		\
 	parallel_tasks/parallel_kernels_spmd	\
 	perfmodels/regression_based		\
-	perfmodels/non_linear_regression_based  \
+	perfmodels/non_linear_regression_based	\
+	perfmodels/feed				\
 	sched_policies/data_locality            \
 	sched_policies/execute_all_tasks        \
 	sched_policies/simple_deps              \
@@ -550,6 +551,9 @@ perfmodels_non_linear_regression_based_SOURCES+=\
 	perfmodels/opencl_memset.c
 endif
 
+perfmodels_feed_SOURCES=\
+	perfmodels/feed.c
+
 sched_policies_execute_all_tasks_LDFLAGS = -lm
 
 showcheck:

+ 85 - 0
tests/perfmodels/feed.c

@@ -0,0 +1,85 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * Test the starpu_perfmodel_update_history function
+ */
+
+#include <config.h>
+#include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
+#include "../helper.h"
+
+static struct starpu_perfmodel model =
+{
+	.type = STARPU_REGRESSION_BASED,
+	.symbol = "feed"
+};
+
+static struct starpu_perfmodel nl_model =
+{
+	.type = STARPU_NL_REGRESSION_BASED,
+	.symbol = "nlfeed"
+};
+
+static struct starpu_codelet cl =
+{
+	.model = &model,
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+int main(int argc, char **argv)
+{
+	struct starpu_task task;
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_task_init(&task);
+	task.cl = &cl;
+
+	int size;
+	for (size = 1024; size < 16777216; size *= 2)
+	{
+		float measured_fast, measured_slow;
+		starpu_data_handle_t handle;
+		starpu_vector_data_register(&handle, -1, 0, size, sizeof(float));
+		task.handles[0] = handle;
+
+		/* Synthetic timings for a fast and a slow GPU. In a real
+		 * application these values would come from actual measurements */
+		measured_fast = 0.002+size*0.00000001;
+		measured_slow = 0.001+size*0.0000001;
+
+		/* Simulate Fast GPU */
+		starpu_perfmodel_update_history(&model, &task, STARPU_CUDA_DEFAULT, 0, 0, measured_fast);
+		starpu_perfmodel_update_history(&nl_model, &task, STARPU_CUDA_DEFAULT, 0, 0, measured_fast);
+		/* Simulate Slow GPU */
+		starpu_perfmodel_update_history(&model, &task, STARPU_CUDA_DEFAULT + 1, 0, 0, measured_slow);
+		starpu_perfmodel_update_history(&nl_model, &task, STARPU_CUDA_DEFAULT + 1, 0, 0, measured_slow);
+		starpu_task_deinit(&task);
+		starpu_data_unregister(handle);
+	}
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}