5 年前 · f630a1399e
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -356,6 +356,10 @@ myPROGRAMS +=				\
 
				 	parallel_tasks/spmd_peager		\
			
 
				 	parallel_tasks/cuda_only		\
			
 
				 	perfmodels/regression_based		\
			
 
				+	perfmodels/regression_based_01		\
			
 
				+	perfmodels/regression_based_02		\
			
 
				+	perfmodels/regression_based_03		\
			
 
				+	perfmodels/regression_based_04		\
			
 
				 	perfmodels/non_linear_regression_based	\
			
 
				 	perfmodels/feed				\
			
 
				 	perfmodels/user_base			\
			
@@ -987,9 +991,25 @@ endif
 
				 perfmodels_regression_based_SOURCES=\
			
 
				 	perfmodels/regression_based.c
			
 
				 
			
 
				+perfmodels_regression_based_01_SOURCES=\
			
 
				+	perfmodels/regression_based_01.c
			
 
				+
			
 
				+perfmodels_regression_based_02_SOURCES=\
			
 
				+	perfmodels/regression_based_02.c
			
 
				+
			
 
				+perfmodels_regression_based_03_SOURCES=\
			
 
				+	perfmodels/regression_based_03.c
			
 
				+
			
 
				+perfmodels_regression_based_04_SOURCES=\
			
 
				+	perfmodels/regression_based_04.c
			
 
				+
			
 
				 if STARPU_USE_OPENCL
			
 
				 perfmodels_regression_based_SOURCES+=\
			
 
				 	perfmodels/opencl_memset.c
			
 
				+
			
 
				+perfmodels_regression_based_04_SOURCES+=\
			
 
				+	perfmodels/opencl_memset.c
			
 
				+
			
 
				 nobase_STARPU_OPENCL_DATA_DATA += \
			
 
				 	perfmodels/opencl_memset_kernel.cl
			
 
				 endif
			
--- a/tests/perfmodels/opencl_memset.c
+++ b/tests/perfmodels/opencl_memset.c
@@ -22,7 +22,7 @@
 
				 
			
 
				 extern struct starpu_opencl_program opencl_program;
			
 
				 
			
 
				-void memset_opencl(void *buffers[], void *args)
			
 
				+void _memset_opencl(void *buffers[], void *args, const char *name)
			
 
				 {
			
 
				 	(void) args;
			
 
				 	int id, devid;
			
@@ -36,7 +36,7 @@ void memset_opencl(void *buffers[], void *args)
 
				 	id = starpu_worker_get_id_check();
			
 
				 	devid = starpu_worker_get_devid(id);
			
 
				 
			
 
				-	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_memset_opencl", devid);
			
 
				+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, name, devid);
			
 
				 	if (err != CL_SUCCESS)
			
 
				 		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				 
			
@@ -67,3 +67,13 @@ void memset_opencl(void *buffers[], void *args)
 
				 	}
			
 
				 	starpu_opencl_release_kernel(kernel);
			
 
				 }
			
 
				+
			
 
				+/*
				+ * OpenCL codelet implementation running the "_memset_opencl" kernel.
				+ *
				+ * buffers: data interfaces handed over by StarPU, forwarded unchanged.
				+ * args:    codelet argument, forwarded unchanged.
				+ *
				+ * The function keeps the two-argument codelet prototype it had before the
				+ * kernel name became selectable; the kernel name is fixed here rather than
				+ * received through an extra (never supplied) parameter.
				+ */
				+void memset_opencl(void *buffers[], void *args)
				+{
				+	_memset_opencl(buffers, args, "_memset_opencl");
				+}
			
 
				+
			
 
				+/*
				+ * OpenCL codelet implementation running the "_memset0_opencl" kernel.
				+ *
				+ * buffers: data interfaces handed over by StarPU, forwarded unchanged.
				+ * args:    codelet argument, forwarded unchanged.
				+ *
				+ * Uses the regular two-argument codelet prototype: the kernel name is
				+ * fixed here, so no additional parameter is needed.
				+ */
				+void memset0_opencl(void *buffers[], void *args)
				+{
				+	_memset_opencl(buffers, args, "_memset0_opencl");
				+}
			
--- a/tests/perfmodels/opencl_memset_kernel.cl
+++ b/tests/perfmodels/opencl_memset_kernel.cl
@@ -14,6 +14,13 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
				+ * Every work-item whose global id is below nx adds that id to val[0].
				+ * NOTE(review): all work-items update val[0] without synchronisation, so
				+ * presumably only the execution time matters, not the stored value -- confirm.
				+ */
				+__kernel void _memset0_opencl(__global int *val, int nx)
				+{
				+        const int gid = get_global_id(0);
				+
				+        if (gid >= nx)
				+                return;
				+
				+        val[0] += gid;
				+}
			
 
				+
			
 
				 __kernel void _memset_opencl(__global int *val, int nx)
			
 
				 {
			
 
				         const int i = get_global_id(0);
			
--- a/tests/perfmodels/regression_based.c
+++ b/tests/perfmodels/regression_based.c
@@ -23,6 +23,7 @@
 
				  * Benchmark memset with a linear regression
			
 
				  */
			
 
				 
			
 
				+#define STARTlin 1024
			
 
				 #define START 1024
			
 
				 #ifdef STARPU_QUICK_CHECK
			
 
				 #define END 1048576
			
@@ -184,11 +185,14 @@ int main(int argc, char **argv)
 
				 #endif
			
 
				 
			
 
				 	int size;
			
 
				-	for (size = START; size < END; size *= 2)
			
 
				+	for (size = STARTlin; size < END; size *= 2)
			
 
				 	{
			
 
				 		/* Use a linear regression */
			
 
				 		test_memset(size, &memset_cl);
			
 
				+	}
			
 
				 
			
 
				+	for (size = START; size < END; size *= 2)
			
 
				+	{
			
 
				 		/* Use a non-linear regression */
			
 
				 		test_memset(size, &nl_memset_cl);
			
 
				 	}
			
--- a/tests/perfmodels/regression_based_01.c
+++ b/tests/perfmodels/regression_based_01.c
@@ -0,0 +1,271 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2011       Télécom-SudParis
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <assert.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include <unistd.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+/*
			
 
				+ * - Calibrate the linear model only for large sizes: STARTlin is 131072
			
 
				+ * - Separate the test_memset loop in two loops:
			
 
				+ *   - linear: start from STARTlin (131072)
			
 
				+ *   - non-linear: keep start at 1024
			
 
				+ */
			
 
				+
			
 
				+#define STARTlin 131072
			
 
				+#define START 1024
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+#define END 1048576
			
 
				+#else
			
 
				+#define END 16777216
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+/*
				+ * CPU implementation: sleeps for 1000 us, then accumulates the indices
				+ * 0..nx-1 into the first element of the vector.
				+ *
				+ * descr: codelet buffers; descr[0] is accessed as a vector of int.
				+ * arg:   unused.
				+ */
				+void memset_cpu(void *descr[], void *arg)
				+{
				+	(void)arg;
				+	STARPU_SKIP_IF_VALGRIND;
				+
				+	int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+	const unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
				+	unsigned k = 0;
				+
				+	/* fixed start-up cost, independent of the vector size */
				+	usleep(1000);
				+
				+	while (k < nx)
				+	{
				+		vec[0] += k;
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+/* Regression-based performance model referenced by memset_cl. */
				+static struct starpu_perfmodel model =
				+{
				+	.symbol = "memset_regression_based",
				+	.type = STARPU_REGRESSION_BASED,
				+};
			
 
				+
			
 
				+/* Non-linear regression-based performance model referenced by nl_memset_cl. */
				+static struct starpu_perfmodel nl_model =
				+{
				+	.symbol = "non_linear_memset_regression_based",
				+	.type = STARPU_NL_REGRESSION_BASED,
				+};
			
 
				+
			
 
				+/* Codelet writing one buffer with memset_cpu, timed through the linear model. */
				+static struct starpu_codelet memset_cl =
				+{
				+	.nbuffers = 1,
				+	.modes = {STARPU_W},
				+	.cpu_funcs = {memset_cpu},
				+	.cpu_funcs_name = {"memset_cpu"},
				+	.model = &model,
				+};
			
 
				+
			
 
				+/* Same codelet as memset_cl, but timed through the non-linear model. */
				+static struct starpu_codelet nl_memset_cl =
				+{
				+	.nbuffers = 1,
				+	.modes = {STARPU_W},
				+	.cpu_funcs = {memset_cpu},
				+	.cpu_funcs_name = {"memset_cpu"},
				+	.model = &nl_model,
				+};
			
 
				+
			
 
				+/*
				+ * Submit 100 tasks of the given codelet on a freshly registered vector of
				+ * nelems ints, then unregister the vector.
				+ *
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV; any other
				+ * submission error is reported through STARPU_CHECK_RETURN_VALUE.
				+ */
				+static void test_memset(int nelems, struct starpu_codelet *codelet)
				+{
				+	const int ntasks = 100;
				+	starpu_data_handle_t vector;
				+	int remaining;
				+
				+	starpu_vector_data_register(&vector, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+	for (remaining = ntasks; remaining > 0; remaining--)
				+	{
				+		struct starpu_task *t = starpu_task_create();
				+		int err;
				+
				+		t->handles[0] = vector;
				+		t->cl = codelet;
				+
				+		err = starpu_task_submit(t);
				+		if (err == -ENODEV)
				+			exit(STARPU_TEST_SKIPPED);
				+		STARPU_CHECK_RETURN_VALUE(err, "starpu_task_submit");
				+	}
				+
				+	starpu_data_unregister(vector);
				+}
			
 
				+
			
 
				+/*
				+ * Run the codelet 100 times synchronously on a vector of `size` ints, then
				+ * print, for every CPU worker and every implementation slot, the duration
				+ * expected for compar_task next to the mean measured duration.
				+ *
				+ * size:        number of ints in the temporary vector.
				+ * codelet:     codelet executed by the measured tasks.
				+ * compar_task: task whose expected length is queried; it is not submitted here.
				+ *
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV, like
				+ * test_memset() does.
				+ */
				+static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task)
				+{
				+	unsigned i;
				+	unsigned niter = 100;
				+	unsigned nmeasured = 0;
				+	double length_sum = 0.0;
				+	starpu_data_handle_t handle;
				+
				+	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+	struct starpu_task **tasks = malloc(niter * sizeof(*tasks));
				+	assert(tasks);
				+
				+	for (i = 0; i < niter; i++)
				+	{
				+		struct starpu_task *task = starpu_task_create();
				+
				+		task->cl = codelet;
				+		task->handles[0] = handle;
				+
				+		task->synchronous = 1;
				+
				+		/* We will destroy the task structure by hand so that we can
				+		 * query the profiling info before the task is destroyed. */
				+		task->destroy = 0;
				+
				+		tasks[i] = task;
				+
				+		int ret = starpu_task_submit(task);
				+
				+		if (STARPU_UNLIKELY(ret == -ENODEV))
				+		{
				+			FPRINTF(stderr, "No worker may execute this task\n");
				+			exit(STARPU_TEST_SKIPPED);
				+		}
				+		/* any other failure would leave this task without timing data */
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+	}
				+
				+	starpu_data_unregister(handle);
				+
				+	starpu_task_wait_for_all();
				+
				+	for (i = 0; i < niter; i++)
				+	{
				+		struct starpu_task *task = tasks[i];
				+		struct starpu_profiling_task_info *info = task->profiling_info;
				+
				+		/* How long was the task execution ? Only count tasks that
				+		 * actually carry profiling data. */
				+		if (info)
				+		{
				+			length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
				+			nmeasured++;
				+		}
				+
				+		/* We don't need the task structure anymore */
				+		starpu_task_destroy(task);
				+	}
				+
				+	/* every task has been destroyed, the array itself can go too */
				+	free(tasks);
				+
				+	double mean_length = nmeasured ? length_sum / nmeasured : 0.0;
				+
				+	/* Display expected vs. measured durations for the CPU workers */
				+	unsigned worker;
				+	for (worker = 0; worker < starpu_worker_get_count(); worker++)
				+	{
				+		struct starpu_profiling_worker_info worker_info;
				+		int ret = starpu_profiling_worker_get_info(worker, &worker_info);
				+		STARPU_ASSERT(!ret);
				+
				+		char workername[128];
				+		starpu_worker_get_name(worker, workername, sizeof(workername));
				+		unsigned nimpl;
				+
				+		if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER)
				+		{
				+			FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername);
				+
				+			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				+			{
				+				FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f\n",
				+						size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), mean_length);
				+			}
				+		}
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+/*
				+ * First StarPU session (eager scheduler, conf.calibrate = 2): run the memset
				+ * codelets over a range of sizes for both perfmodels.
				+ * Second session (conf.calibrate = 0): print expected and measured durations
				+ * for a vector of 1234567 ints, for each perfmodel in turn.
				+ *
				+ * Returns STARPU_TEST_SKIPPED when StarPU initialisation reports -ENODEV.
				+ */
				+int main(int argc, char **argv)
				+{
				+	struct starpu_conf cfg;
				+	starpu_data_handle_t vector;
				+	struct starpu_task *probe;
				+	int nelems;
				+	int err;
				+
				+	/* Enable profiling */
				+	starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
				+
				+	/* Calibration phase */
				+	starpu_conf_init(&cfg);
				+	cfg.sched_policy_name = "eager";
				+	cfg.calibrate = 2;
				+
				+	err = starpu_initialize(&cfg, &argc, &argv);
				+	if (err == -ENODEV)
				+		return STARPU_TEST_SKIPPED;
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_init");
				+
				+	/* Use a linear regression */
				+	for (nelems = STARTlin; nelems < END; nelems *= 2)
				+		test_memset(nelems, &memset_cl);
				+
				+	/* Use a non-linear regression */
				+	for (nelems = START; nelems < END; nelems *= 2)
				+		test_memset(nelems, &nl_memset_cl);
				+
				+	err = starpu_task_wait_for_all();
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_task_wait_for_all");
				+
				+	starpu_shutdown();
				+
				+	/* Test Phase */
				+	starpu_conf_init(&cfg);
				+	cfg.sched_policy_name = "eager";
				+	cfg.calibrate = 0;
				+
				+	err = starpu_initialize(&cfg, &argc, &argv);
				+	if (err == -ENODEV)
				+		return STARPU_TEST_SKIPPED;
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_init");
				+
				+	/* Now create a dummy task just to estimate its duration according to the regression */
				+	nelems = 1234567;
				+	starpu_vector_data_register(&vector, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+	probe = starpu_task_create();
				+	probe->cl = &memset_cl;
				+	probe->handles[0] = vector;
				+	probe->destroy = 0;
				+
				+	FPRINTF(stdout, "\n ////linear regression results////\n");
				+	compare_performance(nelems, &memset_cl, probe);
				+
				+	/* reuse the same dummy task for the non-linear model */
				+	probe->cl = &nl_memset_cl;
				+
				+	FPRINTF(stdout, "\n ////non linear regression results////\n");
				+	compare_performance(nelems, &nl_memset_cl, probe);
				+
				+	starpu_task_destroy(probe);
				+	starpu_data_unregister(vector);
				+	starpu_shutdown();
				+
				+	return EXIT_SUCCESS;
				+}
			
--- a/tests/perfmodels/regression_based_02.c
+++ b/tests/perfmodels/regression_based_02.c
@@ -0,0 +1,305 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2011       Télécom-SudParis
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+/*
			
 
				+ * A multi-implementation benchmark with dmda scheduler
			
 
				+ * we aim to test the dmda behavior when we have two implementations
			
 
				+ * dmda chooses the implementation which minimises the execution time
			
 
				+ */
			
 
				+
			
 
				+#define STARTlin 1048576
			
 
				+#define START 1024
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+#define END 1048576
			
 
				+#else
			
 
				+#define END 16777216
			
 
				+#endif
			
 
				+
			
 
				+// first implementation with an initial delay (100 us)
			
 
				+/*
				+ * CPU implementation #0: sleeps for 100 us, then accumulates the indices
				+ * 0..nx-1 into the first element of the vector.
				+ *
				+ * descr: codelet buffers; descr[0] is accessed as a vector of int.
				+ * arg:   unused.
				+ */
				+void memset0_cpu(void *descr[], void *arg)
				+{
				+	(void)arg;
				+	STARPU_SKIP_IF_VALGRIND;
				+
				+	int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+	const unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
				+	unsigned k = 0;
				+
				+	/* fixed start-up cost, independent of the vector size */
				+	usleep(100);
				+
				+	while (k < nx)
				+	{
				+		vec[0] += k;
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+// second implementation: no initial delay, but 6.5 times as many loop iterations
			
 
				+/*
				+ * CPU implementation #1: no initial delay, but the accumulation loop runs
				+ * while the counter is below 6.5 * nx.
				+ *
				+ * descr: codelet buffers; descr[0] is accessed as a vector of int.
				+ * arg:   unused.
				+ *
				+ * The counter is unsigned, as in memset0_cpu(): with a signed counter the
				+ * running sum in ptr[0] is a signed addition that overflows (undefined
				+ * behaviour) for the larger vector sizes used by this test.
				+ */
				+void memset_cpu(void *descr[], void *arg)
				+{
				+	(void)arg;
				+	STARPU_SKIP_IF_VALGRIND;
				+
				+	int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
				+	/* loop bound kept as a double so the iteration count matches i < 6.5*n */
				+	const double limit = 6.5 * n;
				+	unsigned i;
				+
				+	for (i = 0; i < limit; i++)
				+	{
				+		ptr[0] += i;
				+	}
				+}
			
 
				+
			
 
				+// energy cost function: expected task length scaled by a per-implementation factor
			
 
				+/*
				+ * Energy estimate for a task: its expected length on `arch` for
				+ * implementation `nimpl`, multiplied by 10 for implementation 0 and by 1
				+ * for any other implementation.
				+ */
				+double energy_function(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
				+{
				+	const int factor = (nimpl == 0) ? 10 : 1;
				+
				+	return starpu_task_expected_length(task, arch, nimpl) * factor;
				+}
			
 
				+
			
 
				+/* Regression-based performance model referenced by memset_cl. */
				+static struct starpu_perfmodel model =
				+{
				+	.symbol = "memset_regression_based",
				+	.type = STARPU_REGRESSION_BASED,
				+};
			
 
				+
			
 
				+/* Non-linear regression-based performance model referenced by nl_memset_cl. */
				+static struct starpu_perfmodel nl_model =
				+{
				+	.symbol = "non_linear_memset_regression_based",
				+	.type = STARPU_NL_REGRESSION_BASED,
				+};
			
 
				+
			
 
				+/* Per-arch energy model whose cost is computed by energy_function(). */
				+static struct starpu_perfmodel nl_energy_model =
				+{
				+	.symbol = "non_linear_energy_model",
				+	.type = STARPU_PER_ARCH,
				+	.arch_cost_function = energy_function,
				+};
			
 
				+
			
 
				+/* Two-implementation codelet (memset0_cpu, memset_cpu) using the linear model. */
				+static struct starpu_codelet memset_cl =
				+{
				+	.nbuffers = 1,
				+	.modes = {STARPU_W},
				+	.cpu_funcs = {memset0_cpu, memset_cpu},
				+	.cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+	.model = &model,
				+};
			
 
				+
			
 
				+/*
				+ * Two-implementation codelet (memset0_cpu, memset_cpu) using the non-linear
				+ * time model together with the energy model.
				+ */
				+static struct starpu_codelet nl_memset_cl =
				+{
				+	.nbuffers = 1,
				+	.modes = {STARPU_W},
				+	.cpu_funcs = {memset0_cpu, memset_cpu},
				+	.cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+	.model = &nl_model,
				+	.energy_model = &nl_energy_model,
				+};
			
 
				+
			
 
				+/*
				+ * Submit 100 tasks of the given codelet on a freshly registered vector of
				+ * nelems ints, then unregister the vector.
				+ *
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV; any other
				+ * submission error is reported through STARPU_CHECK_RETURN_VALUE.
				+ */
				+static void test_memset(int nelems, struct starpu_codelet *codelet)
				+{
				+	const int ntasks = 100;
				+	starpu_data_handle_t vector;
				+	int remaining;
				+
				+	starpu_vector_data_register(&vector, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+	for (remaining = ntasks; remaining > 0; remaining--)
				+	{
				+		struct starpu_task *t = starpu_task_create();
				+		int err;
				+
				+		t->handles[0] = vector;
				+		t->cl = codelet;
				+
				+		err = starpu_task_submit(t);
				+		if (err == -ENODEV)
				+			exit(STARPU_TEST_SKIPPED);
				+		STARPU_CHECK_RETURN_VALUE(err, "starpu_task_submit");
				+	}
				+
				+	starpu_data_unregister(vector);
				+}
			
 
				+
			
 
				+/*
				+ * Run the codelet 100 times synchronously on a vector of `size` ints, then
				+ * print, for every CPU worker and every implementation slot, the duration and
				+ * energy expected for compar_task next to the mean measured duration.
				+ *
				+ * size:        number of ints in the temporary vector.
				+ * codelet:     codelet executed by the measured tasks.
				+ * compar_task: task whose expected length/energy is queried; it is not
				+ *              submitted here.
				+ *
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV, like
				+ * test_memset() does.
				+ */
				+static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task)
				+{
				+	unsigned i;
				+	unsigned niter = 100;
				+	unsigned nmeasured = 0;
				+	double length_sum = 0.0;
				+	starpu_data_handle_t handle;
				+
				+	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+	struct starpu_task **tasks = malloc(niter * sizeof(*tasks));
				+	assert(tasks);
				+
				+	for (i = 0; i < niter; i++)
				+	{
				+		struct starpu_task *task = starpu_task_create();
				+
				+		task->cl = codelet;
				+		task->handles[0] = handle;
				+
				+		task->synchronous = 1;
				+
				+		/* We will destroy the task structure by hand so that we can
				+		 * query the profiling info before the task is destroyed. */
				+		task->destroy = 0;
				+
				+		tasks[i] = task;
				+
				+		int ret = starpu_task_submit(task);
				+
				+		if (STARPU_UNLIKELY(ret == -ENODEV))
				+		{
				+			FPRINTF(stderr, "No worker may execute this task\n");
				+			exit(STARPU_TEST_SKIPPED);
				+		}
				+		/* any other failure would leave this task without timing data */
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+	}
				+
				+	starpu_data_unregister(handle);
				+
				+	starpu_task_wait_for_all();
				+
				+	for (i = 0; i < niter; i++)
				+	{
				+		struct starpu_task *task = tasks[i];
				+		struct starpu_profiling_task_info *info = task->profiling_info;
				+
				+		/* How long was the task execution ? Only count tasks that
				+		 * actually carry profiling data. */
				+		if (info)
				+		{
				+			length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
				+			nmeasured++;
				+		}
				+
				+		/* We don't need the task structure anymore */
				+		starpu_task_destroy(task);
				+	}
				+
				+	/* every task has been destroyed, the array itself can go too */
				+	free(tasks);
				+
				+	double mean_length = nmeasured ? length_sum / nmeasured : 0.0;
				+
				+	/* Display expected vs. measured figures for the CPU workers */
				+	unsigned worker;
				+	for (worker = 0; worker < starpu_worker_get_count(); worker++)
				+	{
				+		struct starpu_profiling_worker_info worker_info;
				+		int ret = starpu_profiling_worker_get_info(worker, &worker_info);
				+		STARPU_ASSERT(!ret);
				+
				+		char workername[128];
				+		starpu_worker_get_name(worker, workername, sizeof(workername));
				+		unsigned nimpl;
				+
				+		if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER)
				+		{
				+			FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername);
				+
				+			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				+			{
				+				FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f, Expected energy: %f\n",
				+						size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), mean_length,
				+						starpu_task_expected_energy(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl));
				+			}
				+		}
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+/*
				+ * First StarPU session (dmda scheduler, conf.calibrate = 2): run the
				+ * non-linear memset codelet over a range of sizes (the linear loop is
				+ * compiled out).
				+ * Second session (conf.calibrate = 0): print expected and measured figures
				+ * for a vector of 1234567 ints.
				+ *
				+ * Returns STARPU_TEST_SKIPPED when StarPU initialisation reports -ENODEV.
				+ */
				+int main(int argc, char **argv)
				+{
				+	struct starpu_conf cfg;
				+	starpu_data_handle_t vector;
				+	struct starpu_task *probe;
				+	int nelems;
				+	int err;
				+
				+	/* Enable profiling */
				+	starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
				+
				+	/* Calibration phase */
				+	starpu_conf_init(&cfg);
				+	cfg.sched_policy_name = "dmda";
				+	cfg.calibrate = 2;
				+
				+	err = starpu_initialize(&cfg, &argc, &argv);
				+	if (err == -ENODEV)
				+		return STARPU_TEST_SKIPPED;
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_init");
				+
				+#if 0
				+	/* Use a linear regression */
				+	for (nelems = STARTlin; nelems < END; nelems *= 2)
				+		test_memset(nelems, &memset_cl);
				+#endif
				+
				+	/* Use a non-linear regression */
				+	for (nelems = START; nelems < END; nelems *= 2)
				+		test_memset(nelems, &nl_memset_cl);
				+
				+	err = starpu_task_wait_for_all();
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_task_wait_for_all");
				+
				+	starpu_shutdown();
				+
				+	/* Test Phase */
				+	starpu_conf_init(&cfg);
				+	cfg.sched_policy_name = "dmda";
				+	cfg.calibrate = 0;
				+
				+	err = starpu_initialize(&cfg, &argc, &argv);
				+	if (err == -ENODEV)
				+		return STARPU_TEST_SKIPPED;
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_init");
				+
				+	/* Now create a dummy task just to estimate its duration according to the regression */
				+	nelems = 1234567;
				+	starpu_vector_data_register(&vector, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+	probe = starpu_task_create();
				+	probe->cl = &memset_cl;
				+	probe->handles[0] = vector;
				+	probe->destroy = 0;
				+
				+	/* the codelet set above is replaced before the task is used */
				+	probe->cl = &nl_memset_cl;
				+
				+	FPRINTF(stdout, "\n ////non linear regression results////\n");
				+	compare_performance(nelems, &nl_memset_cl, probe);
				+
				+	starpu_task_destroy(probe);
				+	starpu_data_unregister(vector);
				+	starpu_shutdown();
				+
				+	return EXIT_SUCCESS;
				+}
			
--- a/tests/perfmodels/regression_based_03.c
+++ b/tests/perfmodels/regression_based_03.c
@@ -0,0 +1,304 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2011       Télécom-SudParis
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+/*
			
 
				+ * A multi-implementation benchmark with dmda scheduler
			
 
				+ * we aim to test the energy model with the different size of gamma
			
 
				+ * for large values of gamma, dmda chooses the second implementation which consumes less energy
			
 
				+ * otherwise, it chooses the first implementation which minimizes the execution time
			
 
				+ */
			
 
				+
			
 
				+#define STARTlin 1048576
			
 
				+#define START 1024
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+#define END 1048576
			
 
				+#else
			
 
				+#define END 16777216
			
 
				+#endif
			
 
				+
			
 
				+// first implementation with an initial delay (100 us)
			
 
				+/*
				+ * CPU implementation #0: sleeps for 100 us, then accumulates the indices
				+ * 0..nx-1 into the first element of the vector.
				+ *
				+ * descr: codelet buffers; descr[0] is accessed as a vector of int.
				+ * arg:   unused.
				+ */
				+void memset0_cpu(void *descr[], void *arg)
				+{
				+	(void)arg;
				+	STARPU_SKIP_IF_VALGRIND;
				+
				+	int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+	const unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
				+	unsigned k = 0;
				+
				+	/* fixed start-up cost, independent of the vector size */
				+	usleep(100);
				+
				+	while (k < nx)
				+	{
				+		vec[0] += k;
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+// second implementation: no initial delay, but 6.5 times as many loop iterations
			
 
				+/*
				+ * CPU implementation #1: no initial delay, but the accumulation loop runs
				+ * while the counter is below 6.5 * nx.
				+ *
				+ * descr: codelet buffers; descr[0] is accessed as a vector of int.
				+ * arg:   unused.
				+ *
				+ * The counter is unsigned, as in memset0_cpu(): with a signed counter the
				+ * running sum in ptr[0] is a signed addition that overflows (undefined
				+ * behaviour) for the larger vector sizes used by this test.
				+ */
				+void memset_cpu(void *descr[], void *arg)
				+{
				+	(void)arg;
				+	STARPU_SKIP_IF_VALGRIND;
				+
				+	int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
				+	/* loop bound kept as a double so the iteration count matches i < 6.5*n */
				+	const double limit = 6.5 * n;
				+	unsigned i;
				+
				+	for (i = 0; i < limit; i++)
				+	{
				+		ptr[0] += i;
				+	}
				+}
			
 
				+
			
 
				+// energy cost function: expected task length scaled by a per-implementation factor
			
 
				+/*
				+ * Energy estimate for a task: its expected length on `arch` for
				+ * implementation `nimpl`, multiplied by 10 for implementation 0 and by 1
				+ * for any other implementation.
				+ */
				+double energy_function(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
				+{
				+	const int factor = (nimpl == 0) ? 10 : 1;
				+
				+	return starpu_task_expected_length(task, arch, nimpl) * factor;
				+}
			
 
				+
			
 
				+/* Regression-based performance model referenced by memset_cl. */
				+static struct starpu_perfmodel model =
				+{
				+	.symbol = "memset_regression_based",
				+	.type = STARPU_REGRESSION_BASED,
				+};
			
 
				+
			
 
				+/* Non-linear regression-based performance model referenced by nl_memset_cl. */
				+static struct starpu_perfmodel nl_model =
				+{
				+	.symbol = "non_linear_memset_regression_based",
				+	.type = STARPU_NL_REGRESSION_BASED,
				+};
			
 
				+
			
 
				+/* Per-arch energy model whose cost is computed by energy_function(). */
				+static struct starpu_perfmodel nl_energy_model =
				+{
				+	.symbol = "non_linear_energy_model",
				+	.type = STARPU_PER_ARCH,
				+	.arch_cost_function = energy_function,
				+};
			
 
				+
			
 
				+/* Two-implementation codelet (memset0_cpu, memset_cpu) using the linear model. */
				+static struct starpu_codelet memset_cl =
				+{
				+	.nbuffers = 1,
				+	.modes = {STARPU_W},
				+	.cpu_funcs = {memset0_cpu, memset_cpu},
				+	.cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+	.model = &model,
				+};
			
 
				+
			
 
				+/*
				+ * Two-implementation codelet (memset0_cpu, memset_cpu) using the non-linear
				+ * time model together with the energy model.
				+ */
				+static struct starpu_codelet nl_memset_cl =
				+{
				+	.nbuffers = 1,
				+	.modes = {STARPU_W},
				+	.cpu_funcs = {memset0_cpu, memset_cpu},
				+	.cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+	.model = &nl_model,
				+	.energy_model = &nl_energy_model,
				+};
			
 
				+
			
 
				+/*
				+ * Submit 100 tasks of the given codelet on a freshly registered vector of
				+ * nelems ints, then unregister the vector.
				+ *
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV; any other
				+ * submission error is reported through STARPU_CHECK_RETURN_VALUE.
				+ */
				+static void test_memset(int nelems, struct starpu_codelet *codelet)
				+{
				+	const int ntasks = 100;
				+	starpu_data_handle_t vector;
				+	int remaining;
				+
				+	starpu_vector_data_register(&vector, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+	for (remaining = ntasks; remaining > 0; remaining--)
				+	{
				+		struct starpu_task *t = starpu_task_create();
				+		int err;
				+
				+		t->handles[0] = vector;
				+		t->cl = codelet;
				+
				+		err = starpu_task_submit(t);
				+		if (err == -ENODEV)
				+			exit(STARPU_TEST_SKIPPED);
				+		STARPU_CHECK_RETURN_VALUE(err, "starpu_task_submit");
				+	}
				+
				+	starpu_data_unregister(vector);
				+}
			
 
				+
			
 
				+/*
				+ * Run the codelet 100 times synchronously on a vector of `size` ints, then
				+ * print, for every CPU worker and every implementation slot, the duration and
				+ * energy expected for compar_task next to the mean measured duration.
				+ *
				+ * size:        number of ints in the temporary vector.
				+ * codelet:     codelet executed by the measured tasks.
				+ * compar_task: task whose expected length/energy is queried; it is not
				+ *              submitted here.
				+ *
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV, like
				+ * test_memset() does.
				+ */
				+static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task)
				+{
				+	unsigned i;
				+	unsigned niter = 100;
				+	unsigned nmeasured = 0;
				+	double length_sum = 0.0;
				+	starpu_data_handle_t handle;
				+
				+	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+	struct starpu_task **tasks = malloc(niter * sizeof(*tasks));
				+	assert(tasks);
				+
				+	for (i = 0; i < niter; i++)
				+	{
				+		struct starpu_task *task = starpu_task_create();
				+
				+		task->cl = codelet;
				+		task->handles[0] = handle;
				+
				+		task->synchronous = 1;
				+
				+		/* We will destroy the task structure by hand so that we can
				+		 * query the profiling info before the task is destroyed. */
				+		task->destroy = 0;
				+
				+		tasks[i] = task;
				+
				+		int ret = starpu_task_submit(task);
				+
				+		if (STARPU_UNLIKELY(ret == -ENODEV))
				+		{
				+			FPRINTF(stderr, "No worker may execute this task\n");
				+			exit(STARPU_TEST_SKIPPED);
				+		}
				+		/* any other failure would leave this task without timing data */
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+	}
				+
				+	starpu_data_unregister(handle);
				+
				+	starpu_task_wait_for_all();
				+
				+	for (i = 0; i < niter; i++)
				+	{
				+		struct starpu_task *task = tasks[i];
				+		struct starpu_profiling_task_info *info = task->profiling_info;
				+
				+		/* How long was the task execution ? Only count tasks that
				+		 * actually carry profiling data. */
				+		if (info)
				+		{
				+			length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
				+			nmeasured++;
				+		}
				+
				+		/* We don't need the task structure anymore */
				+		starpu_task_destroy(task);
				+	}
				+
				+	/* every task has been destroyed, the array itself can go too */
				+	free(tasks);
				+
				+	double mean_length = nmeasured ? length_sum / nmeasured : 0.0;
				+
				+	/* Display expected vs. measured figures for the CPU workers */
				+	unsigned worker;
				+	for (worker = 0; worker < starpu_worker_get_count(); worker++)
				+	{
				+		struct starpu_profiling_worker_info worker_info;
				+		int ret = starpu_profiling_worker_get_info(worker, &worker_info);
				+		STARPU_ASSERT(!ret);
				+
				+		char workername[128];
				+		starpu_worker_get_name(worker, workername, sizeof(workername));
				+		unsigned nimpl;
				+
				+		if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER)
				+		{
				+			FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername);
				+
				+			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				+			{
				+				FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f, Expected energy: %f\n",
				+						size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), mean_length,
				+						starpu_task_expected_energy(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl));
				+			}
				+		}
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+/*
				+ * First StarPU session (dmda scheduler, conf.calibrate = 2): run the memset
				+ * codelets over a range of sizes for both perfmodels.
				+ * Second session (conf.calibrate = 0): print expected and measured figures
				+ * for a vector of 1234567 ints using the non-linear codelet.
				+ *
				+ * Returns STARPU_TEST_SKIPPED when StarPU initialisation reports -ENODEV.
				+ */
				+int main(int argc, char **argv)
				+{
				+	struct starpu_conf cfg;
				+	starpu_data_handle_t vector;
				+	struct starpu_task *probe;
				+	int nelems;
				+	int err;
				+
				+	/* Enable profiling */
				+	starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
				+
				+	/* Calibration phase */
				+	starpu_conf_init(&cfg);
				+	cfg.sched_policy_name = "dmda";
				+	cfg.calibrate = 2;
				+
				+	err = starpu_initialize(&cfg, &argc, &argv);
				+	if (err == -ENODEV)
				+		return STARPU_TEST_SKIPPED;
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_init");
				+
				+	/* Use a linear regression */
				+	for (nelems = STARTlin; nelems < END; nelems *= 2)
				+		test_memset(nelems, &memset_cl);
				+
				+	/* Use a non-linear regression */
				+	for (nelems = START; nelems < END; nelems *= 2)
				+		test_memset(nelems, &nl_memset_cl);
				+
				+	err = starpu_task_wait_for_all();
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_task_wait_for_all");
				+
				+	starpu_shutdown();
				+
				+	/* Test Phase */
				+	starpu_conf_init(&cfg);
				+	cfg.sched_policy_name = "dmda";
				+	cfg.calibrate = 0;
				+
				+	err = starpu_initialize(&cfg, &argc, &argv);
				+	if (err == -ENODEV)
				+		return STARPU_TEST_SKIPPED;
				+	STARPU_CHECK_RETURN_VALUE(err, "starpu_init");
				+
				+	/* Now create a dummy task just to estimate its duration according to the regression */
				+	nelems = 1234567;
				+	starpu_vector_data_register(&vector, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+	probe = starpu_task_create();
				+	probe->cl = &memset_cl;
				+	probe->handles[0] = vector;
				+	probe->destroy = 0;
				+
				+	/* the codelet set above is replaced before the task is used */
				+	probe->cl = &nl_memset_cl;
				+
				+	FPRINTF(stdout, "\n ////non linear regression results////\n");
				+	compare_performance(nelems, &nl_memset_cl, probe);
				+
				+	starpu_task_destroy(probe);
				+	starpu_data_unregister(vector);
				+	starpu_shutdown();
				+
				+	return EXIT_SUCCESS;
				+}
			
--- a/tests/perfmodels/regression_based_04.c
+++ b/tests/perfmodels/regression_based_04.c
@@ -0,0 +1,387 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2011       Télécom-SudParis
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+/*
			
 
				+ * A multi-implementation benchmark using the dmda scheduler.
			
 
				+ * We aim to test OpenCL workers and compute the estimated time for each type of worker (CPU, OpenCL or CUDA).
			
 
				+ * dmda chooses OpenCL workers for a large size (the size argument of compare_performance), e.g. size=1234567.
			
 
				+ * dmda chooses CPU workers for a small size (e.g. size=1234).
			
 
				+ */
			
 
				+
			
 
				+#define STARTlin (512*1024) /* first element count of the calibration loop main() runs on memset_cl; doubled each iteration */
			
 
				+#define START 1024 /* base element count for the calibration loop on nl_memset_cl (main() starts at START*1.5) */
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+#define END 1048576 /* exclusive upper bound of both calibration loops when STARPU_QUICK_CHECK is defined */
			
 
				+#else
			
 
				+#define END 16777216 /* exclusive upper bound of both calibration loops otherwise */
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
				+/*
				+ * CUDA implementation of the memset codelet.
				+ *
				+ * descr[0] is the vector handle's interface; every byte of the vector is
				+ * set to 42 by an asynchronous memset queued on the stream returned by
				+ * starpu_cuda_get_local_stream(). The function does not wait for the
				+ * memset to complete. `arg` is unused.
				+ */
				+static void memset_cuda(void *descr[], void *arg)
				+{
				+	(void)arg;
				+	STARPU_SKIP_IF_VALGRIND;
				+
				+	unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
				+	int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+
				+	cudaMemsetAsync(vec, 42, nx * sizeof(*vec), starpu_cuda_get_local_stream());
				+}
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void memset0_opencl(void *buffers[], void *args); /* OpenCL implementation 0 of the codelets below; defined outside this file (presumably opencl_memset.c -- TODO confirm) */
			
 
				+extern void memset_opencl(void *buffers[], void *args); /* OpenCL implementation 1 of the codelets below; defined outside this file */
			
 
				+#endif
			
 
				+
			
 
				+/*
				+ * CPU implementation 0 of the memset codelets.
				+ *
				+ * This variant does not fill the vector: it walks over all indices
				+ * 0..nx-1 and adds each of them to the first element only. `arg` is unused.
				+ */
				+void memset0_cpu(void *descr[], void *arg)
				+{
				+	(void)arg;
				+	STARPU_SKIP_IF_VALGRIND;
				+
				+	unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
				+	int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+	unsigned k = 0;
				+
				+	while (k < nx)
				+	{
				+		vec[0] += k;
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * CPU implementation 1 of the memset codelets.
				+ *
				+ * Sets every byte of the vector behind descr[0] to 42 with a single
				+ * memset() call. `arg` is unused.
				+ */
				+void memset_cpu(void *descr[], void *arg)
				+{
				+	(void)arg;
				+	STARPU_SKIP_IF_VALGRIND;
				+
				+	unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
				+	int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+
				+	memset(vec, 42, nx * sizeof(*vec));
				+}
			
 
				+
			
 
				+/*
				+ * Regression-based performance model attached to memset_cl (the codelet
				+ * that main() refers to as the "linear regression" one).
				+ */
				+static struct starpu_perfmodel model =
				+{
				+	.symbol = "memset_regression_based",
				+	.type = STARPU_REGRESSION_BASED
				+};
			
 
				+
			
 
				+/*
				+ * Non-linear regression-based performance model attached to nl_memset_cl.
				+ */
				+static struct starpu_perfmodel nl_model =
				+{
				+	.symbol = "non_linear_memset_regression_based",
				+	.type = STARPU_NL_REGRESSION_BASED
				+};
			
 
				+
			
 
				+/*
				+ * Codelet writing one vector buffer, bound to the regression-based `model`.
				+ *
				+ * It provides two CPU implementations, and, depending on the build, one
				+ * CUDA implementation and two OpenCL implementations.
				+ *
				+ * NOTE(review): opencl_flags lists a single STARPU_OPENCL_ASYNC entry while
				+ * opencl_funcs lists two functions, so the flag of the second OpenCL
				+ * implementation is left at its zero default -- confirm this is intended.
				+ */
				+static struct starpu_codelet memset_cl =
				+{
				+	.cpu_funcs = {memset0_cpu, memset_cpu},
				+	.cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+#ifdef STARPU_USE_CUDA
				+	.cuda_funcs = {memset_cuda},
				+	.cuda_flags = {STARPU_CUDA_ASYNC},
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+	.opencl_funcs = {memset0_opencl, memset_opencl},
				+	.opencl_flags = {STARPU_OPENCL_ASYNC},
				+#endif
				+	.nbuffers = 1,
				+	.modes = {STARPU_W},
				+	.model = &model
				+};
			
 
				+
			
 
				+/*
				+ * Same codelet as memset_cl (same implementations, one write-only buffer)
				+ * but bound to the non-linear regression model `nl_model`.
				+ *
				+ * NOTE(review): opencl_flags lists a single STARPU_OPENCL_ASYNC entry while
				+ * opencl_funcs lists two functions, so the flag of the second OpenCL
				+ * implementation is left at its zero default -- confirm this is intended.
				+ */
				+static struct starpu_codelet nl_memset_cl =
				+{
				+	.cpu_funcs = {memset0_cpu, memset_cpu},
				+	.cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+#ifdef STARPU_USE_CUDA
				+	.cuda_funcs = {memset_cuda},
				+	.cuda_flags = {STARPU_CUDA_ASYNC},
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+	.opencl_funcs = {memset0_opencl, memset_opencl},
				+	.opencl_flags = {STARPU_OPENCL_ASYNC},
				+#endif
				+	.nbuffers = 1,
				+	.modes = {STARPU_W},
				+	.model = &nl_model
				+};
			
 
				+
			
 
				+/*
				+ * Submit 100 tasks running `codelet` on a freshly registered vector of
				+ * `nelems` ints (no initial buffer is supplied at registration).
				+ *
				+ * The process exits with STARPU_TEST_SKIPPED if a submission returns
				+ * -ENODEV; any other submission error goes through
				+ * STARPU_CHECK_RETURN_VALUE. The vector is unregistered before returning.
				+ */
				+static void test_memset(int nelems, struct starpu_codelet *codelet)
				+{
				+	const int nloops = 100;
				+	starpu_data_handle_t handle;
				+	int loop = 0;
				+
				+	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+	while (loop < nloops)
				+	{
				+		struct starpu_task *task = starpu_task_create();
				+		int ret;
				+
				+		task->handles[0] = handle;
				+		task->cl = codelet;
				+
				+		ret = starpu_task_submit(task);
				+		if (ret == -ENODEV)
				+			exit(STARPU_TEST_SKIPPED);
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+
				+		loop++;
				+	}
				+
				+	starpu_data_unregister(handle);
				+}
			
 
				+
			
 
				+/*
				+ * Run `codelet` 100 times on a vector of `size` ints and print, for every
				+ * worker and every implementation index, the length predicted by the
				+ * performance model next to the mean length actually measured.
				+ *
				+ * size:        number of int elements of the vector the tasks write to.
				+ * codelet:     codelet executed by the measured tasks.
				+ * compar_task: task that is never submitted here; it is only handed to
				+ *              starpu_task_expected_length() to query the prediction.
				+ *
				+ * Measured lengths come from each task's profiling info, so profiling must
				+ * be enabled. Tasks are submitted synchronously and destroyed by hand once
				+ * their profiling info has been read.
				+ *
				+ * The process exits with STARPU_TEST_SKIPPED when no worker can execute
				+ * the codelet (same convention as test_memset()).
				+ */
				+static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task)
				+{
				+	unsigned i;
				+	unsigned niter = 100;
				+	starpu_data_handle_t handle;
				+	int ret;
				+
				+	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+	struct starpu_task **tasks = malloc(niter * sizeof(*tasks));
				+	assert(tasks);
				+
				+	for (i = 0; i < niter; i++)
				+	{
				+		struct starpu_task *task = starpu_task_create();
				+
				+		task->cl = codelet;
				+		task->handles[0] = handle;
				+
				+		task->synchronous = 1;
				+
				+		/* We will destroy the task structure by hand so that we can
				+		 * query the profiling info before the task is destroyed. */
				+		task->destroy = 0;
				+
				+		tasks[i] = task;
				+
				+		ret = starpu_task_submit(task);
				+
				+		if (STARPU_UNLIKELY(ret == -ENODEV))
				+		{
				+			FPRINTF(stderr, "No worker may execute this task\n");
				+			/* Report "skipped" rather than success */
				+			exit(STARPU_TEST_SKIPPED);
				+		}
				+		/* Do not silently ignore any other submission failure */
				+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+	}
				+
				+	starpu_data_unregister(handle);
				+
				+	ret = starpu_task_wait_for_all();
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
				+
				+	/* Accumulated execution time (us) and task count per worker kind */
				+	double length_cpu_sum = 0.0;
				+	double length_gpu_sum = 0.0;
				+	unsigned ncpu_tasks = 0;
				+	unsigned ngpu_tasks = 0;
				+
				+	enum starpu_worker_archtype archi;
				+
				+	for (i = 0; i < niter; i++)
				+	{
				+		struct starpu_task *task = tasks[i];
				+		struct starpu_profiling_task_info *info = task->profiling_info;
				+
				+		/* Fail clearly instead of dereferencing a missing record */
				+		STARPU_ASSERT(info != NULL);
				+
				+		archi = starpu_worker_get_type(info->workerid);
				+
				+		switch (archi)
				+		{
				+		case STARPU_CPU_WORKER:
				+			FPRINTF(stdout, "cpuuu\n");
				+			/* How long was the task execution ? */
				+			length_cpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
				+			ncpu_tasks++;
				+			break;
				+
				+		case STARPU_OPENCL_WORKER:
				+			FPRINTF(stdout, "openclllllll\n");
				+			/* How long was the task execution ? */
				+			length_gpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
				+			ngpu_tasks++;
				+			break;
				+
				+		case STARPU_CUDA_WORKER:
				+			FPRINTF(stdout, "cudaaaaaa\n");
				+			/* How long was the task execution ? */
				+			length_gpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
				+			ngpu_tasks++;
				+			break;
				+
				+		default:
				+			FPRINTF(stdout, "unsupported!\n");
				+			break;
				+		}
				+
				+		/* We don't need the task structure anymore */
				+		starpu_task_destroy(task);
				+	}
				+
				+	/* Every task has been destroyed; release the bookkeeping array */
				+	free(tasks);
				+
				+	/* Mean over the tasks that actually ran on each kind of worker
				+	 * (0 when none did), not over the total number of tasks. */
				+	double measured_cpu = ncpu_tasks ? length_cpu_sum / ncpu_tasks : 0.0;
				+	double measured_gpu = ngpu_tasks ? length_gpu_sum / ngpu_tasks : 0.0;
				+
				+	unsigned worker;
				+
				+	/* Display the occupancy of all workers during the test */
				+	unsigned ncpus = starpu_cpu_worker_get_count();
				+	unsigned ngpus = starpu_opencl_worker_get_count()+starpu_cuda_worker_get_count();
				+
				+	FPRINTF(stderr, "ncpus %u \n", ncpus);
				+	FPRINTF(stderr, "ngpus %u \n", ngpus);
				+	for (worker = 0; worker < starpu_worker_get_count(); worker++)
				+	{
				+		struct starpu_profiling_worker_info worker_info;
				+		ret = starpu_profiling_worker_get_info(worker, &worker_info);
				+		STARPU_ASSERT(!ret);
				+
				+		char workername[128];
				+		starpu_worker_get_name(worker, workername, sizeof(workername));
				+		unsigned nimpl;
				+
				+		FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername);
				+
				+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				+		{
				+			switch (starpu_worker_get_type(worker))
				+			{
				+			case STARPU_CPU_WORKER:
				+				FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f \n",
				+						size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), measured_cpu);
				+				break;
				+
				+			case STARPU_OPENCL_WORKER:
				+				FPRINTF(stdout, "Expectedd time for %d on %s (impl %u): %f, Measuredd time: %f \n",
				+						size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), measured_gpu);
				+				break;
				+
				+			case STARPU_CUDA_WORKER:
				+				FPRINTF(stdout, "Expectedd time for %d on %s (impl %u): %f, Measuredd time: %f \n",
				+						size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), measured_gpu);
				+				break;
				+
				+			default:
				+				FPRINTF(stdout, "unsupported!\n");
				+				break;
				+			}
				+		}
				+	}
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+struct starpu_opencl_program opencl_program; /* non-static on purpose: opencl_memset.c refers to it through an extern declaration; main() loads and unloads it */
			
 
				+#endif
			
 
				+
			
 
				+/*
				+ * Test entry point, made of two StarPU sessions using the dmda scheduler.
				+ *
				+ * 1. Calibration phase (conf.calibrate = 2): submit memset tasks of
				+ *    increasing sizes through memset_cl and nl_memset_cl.
				+ * 2. Test phase (conf.calibrate = 0): run nl_memset_cl on a vector of
				+ *    1234567 ints and print expected versus measured task lengths.
				+ *
				+ * Returns STARPU_TEST_SKIPPED when StarPU reports -ENODEV at
				+ * initialization, EXIT_SUCCESS otherwise.
				+ */
				+int main(int argc, char **argv)
				+{
				+	/* Enable profiling */
				+	starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
				+
				+	struct starpu_conf conf;
				+	starpu_data_handle_t handle;
				+	int ret;
				+
				+	starpu_conf_init(&conf);
				+
				+	conf.sched_policy_name = "dmda";
				+	conf.calibrate = 2;
				+
				+	ret = starpu_initialize(&conf, &argc, &argv);
				+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
				+
				+#ifdef STARPU_USE_OPENCL
				+	ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
				+			&opencl_program, NULL);
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
				+#endif
				+
				+	int size;
				+	for (size = STARTlin; size < END; size *= 2)
				+	{
				+		/* Use a linear regression */
				+		test_memset(size, &memset_cl);
				+	}
				+
				+	/* Starts at 1.5 * START, i.e. 1536 elements */
				+	for (size = (START * 3) / 2; size < END; size *= 2)
				+	{
				+		/* Use a non-linear regression */
				+		test_memset(size, &nl_memset_cl);
				+	}
				+
				+	ret = starpu_task_wait_for_all();
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
				+
				+#ifdef STARPU_USE_OPENCL
				+	/* Release the program before shutting down: it is loaded again
				+	 * into the same structure for the test phase. */
				+	ret = starpu_opencl_unload_opencl(&opencl_program);
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
				+#endif
				+	starpu_shutdown();
				+
				+
				+	/* Test Phase */
				+	starpu_conf_init(&conf);
				+
				+	conf.sched_policy_name = "dmda";
				+	conf.calibrate = 0;
				+
				+	ret = starpu_initialize(&conf, &argc, &argv);
				+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
				+
				+	/* opencl_program only exists in OpenCL builds, so the load must be
				+	 * guarded exactly like its definition. */
				+#ifdef STARPU_USE_OPENCL
				+	ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
				+			&opencl_program, NULL);
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
				+#endif
				+
				+	/* Now create a dummy task just to estimate its duration according to the regression */
				+
				+	size = 1234567;
				+
				+	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+	/* Only the non-linear model is compared; the task is never
				+	 * submitted, so it is destroyed by hand below. */
				+	struct starpu_task *task = starpu_task_create();
				+	task->cl = &nl_memset_cl;
				+	task->handles[0] = handle;
				+	task->destroy = 0;
				+
				+	FPRINTF(stdout, "\n ////non linear regression results////\n");
				+
				+	compare_performance(size, &nl_memset_cl, task);
				+
				+	starpu_task_destroy(task);
				+
				+	starpu_data_unregister(handle);
				+
				+#ifdef STARPU_USE_OPENCL
				+	ret = starpu_opencl_unload_opencl(&opencl_program);
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
				+#endif
				+	starpu_shutdown();
				+
				+	return EXIT_SUCCESS;
				+}