6 anos atrás · ed7279f57e
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -766,7 +766,7 @@ static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsig
 
				 	struct starpu_perfmodel_history_list *ptr = NULL;
			
 
				 	unsigned nentries = 0;
			
 
				 
			
 
				-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 	{
			
 
				 		/* Dump the list of all entries in the history */
			
 
				 		ptr = per_arch_model->list;
			
@@ -784,7 +784,7 @@ static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsig
 
				 	check_reg_model(model, comb, impl);
			
 
				 
			
 
				 	/* Dump the history into the model file in case it is necessary */
			
 
				-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 	{
			
 
				 		ptr = per_arch_model->list;
			
 
				 		while (ptr)
			
@@ -803,7 +803,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 
				 	struct starpu_perfmodel_history_list *ptr = NULL;
			
 
				 	unsigned nentries = 0;
			
 
				 
			
 
				-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 	{
			
 
				 		/* Dump the list of all entries in the history */
			
 
				 		ptr = per_arch_model->list;
			
@@ -819,12 +819,12 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 
				 	starpu_perfmodel_get_arch_name(arch_combs[comb], archname,  sizeof(archname), impl);
			
 
				 	fprintf(f, "#####\n");
			
 
				 	fprintf(f, "# Model for %s\n", archname);
			
 
				-	fprintf(f, "# number of entries\n%u\n", nentries);
			
 
				+	fprintf(f, "# number of entries\n%u\n", nentries);
			
 
				 
			
 
				 	dump_reg_model(f, model, comb, impl);
			
 
				 
			
 
				 	/* Dump the history into the model file in case it is necessary */
			
 
				-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 	{
			
 
				 		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us)\tdev (us)\tsum\t\tsum2\t\tn\n");
			
 
				 		ptr = per_arch_model->list;
			
@@ -1865,7 +1865,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 			model->state->per_arch_is_set[comb][impl] = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 		{
			
 
				 			struct starpu_perfmodel_history_entry *entry;
			
 
				 			struct starpu_perfmodel_history_table *elt;
			
--- a/src/core/perfmodel/starpu_memset_regression_based.eps
+++ b/src/core/perfmodel/starpu_memset_regression_based.eps
--- a/src/core/perfmodel/starpu_memset_regression_based.gp
+++ b/src/core/perfmodel/starpu_memset_regression_based.gp
@@ -0,0 +1,15 @@
 
				+#!/usr/bin/gnuplot -persist
			
 
				+
			
 
				+set term postscript eps enhanced color
			
 
				+set output "starpu_memset_regression_based.eps"
			
 
				+set title "Model for codelet memset-regression-based"
			
 
				+set xlabel "Total data size"
			
 
				+set ylabel "Time (ms)"
			
 
				+
			
 
				+set key top left
			
 
				+set logscale x
			
 
				+set logscale y
			
 
				+
			
 
				+set xrange [1:10**9]
			
 
				+
			
 
				+plot	0.001 * 0.000290 * x ** 0.855140 title "Linear Regression cpu0_impl0 (Comb0)"
			
--- a/src/core/perfmodel/starpu_memset_regression_based_avg.data
+++ b/src/core/perfmodel/starpu_memset_regression_based_avg.data
--- a/src/core/perfmodel/starpu_non_linear_memset_regression_based.gp
+++ b/src/core/perfmodel/starpu_non_linear_memset_regression_based.gp
@@ -0,0 +1,16 @@
 
				+#!/usr/bin/gnuplot -persist
			
 
				+
			
 
				+set term postscript eps enhanced color
			
 
				+set output "starpu_non_linear_memset_regression_based.eps"
			
 
				+set title "Model for codelet non-linear-memset-regression-based"
			
 
				+set xlabel "Total data size"
			
 
				+set ylabel "Time (ms)"
			
 
				+
			
 
				+set key top left
			
 
				+set logscale x
			
 
				+set logscale y
			
 
				+
			
 
				+set xrange [1:10**9]
			
 
				+
			
 
				+plot	0.001 * 0.000072 * x ** 0.950363 + 0.001 * 0.685542 title "Non-Linear Regression cpu0_impl0 (Comb0)",\
			
 
				+	"starpu_non_linear_memset_regression_based_avg.data" using 1:2:3 with errorlines title "Average cpu0-impl0 (Comb0)"
			
--- a/src/core/perfmodel/starpu_non_linear_memset_regression_based_avg.data
+++ b/src/core/perfmodel/starpu_non_linear_memset_regression_based_avg.data
@@ -0,0 +1,14 @@
 
				+4096            	8.505405e-04   	1.452811e-04   
			
 
				+8192            	1.192532e-03   	2.238007e-04   
			
 
				+16384           	1.571296e-03   	3.645427e-04   
			
 
				+32768           	2.243088e-03   	3.376360e-04   
			
 
				+65536           	3.406326e-03   	3.076534e-04   
			
 
				+131072          	6.133410e-03   	4.352014e-04   
			
 
				+262144          	1.062855e-02   	5.846631e-04   
			
 
				+524288          	1.049214e-02   	5.642853e-04   
			
 
				+1048576         	3.789903e-02   	1.984554e-03   
			
 
				+2097152         	7.187580e-02   	2.000736e-03   
			
 
				+4194304         	1.433617e-01   	4.154247e-03   
			
 
				+8388608         	2.835415e-01   	1.540519e-02   
			
 
				+16777216        	6.276299e-01   	3.369057e-02   
			
 
				+33554432        	1.221168e+00   	4.589201e-02   
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -352,6 +352,10 @@ myPROGRAMS +=				\
 
				 	parallel_tasks/spmd_peager		\
			
 
				 	parallel_tasks/cuda_only		\
			
 
				 	perfmodels/regression_based		\
			
 
				+	perfmodels/regression_based_01		\
			
 
				+	perfmodels/regression_based_02		\
			
 
				+	perfmodels/regression_based_03		\
			
 
				+	perfmodels/regression_based_04		\
			
 
				 	perfmodels/non_linear_regression_based	\
			
 
				 	perfmodels/feed				\
			
 
				 	perfmodels/user_base			\
			
@@ -957,6 +961,18 @@ endif
 
				 perfmodels_regression_based_SOURCES=\
			
 
				 	perfmodels/regression_based.c
			
 
				 
			
 
				+perfmodels_regression_based_01_SOURCES=\
			
 
				+	perfmodels/regression_based_01.c
			
 
				+
			
 
				+perfmodels_regression_based_02_SOURCES=\
			
 
				+	perfmodels/regression_based_02.c
			
 
				+
			
 
				+perfmodels_regression_based_03_SOURCES=\
			
 
				+	perfmodels/regression_based_03.c
			
 
				+
			
 
				+perfmodels_regression_based_04_SOURCES=\
			
 
				+	perfmodels/regression_based_04.c
			
 
				+
			
 
				 if STARPU_USE_OPENCL
			
 
				 perfmodels_regression_based_SOURCES+=\
			
 
				 	perfmodels/opencl_memset.c
			
--- a/tests/perfmodels/regression_based.c
+++ b/tests/perfmodels/regression_based.c
@@ -32,6 +32,7 @@
 
				 #define END 16777216
			
 
				 #endif
			
 
				 
			
 
				+unsigned int usecs=100;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 static void memset_cuda(void *descr[], void *arg)
			
 
				 {
			
@@ -57,7 +58,12 @@ void memset_cpu(void *descr[], void *arg)
 
				 	int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				 	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 
			
 
				-	memset(ptr, 42, n * sizeof(*ptr));
			
 
				+        // for loop
			
 
				+        for (int i=0; i<n*100; i++)
			
 
				+        {
			
 
				+
			
 
				+          }
			
 
				+
			
 
				 }
			
 
				 
			
 
				 static struct starpu_perfmodel model =
			
@@ -184,9 +190,22 @@ int main(int argc, char **argv)
 
				 	ret = starpu_task_wait_for_all();
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
			
 
				 
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+
			
 
				+	/* Test Phase */
			
 
				+	starpu_conf_init(&conf);
			
 
				+
			
 
				+	conf.sched_policy_name = "eager";
			
 
				+	conf.calibrate = 0;
			
 
				+
			
 
				+	ret = starpu_initialize(&conf, &argc, &argv);
			
 
				+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				 	/* Now create a dummy task just to estimate its duration according to the regression */
			
 
				 
			
 
				-	size = 12345;
			
 
				+	size = 1000;
			
 
				 
			
 
				 	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
			
 
				 
			
@@ -195,9 +214,11 @@ int main(int argc, char **argv)
 
				 	task->handles[0] = handle;
			
 
				 	task->destroy = 0;
			
 
				 
			
 
				+        FPRINTF(stdout, "linear regression results\n");
			
 
				 	show_task_perfs(size, task);
			
 
				 
			
 
				 	task->cl = &nl_memset_cl;
			
 
				+	FPRINTF(stdout, "non linear regression results\n");
			
 
				 
			
 
				 	show_task_perfs(size, task);
			
 
				 
			
--- a/tests/perfmodels/regression_based_01.c
+++ b/tests/perfmodels/regression_based_01.c
@@ -0,0 +1,316 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012,2016                                Inria
			
 
				+ * Copyright (C) 2010-2015,2017                           Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This exemplifies how to get task execution profiling from the application.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <assert.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include <unistd.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
 
				+
			
 
				+#define START 1024
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+#define END 1048576
			
 
				+#else
			
 
				+#define END 16777216
			
 
				+#endif
			
 
				+
			
 
				+//static unsigned niter = 500;
			
 
				+
			
 
				+int ret;
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void memset_cuda(void *descr[], void *arg)
			
 
				+{
			
 
				+    (void)arg;
			
 
				+    STARPU_SKIP_IF_VALGRIND;
			
 
				+
			
 
				+    int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				+    unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				+
			
 
				+    cudaMemsetAsync(ptr, 42, n * sizeof(*ptr), starpu_cuda_get_local_stream());
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void memset_opencl(void *buffers[], void *args);
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+void memset_cpu(void *descr[], void *arg)
			
 
				+{
			
 
				+    (void)arg;
			
 
				+    STARPU_SKIP_IF_VALGRIND;
			
 
				+
			
 
				+    int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				+    unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				+
			
 
				+    usleep(1000);
			
 
				+
			
 
				+    for (int i=0; i<n ; i++)
			
 
				+    {
			
 
				+
			
 
				+        ptr[0] += i;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+
			
 
				+static struct starpu_perfmodel model =
			
 
				+{
			
 
				+    .type = STARPU_REGRESSION_BASED,
			
 
				+    .symbol = "memset_regression_based"
			
 
				+};
			
 
				+
			
 
				+static struct starpu_perfmodel nl_model =
			
 
				+{
			
 
				+    .type = STARPU_NL_REGRESSION_BASED,
			
 
				+    .symbol = "non_linear_memset_regression_based"
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet memset_cl =
			
 
				+{
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+    .cuda_funcs = {memset_cuda},
			
 
				+    .cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+    .opencl_funcs = {memset_opencl},
			
 
				+    .opencl_flags = {STARPU_OPENCL_ASYNC},
			
 
				+#endif
			
 
				+    .cpu_funcs = {memset_cpu},
			
 
				+    .cpu_funcs_name = {"memset_cpu"},
			
 
				+    .model = &model,
			
 
				+    .nbuffers = 1,
			
 
				+    .modes = {STARPU_W}
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet nl_memset_cl =
			
 
				+{
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+    .cuda_funcs = {memset_cuda},
			
 
				+    .cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+    .opencl_funcs = {memset_opencl},
			
 
				+    .opencl_flags = {STARPU_OPENCL_ASYNC},
			
 
				+#endif
			
 
				+    .cpu_funcs = {memset_cpu},
			
 
				+    .cpu_funcs_name = {"memset_cpu"},
			
 
				+    .model = &nl_model,
			
 
				+    .nbuffers = 1,
			
 
				+    .modes = {STARPU_W}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+static void test_memset(int nelems, struct starpu_codelet *codelet)
			
 
				+{
			
 
				+    int nloops = 100;
			
 
				+    int loop;
			
 
				+    starpu_data_handle_t handle;
			
 
				+
			
 
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int));
			
 
				+    for (loop = 0; loop < nloops; loop++)
			
 
				+    {
			
 
				+        struct starpu_task *task = starpu_task_create();
			
 
				+
			
 
				+        task->cl = codelet;
			
 
				+        task->handles[0] = handle;
			
 
				+
			
 
				+        int ret = starpu_task_submit(task);
			
 
				+        if (ret == -ENODEV)
			
 
				+            exit(STARPU_TEST_SKIPPED);
			
 
				+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				+    }
			
 
				+
			
 
				+    starpu_data_unregister(handle);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *task)
			
 
				+{
			
 
				+    unsigned i;
			
 
				+    int niter = 100;
			
 
				+    starpu_data_handle_t handle;
			
 
				+
			
 
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
			
 
				+
			
 
				+    struct starpu_task **tasks = (struct starpu_task **) malloc(niter*sizeof(struct starpu_task *));
			
 
				+    assert(tasks);
			
 
				+
			
 
				+    for (i = 0; i < niter; i++)
			
 
				+    {
			
 
				+        struct starpu_task *task = starpu_task_create();
			
 
				+
			
 
				+        task->cl = codelet;
			
 
				+        task->handles[0] = handle;
			
 
				+
			
 
				+        task->synchronous = 1;
			
 
				+
			
 
				+        /* We will destroy the task structure by hand so that we can
			
 
				+         * query the profiling info before the task is destroyed. */
			
 
				+        task->destroy = 0;
			
 
				+
			
 
				+        tasks[i] = task;
			
 
				+
			
 
				+        ret = starpu_task_submit(task);
			
 
				+
			
 
				+        if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				+        {
			
 
				+            FPRINTF(stderr, "No worker may execute this task\n");
			
 
				+            exit(STARPU_TEST_SKIPPED); /* no worker can run the task: report a skip, not a pass */
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    starpu_data_unregister(handle);
			
 
				+
			
 
				+    starpu_task_wait_for_all();
			
 
				+
			
 
				+    double length_sum = 0.0;
			
 
				+
			
 
				+    for (i = 0; i < niter; i++)
			
 
				+    {
			
 
				+        struct starpu_task *task = tasks[i];
			
 
				+        struct starpu_profiling_task_info *info = task->profiling_info;
			
 
				+
			
 
				+
			
 
				+        /* How long was the task execution ? */
			
 
				+        length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
			
 
				+
			
 
				+        /* We don't need the task structure anymore */
			
 
				+        starpu_task_destroy(task);
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    /* For each worker and implementation, print the model's expected task length next to the measured average */
			
 
				+    unsigned worker;
			
 
				+    for (worker = 0; worker < starpu_worker_get_count(); worker++)
			
 
				+    {
			
 
				+        struct starpu_profiling_worker_info worker_info;
			
 
				+        ret = starpu_profiling_worker_get_info(worker, &worker_info);
			
 
				+        STARPU_ASSERT(!ret);
			
 
				+
			
 
				+        char workername[128];
			
 
				+        starpu_worker_get_name(worker, workername, sizeof(workername));
			
 
				+        unsigned nimpl;
			
 
				+
			
 
				+        FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername);
			
 
				+
			
 
				+        for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				+        {
			
 
				+
			
 
				+            FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f\n",
			
 
				+                    size, workername, nimpl,starpu_task_expected_length(task, starpu_worker_get_perf_archtype(worker, task->sched_ctx), nimpl), ((length_sum)/niter));
			
 
				+
			
 
				+        }
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+    free(tasks); /* release the task pointer array allocated above */
			
 
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+struct starpu_opencl_program opencl_program;
			
 
				+#endif
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+    /* Enable profiling */
			
 
				+    starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
			
 
				+
			
 
				+    struct starpu_conf conf;
			
 
				+    starpu_data_handle_t handle;
			
 
				+    int ret;
			
 
				+
			
 
				+    starpu_conf_init(&conf);
			
 
				+
			
 
				+    conf.sched_policy_name = "eager";
			
 
				+    conf.calibrate = 2;
			
 
				+
			
 
				+    ret = starpu_initialize(&conf, &argc, &argv);
			
 
				+    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+    ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
			
 
				+            &opencl_program, NULL);
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
			
 
				+#endif
			
 
				+    int size;
			
 
				+    for (size = START; size < END; size *= 2)
			
 
				+    {
			
 
				+        /* Use a linear regression */
			
 
				+        test_memset(size, &memset_cl);
			
 
				+
			
 
				+        /* Use a non-linear regression */
			
 
				+        test_memset(size, &nl_memset_cl);
			
 
				+    }
			
 
				+    ret = starpu_task_wait_for_all();
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
			
 
				+
			
 
				+    starpu_shutdown();
			
 
				+
			
 
				+
			
 
				+    /* Test Phase */
			
 
				+    starpu_conf_init(&conf);
			
 
				+
			
 
				+    conf.sched_policy_name = "eager";
			
 
				+    conf.calibrate = 0;
			
 
				+
			
 
				+    ret = starpu_initialize(&conf, &argc, &argv);
			
 
				+    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+    /* Now create a dummy task just to estimate its duration according to the regression */
			
 
				+
			
 
				+    size = 12345;
			
 
				+
			
 
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
			
 
				+
			
 
				+    struct starpu_task *task = starpu_task_create();
			
 
				+    task->cl = &memset_cl;
			
 
				+    task->handles[0] = handle;
			
 
				+    task->destroy = 0;
			
 
				+
			
 
				+    FPRINTF(stdout, "\n ////linear regression results////\n");
			
 
				+    compare_performance(size, &memset_cl,task);
			
 
				+
			
 
				+
			
 
				+    task->cl = &nl_memset_cl;
			
 
				+
			
 
				+    FPRINTF(stdout, "\n ////non linear regression results////\n");
			
 
				+
			
 
				+    compare_performance(size, &nl_memset_cl,task);
			
 
				+
			
 
				+
			
 
				+    starpu_task_destroy(task);
			
 
				+
			
 
				+    starpu_data_unregister(handle);
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+    ret = starpu_opencl_unload_opencl(&opencl_program);
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
			
 
				+#endif
			
 
				+    starpu_shutdown();
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
--- a/tests/perfmodels/regression_based_02.c
+++ b/tests/perfmodels/regression_based_02.c
@@ -0,0 +1,321 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012,2016                                Inria
			
 
				+ * Copyright (C) 2010-2015,2017                           Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This exemplifies how to get task execution profiling from the application.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <assert.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include <unistd.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
 
				+
			
 
				+#define STARTlin 1048576
			
 
				+#define START 1024
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+#define END 1048576
			
 
				+#else
			
 
				+#define END 16777216
			
 
				+#endif
			
 
				+
			
 
				+//static unsigned niter = 500;
			
 
				+
			
 
				+int ret;
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void memset_cuda(void *descr[], void *arg)
			
 
				+{
			
 
				+    (void)arg;
			
 
				+    STARPU_SKIP_IF_VALGRIND;
			
 
				+
			
 
				+    int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				+    unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				+
			
 
				+    cudaMemsetAsync(ptr, 42, n * sizeof(*ptr), starpu_cuda_get_local_stream());
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void memset_opencl(void *buffers[], void *args);
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+void memset_cpu(void *descr[], void *arg)
			
 
				+{
			
 
				+    (void)arg;
			
 
				+    STARPU_SKIP_IF_VALGRIND;
			
 
				+
			
 
				+    int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				+    unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				+
			
 
				+    usleep(1000);
			
 
				+
			
 
				+    for (int i=0; i<n ; i++)
			
 
				+    {
			
 
				+
			
 
				+        ptr[0] += i;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+
			
 
				+static struct starpu_perfmodel model =
			
 
				+{
			
 
				+    .type = STARPU_REGRESSION_BASED,
			
 
				+    .symbol = "memset_regression_based"
			
 
				+};
			
 
				+
			
 
				+static struct starpu_perfmodel nl_model =
			
 
				+{
			
 
				+    .type = STARPU_NL_REGRESSION_BASED,
			
 
				+    .symbol = "non_linear_memset_regression_based"
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet memset_cl =
			
 
				+{
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+    .cuda_funcs = {memset_cuda},
			
 
				+    .cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+    .opencl_funcs = {memset_opencl},
			
 
				+    .opencl_flags = {STARPU_OPENCL_ASYNC},
			
 
				+#endif
			
 
				+    .cpu_funcs = {memset_cpu},
			
 
				+    .cpu_funcs_name = {"memset_cpu"},
			
 
				+    .model = &model,
			
 
				+    .nbuffers = 1,
			
 
				+    .modes = {STARPU_W}
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet nl_memset_cl =
			
 
				+{
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+    .cuda_funcs = {memset_cuda},
			
 
				+    .cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+    .opencl_funcs = {memset_opencl},
			
 
				+    .opencl_flags = {STARPU_OPENCL_ASYNC},
			
 
				+#endif
			
 
				+    .cpu_funcs = {memset_cpu},
			
 
				+    .cpu_funcs_name = {"memset_cpu"},
			
 
				+    .model = &nl_model,
			
 
				+    .nbuffers = 1,
			
 
				+    .modes = {STARPU_W}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+static void test_memset(int nelems, struct starpu_codelet *codelet)
			
 
				+{
			
 
				+    int nloops = 100;
			
 
				+    int loop;
			
 
				+    starpu_data_handle_t handle;
			
 
				+
			
 
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int));
			
 
				+    for (loop = 0; loop < nloops; loop++)
			
 
				+    {
			
 
				+        struct starpu_task *task = starpu_task_create();
			
 
				+
			
 
				+        task->cl = codelet;
			
 
				+        task->handles[0] = handle;
			
 
				+
			
 
				+        int ret = starpu_task_submit(task);
			
 
				+        if (ret == -ENODEV)
			
 
				+            exit(STARPU_TEST_SKIPPED);
			
 
				+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				+    }
			
 
				+
			
 
				+    starpu_data_unregister(handle);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *task)
			
 
				+{
			
 
				+    unsigned i;
			
 
				+    int niter = 100;
			
 
				+    starpu_data_handle_t handle;
			
 
				+
			
 
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
			
 
				+
			
 
				+    struct starpu_task **tasks = (struct starpu_task **) malloc(niter*sizeof(struct starpu_task *));
			
 
				+    assert(tasks);
			
 
				+
			
 
				+    for (i = 0; i < niter; i++)
			
 
				+    {
			
 
				+        struct starpu_task *task = starpu_task_create();
			
 
				+
			
 
				+        task->cl = codelet;
			
 
				+        task->handles[0] = handle;
			
 
				+
			
 
				+        task->synchronous = 1;
			
 
				+
			
 
				+        /* We will destroy the task structure by hand so that we can
			
 
				+         * query the profiling info before the task is destroyed. */
			
 
				+        task->destroy = 0;
			
 
				+
			
 
				+        tasks[i] = task;
			
 
				+
			
 
				+        ret = starpu_task_submit(task);
			
 
				+
			
 
				+        if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				+        {
			
 
				+            FPRINTF(stderr, "No worker may execute this task\n");
			
 
				+            exit(STARPU_TEST_SKIPPED); /* no worker can run the task: report a skip, not a pass */
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    starpu_data_unregister(handle);
			
 
				+
			
 
				+    starpu_task_wait_for_all();
			
 
				+
			
 
				+    double length_sum = 0.0;
			
 
				+
			
 
				+    for (i = 0; i < niter; i++)
			
 
				+    {
			
 
				+        struct starpu_task *task = tasks[i];
			
 
				+        struct starpu_profiling_task_info *info = task->profiling_info;
			
 
				+
			
 
				+
			
 
				+        /* How long was the task execution ? */
			
 
				+        length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
			
 
				+
			
 
				+        /* We don't need the task structure anymore */
			
 
				+        starpu_task_destroy(task);
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    /* For each worker and implementation, print the model's expected task length next to the measured average */
			
 
				+    unsigned worker;
			
 
				+    for (worker = 0; worker < starpu_worker_get_count(); worker++)
			
 
				+    {
			
 
				+        struct starpu_profiling_worker_info worker_info;
			
 
				+        ret = starpu_profiling_worker_get_info(worker, &worker_info);
			
 
				+        STARPU_ASSERT(!ret);
			
 
				+
			
 
				+        char workername[128];
			
 
				+        starpu_worker_get_name(worker, workername, sizeof(workername));
			
 
				+        unsigned nimpl;
			
 
				+
			
 
				+        FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername);
			
 
				+
			
 
				+        for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				+        {
			
 
				+
			
 
				+            FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f\n",
			
 
				+                    size, workername, nimpl,starpu_task_expected_length(task, starpu_worker_get_perf_archtype(worker, task->sched_ctx), nimpl), ((length_sum)/niter));
			
 
				+
			
 
				+        }
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+    free(tasks); /* release the task pointer array allocated above */
			
 
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+struct starpu_opencl_program opencl_program;
			
 
				+#endif
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+    /* Enable profiling */
			
 
				+    starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
			
 
				+
			
 
				+    struct starpu_conf conf;
			
 
				+    starpu_data_handle_t handle;
			
 
				+    int ret;
			
 
				+
			
 
				+    starpu_conf_init(&conf);
			
 
				+
			
 
				+    conf.sched_policy_name = "eager";
			
 
				+    conf.calibrate = 2;
			
 
				+
			
 
				+    ret = starpu_initialize(&conf, &argc, &argv);
			
 
				+    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+    ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
			
 
				+            &opencl_program, NULL);
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
			
 
				+#endif
			
 
				+    int size;
			
 
				+    for (size = STARTlin; size < END; size *= 2)
			
 
				+    {
			
 
				+        /* Use a linear regression */
			
 
				+        test_memset(size, &memset_cl);
			
 
				+    }
			
 
				+
			
 
				+    for (size = START; size < END; size *= 2)
			
 
				+    {
			
 
				+        /* Use a non-linear regression */
			
 
				+        test_memset(size, &nl_memset_cl);
			
 
				+    }
			
 
				+
			
 
				+    ret = starpu_task_wait_for_all();
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
			
 
				+
			
 
				+    starpu_shutdown();
			
 
				+
			
 
				+
			
 
				+    /* Test Phase */
			
 
				+    starpu_conf_init(&conf);
			
 
				+
			
 
				+    conf.sched_policy_name = "eager";
			
 
				+    conf.calibrate = 0;
			
 
				+
			
 
				+    ret = starpu_initialize(&conf, &argc, &argv);
			
 
				+    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+    /* Now create a dummy task just to estimate its duration according to the regression */
			
 
				+
			
 
				+    size = 1234567;
			
 
				+
			
 
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
			
 
				+
			
 
				+    struct starpu_task *task = starpu_task_create();
			
 
				+    task->cl = &memset_cl;
			
 
				+    task->handles[0] = handle;
			
 
				+    task->destroy = 0;
			
 
				+
			
 
				+    FPRINTF(stdout, "\n ////linear regression results////\n");
			
 
				+    compare_performance(size, &memset_cl,task);
			
 
				+
			
 
				+
			
 
				+    task->cl = &nl_memset_cl;
			
 
				+
			
 
				+    FPRINTF(stdout, "\n ////non linear regression results////\n");
			
 
				+
			
 
				+    compare_performance(size, &nl_memset_cl,task);
			
 
				+
			
 
				+
			
 
				+    starpu_task_destroy(task);
			
 
				+
			
 
				+    starpu_data_unregister(handle);
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+    ret = starpu_opencl_unload_opencl(&opencl_program);
			
 
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
			
 
				+#endif
			
 
				+    starpu_shutdown();
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
--- a/tests/perfmodels/regression_based_03.c
+++ b/tests/perfmodels/regression_based_03.c
@@ -0,0 +1,340 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011,2012,2014                           Inria
			
 
				+ * Copyright (C) 2011-2016,2019                           Université de Bordeaux
			
 
				+ * Copyright (C) 2011-2017                                CNRS
			
 
				+ * Copyright (C) 2011                                     Télécom-SudParis
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+/*
			
 
				+ * Benchmark memset with a linear regression
			
 
				+ */
			
 
				+#define STARTlin 1048576
			
 
				+#define START 1024
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+#define END 1048576
			
 
				+#else
			
 
				+#define END 16777216
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/* CUDA flavour of the kernel: asynchronously fills the whole vector with
				+ * the byte value 42 on the stream returned by
				+ * starpu_cuda_get_local_stream(). */
				+static void memset_cuda(void *descr[], void *arg)
				+{
				+    (void)arg;
				+    STARPU_SKIP_IF_VALGRIND;
				+
				+    unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
				+    int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+    size_t nbytes = nx * sizeof(*vec);
				+
				+    cudaMemsetAsync(vec, 42, nbytes, starpu_cuda_get_local_stream());
				+}
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void memset_opencl(void *buffers[], void *args);
			
 
				+#endif
			
 
				+
			
 
				+int ret; /* File-scope status code; compare_performance() stores StarPU return values in it */
			
 
				+/* First CPU implementation: sleeps for 10000 microseconds, then performs
				+ * n read-modify-write iterations on the first element of the vector
				+ * (n being the number of elements of the vector in descr[0]). */
				+void memset0_cpu(void *descr[], void *arg)
				+{
				+    (void)arg;
				+    STARPU_SKIP_IF_VALGRIND;
				+
				+    int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+    unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
				+    unsigned i;
				+
				+    /* Fixed start-up delay, independent of the data size */
				+    usleep(10000);
				+
				+    /* The index is unsigned so that it is compared with n without a
				+     * signed/unsigned mismatch, and so that the accumulation is done in
				+     * unsigned arithmetic: the sum of 0..n-1 exceeds INT_MAX for the
				+     * larger sizes used by this test, which would be a signed overflow. */
				+    for (i = 0; i < n; i++)
				+    {
				+        ptr[0] += i;
				+    }
				+}
			
 
				+
			
 
				+/* Second CPU implementation: no initial usleep() delay, but 1.5 times as
				+ * many loop iterations as memset0_cpu() for the same vector. */
				+void memset_cpu(void *descr[], void *arg)
				+{
				+    (void)arg;
				+    STARPU_SKIP_IF_VALGRIND;
				+
				+    int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+    unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
				+    /* Loop bound computed once instead of at every iteration */
				+    const double bound = 1.5 * n;
				+    unsigned i;
				+
				+    /* Unsigned index: the accumulation wraps around in unsigned arithmetic
				+     * instead of overflowing a signed int for the larger sizes used by
				+     * this test. */
				+    for (i = 0; i < bound; i++)
				+    {
				+        ptr[0] += i;
				+    }
				+}
			
 
				+
			
 
				+/* Performance model of type STARPU_REGRESSION_BASED (the linear
				+ * regression exercised by this test); referenced by memset_cl. */
				+static struct starpu_perfmodel model =
				+{
				+    .symbol = "memset_regression_based",
				+    .type = STARPU_REGRESSION_BASED
				+};
			
 
				+
			
 
				+/* Performance model of type STARPU_NL_REGRESSION_BASED (the non-linear
				+ * regression exercised by this test); referenced by nl_memset_cl. */
				+static struct starpu_perfmodel nl_model =
				+{
				+    .symbol = "non_linear_memset_regression_based",
				+    .type = STARPU_NL_REGRESSION_BASED
				+};
			
 
				+
			
 
				+/* Codelet bound to the linear regression model. It writes one buffer and
				+ * offers two CPU implementations (index 0: memset0_cpu, index 1:
				+ * memset_cpu), plus CUDA/OpenCL ones when those are compiled in. */
				+static struct starpu_codelet memset_cl =
				+{
				+    .model = &model,
				+    .nbuffers = 1,
				+    .modes = {STARPU_W},
				+    .cpu_funcs = {memset0_cpu, memset_cpu},
				+    .cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+#ifdef STARPU_USE_CUDA
				+    .cuda_funcs = {memset_cuda},
				+    .cuda_flags = {STARPU_CUDA_ASYNC},
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+    .opencl_funcs = {memset_opencl},
				+    .opencl_flags = {STARPU_OPENCL_ASYNC},
				+#endif
				+};
			
 
				+
			
 
				+/* Same kernels as memset_cl, but bound to the non-linear regression
				+ * model nl_model. */
				+static struct starpu_codelet nl_memset_cl =
				+{
				+    .model = &nl_model,
				+    .nbuffers = 1,
				+    .modes = {STARPU_W},
				+    .cpu_funcs = {memset0_cpu, memset_cpu},
				+    .cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+#ifdef STARPU_USE_CUDA
				+    .cuda_funcs = {memset_cuda},
				+    .cuda_flags = {STARPU_CUDA_ASYNC},
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+    .opencl_funcs = {memset_opencl},
				+    .opencl_flags = {STARPU_OPENCL_ASYNC},
				+#endif
				+};
			
 
				+
			
 
				+/* Calibration helper: registers a vector of nelems ints (home node -1,
				+ * no user buffer), submits 100 tasks of the given codelet on it while
				+ * requesting implementation 1, then unregisters the vector.
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV. */
				+static void test_memset(int nelems, struct starpu_codelet *codelet)
				+{
				+    const int nloops = 100;
				+    starpu_data_handle_t handle;
				+    int remaining;
				+
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+    for (remaining = nloops; remaining > 0; remaining--)
				+    {
				+        struct starpu_task *task = starpu_task_create();
				+        int ret;
				+
				+        task->cl = codelet;
				+        task->handles[0] = handle;
				+
				+        /* Select the implementation to run */
				+        starpu_task_set_implementation(task, 1);
				+
				+        ret = starpu_task_submit(task);
				+        if (ret == -ENODEV)
				+            exit(STARPU_TEST_SKIPPED);
				+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+    }
				+
				+    starpu_data_unregister(handle);
				+}
			
 
				+
			
 
				+/* Runs 100 synchronous tasks of the given codelet (requesting
				+ * implementation 1) on a vector of `size` ints, averages their execution
				+ * time from the task profiling info, and prints that average next to the
				+ * duration returned by starpu_task_expected_length() for every worker and
				+ * every implementation index.
				+ *
				+ * size:    number of int elements of the benchmarked vector
				+ * codelet: codelet executed by the measured tasks
				+ * task:    task handed to starpu_task_expected_length() to obtain the
				+ *          predictions; it is not submitted here
				+ *
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV, as
				+ * test_memset() does.
				+ */
				+static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *task)
				+{
				+    unsigned i;
				+    unsigned niter = 100;
				+    starpu_data_handle_t handle;
				+
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+    struct starpu_task **tasks = malloc(niter * sizeof(*tasks));
				+    assert(tasks);
				+
				+    for (i = 0; i < niter; i++)
				+    {
				+        /* Build the task (named so that it does not shadow the `task` parameter) */
				+        struct starpu_task *bench_task = starpu_task_create();
				+
				+        bench_task->cl = codelet;
				+        bench_task->handles[0] = handle;
				+
				+        bench_task->synchronous = 1;
				+
				+        /* We will destroy the task structure by hand so that we can
				+         * query the profiling info before the task is destroyed. */
				+        bench_task->destroy = 0;
				+
				+        tasks[i] = bench_task;
				+
				+        /* Select the implementation to run */
				+        starpu_task_set_implementation(bench_task, 1);
				+
				+        /* Submit the task */
				+        ret = starpu_task_submit(bench_task);
				+
				+        if (STARPU_UNLIKELY(ret == -ENODEV))
				+        {
				+            FPRINTF(stderr, "No worker may execute this task\n");
				+            exit(STARPU_TEST_SKIPPED);
				+        }
				+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+    }
				+
				+    starpu_data_unregister(handle);
				+
				+    starpu_task_wait_for_all();
				+
				+    double length_sum = 0.0;
				+
				+    for (i = 0; i < niter; i++)
				+    {
				+        struct starpu_task *done_task = tasks[i];
				+        struct starpu_profiling_task_info *info = done_task->profiling_info;
				+
				+        /* How long was the task execution ? */
				+        length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
				+
				+        /* We don't need the task structure anymore */
				+        starpu_task_destroy(done_task);
				+    }
				+
				+    /* Every task has been destroyed: release the array that tracked them */
				+    free(tasks);
				+
				+    double measured = length_sum / niter;
				+
				+    /* For each worker, print the predicted duration of every
				+     * implementation index next to the measured average */
				+    unsigned worker;
				+    for (worker = 0; worker < starpu_worker_get_count(); worker++)
				+    {
				+        struct starpu_profiling_worker_info worker_info;
				+        ret = starpu_profiling_worker_get_info(worker, &worker_info);
				+        STARPU_ASSERT(!ret);
				+
				+        char workername[128];
				+        starpu_worker_get_name(worker, workername, sizeof(workername));
				+        unsigned nimpl;
				+
				+        FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername);
				+
				+        for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				+        {
				+            FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f\n",
				+                    size, workername, nimpl,starpu_task_expected_length(task, starpu_worker_get_perf_archtype(worker, task->sched_ctx), nimpl), measured);
				+        }
				+    }
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+struct starpu_opencl_program opencl_program; /* Loaded and unloaded by main() when STARPU_USE_OPENCL is defined */
			
 
				+#endif
			
 
				+
			
 
				+/* Test driver. A first StarPU session (conf.calibrate = 2, "eager"
				+ * policy) runs both codelets through test_memset() over a range of
				+ * sizes; a second session (conf.calibrate = 0) prints predicted versus
				+ * measured durations for a vector of 1234567 ints through
				+ * compare_performance(). Returns STARPU_TEST_SKIPPED when
				+ * starpu_initialize() reports -ENODEV. */
				+int main(int argc, char **argv)
				+{
				+    struct starpu_conf conf;
				+    starpu_data_handle_t handle;
				+    struct starpu_task *task;
				+    int ret;
				+    int size;
				+
				+    /* Enable profiling */
				+    starpu_profiling_status_set(1);
				+
				+    /* Calibration phase */
				+    starpu_conf_init(&conf);
				+    conf.sched_policy_name = "eager";
				+    conf.calibrate = 2;
				+
				+    ret = starpu_initialize(&conf, &argc, &argv);
				+    if (ret == -ENODEV)
				+        return STARPU_TEST_SKIPPED;
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
				+
				+#ifdef STARPU_USE_OPENCL
				+    ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
				+            &opencl_program, NULL);
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
				+#endif
				+
				+    /* Use a linear regression */
				+    size = STARTlin;
				+    while (size < END)
				+    {
				+        test_memset(size, &memset_cl);
				+        size *= 2;
				+    }
				+
				+    /* Use a non-linear regression */
				+    size = START;
				+    while (size < END)
				+    {
				+        test_memset(size, &nl_memset_cl);
				+        size *= 2;
				+    }
				+
				+    ret = starpu_task_wait_for_all();
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
				+
				+    starpu_shutdown();
				+
				+    /* Test phase */
				+    starpu_conf_init(&conf);
				+    conf.sched_policy_name = "eager";
				+    conf.calibrate = 0;
				+
				+    ret = starpu_initialize(&conf, &argc, &argv);
				+    if (ret == -ENODEV)
				+        return STARPU_TEST_SKIPPED;
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
				+
				+    /* Now create a dummy task just to estimate its duration according to the regression */
				+    size = 1234567;
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+    task = starpu_task_create();
				+    task->cl = &memset_cl;
				+    task->handles[0] = handle;
				+    task->destroy = 0;
				+
				+    FPRINTF(stdout, "\n ////linear regression results////\n");
				+    compare_performance(size, &memset_cl,task);
				+
				+    /* Reuse the same dummy task for the non-linear model */
				+    task->cl = &nl_memset_cl;
				+    FPRINTF(stdout, "\n ////non linear regression results////\n");
				+    compare_performance(size, &nl_memset_cl,task);
				+
				+    starpu_task_destroy(task);
				+    starpu_data_unregister(handle);
				+
				+#ifdef STARPU_USE_OPENCL
				+    ret = starpu_opencl_unload_opencl(&opencl_program);
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
				+#endif
				+    starpu_shutdown();
				+
				+    return EXIT_SUCCESS;
				+}
			
 
				+
			
--- a/tests/perfmodels/regression_based_04.c
+++ b/tests/perfmodels/regression_based_04.c
@@ -0,0 +1,334 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011,2012,2014                           Inria
			
 
				+ * Copyright (C) 2011-2016,2019                           Université de Bordeaux
			
 
				+ * Copyright (C) 2011-2017                                CNRS
			
 
				+ * Copyright (C) 2011                                     Télécom-SudParis
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+/*
			
 
				+ * Benchmark memset with a linear regression
			
 
				+ */
			
 
				+#define STARTlin 1048576
			
 
				+#define START 1024
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+#define END 1048576
			
 
				+#else
			
 
				+#define END 16777216
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/* CUDA flavour of the kernel: asynchronously fills the whole vector with
				+ * the byte value 42 on the stream returned by
				+ * starpu_cuda_get_local_stream(). */
				+static void memset_cuda(void *descr[], void *arg)
				+{
				+    (void)arg;
				+    STARPU_SKIP_IF_VALGRIND;
				+
				+    unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);
				+    int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+    size_t nbytes = nx * sizeof(*vec);
				+
				+    cudaMemsetAsync(vec, 42, nbytes, starpu_cuda_get_local_stream());
				+}
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void memset_opencl(void *buffers[], void *args);
			
 
				+#endif
			
 
				+
			
 
				+int ret; /* File-scope status code; compare_performance() stores StarPU return values in it */
			
 
				+/* First CPU implementation: sleeps for 10000 microseconds, then performs
				+ * n read-modify-write iterations on the first element of the vector
				+ * (n being the number of elements of the vector in descr[0]). */
				+void memset0_cpu(void *descr[], void *arg)
				+{
				+    (void)arg;
				+    STARPU_SKIP_IF_VALGRIND;
				+
				+    int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+    unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
				+    unsigned i;
				+
				+    /* Fixed start-up delay, independent of the data size */
				+    usleep(10000);
				+
				+    /* The index is unsigned so that it is compared with n without a
				+     * signed/unsigned mismatch, and so that the accumulation is done in
				+     * unsigned arithmetic: the sum of 0..n-1 exceeds INT_MAX for the
				+     * larger sizes used by this test, which would be a signed overflow. */
				+    for (i = 0; i < n; i++)
				+    {
				+        ptr[0] += i;
				+    }
				+}
			
 
				+
			
 
				+/* Second CPU implementation: no initial usleep() delay, but 1.5 times as
				+ * many loop iterations as memset0_cpu() for the same vector. */
				+void memset_cpu(void *descr[], void *arg)
				+{
				+    (void)arg;
				+    STARPU_SKIP_IF_VALGRIND;
				+
				+    int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
				+    unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
				+    /* Loop bound computed once instead of at every iteration */
				+    const double bound = 1.5 * n;
				+    unsigned i;
				+
				+    /* Unsigned index: the accumulation wraps around in unsigned arithmetic
				+     * instead of overflowing a signed int for the larger sizes used by
				+     * this test. */
				+    for (i = 0; i < bound; i++)
				+    {
				+        ptr[0] += i;
				+    }
				+}
			
 
				+
			
 
				+/* Performance model of type STARPU_REGRESSION_BASED (the linear
				+ * regression exercised by this test); referenced by memset_cl. */
				+static struct starpu_perfmodel model =
				+{
				+    .symbol = "memset_regression_based",
				+    .type = STARPU_REGRESSION_BASED
				+};
			
 
				+
			
 
				+/* Performance model of type STARPU_NL_REGRESSION_BASED (the non-linear
				+ * regression exercised by this test); referenced by nl_memset_cl. */
				+static struct starpu_perfmodel nl_model =
				+{
				+    .symbol = "non_linear_memset_regression_based",
				+    .type = STARPU_NL_REGRESSION_BASED
				+};
			
 
				+
			
 
				+/* Codelet bound to the linear regression model. It writes one buffer and
				+ * offers two CPU implementations (index 0: memset0_cpu, index 1:
				+ * memset_cpu), plus CUDA/OpenCL ones when those are compiled in. */
				+static struct starpu_codelet memset_cl =
				+{
				+    .model = &model,
				+    .nbuffers = 1,
				+    .modes = {STARPU_W},
				+    .cpu_funcs = {memset0_cpu, memset_cpu},
				+    .cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+#ifdef STARPU_USE_CUDA
				+    .cuda_funcs = {memset_cuda},
				+    .cuda_flags = {STARPU_CUDA_ASYNC},
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+    .opencl_funcs = {memset_opencl},
				+    .opencl_flags = {STARPU_OPENCL_ASYNC},
				+#endif
				+};
			
 
				+
			
 
				+/* Same kernels as memset_cl, but bound to the non-linear regression
				+ * model nl_model. */
				+static struct starpu_codelet nl_memset_cl =
				+{
				+    .model = &nl_model,
				+    .nbuffers = 1,
				+    .modes = {STARPU_W},
				+    .cpu_funcs = {memset0_cpu, memset_cpu},
				+    .cpu_funcs_name = {"memset0_cpu", "memset_cpu"},
				+#ifdef STARPU_USE_CUDA
				+    .cuda_funcs = {memset_cuda},
				+    .cuda_flags = {STARPU_CUDA_ASYNC},
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+    .opencl_funcs = {memset_opencl},
				+    .opencl_flags = {STARPU_OPENCL_ASYNC},
				+#endif
				+};
			
 
				+
			
 
				+/* Calibration helper: registers a vector of nelems ints (home node -1,
				+ * no user buffer), submits 100 tasks of the given codelet on it, then
				+ * unregisters the vector.
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV. */
				+static void test_memset(int nelems, struct starpu_codelet *codelet)
				+{
				+    const int nloops = 100;
				+    starpu_data_handle_t handle;
				+    int remaining;
				+
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int));
				+
				+    for (remaining = nloops; remaining > 0; remaining--)
				+    {
				+        struct starpu_task *task = starpu_task_create();
				+        int ret;
				+
				+        task->cl = codelet;
				+        task->handles[0] = handle;
				+
				+        ret = starpu_task_submit(task);
				+        if (ret == -ENODEV)
				+            exit(STARPU_TEST_SKIPPED);
				+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+    }
				+
				+    starpu_data_unregister(handle);
				+}
			
 
				+
			
 
				+/* Runs 100 synchronous tasks of the given codelet on a vector of `size`
				+ * ints, averages their execution time from the task profiling info, and
				+ * prints that average next to the duration returned by
				+ * starpu_task_expected_length() for every worker and every
				+ * implementation index.
				+ *
				+ * size:    number of int elements of the benchmarked vector
				+ * codelet: codelet executed by the measured tasks
				+ * task:    task handed to starpu_task_expected_length() to obtain the
				+ *          predictions; it is not submitted here
				+ *
				+ * Exits with STARPU_TEST_SKIPPED when a submission returns -ENODEV, as
				+ * test_memset() does.
				+ */
				+static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *task)
				+{
				+    unsigned i;
				+    unsigned niter = 100;
				+    starpu_data_handle_t handle;
				+
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+    struct starpu_task **tasks = malloc(niter * sizeof(*tasks));
				+    assert(tasks);
				+
				+    for (i = 0; i < niter; i++)
				+    {
				+        /* Build the task (named so that it does not shadow the `task` parameter) */
				+        struct starpu_task *bench_task = starpu_task_create();
				+
				+        bench_task->cl = codelet;
				+        bench_task->handles[0] = handle;
				+
				+        bench_task->synchronous = 1;
				+
				+        /* We will destroy the task structure by hand so that we can
				+         * query the profiling info before the task is destroyed. */
				+        bench_task->destroy = 0;
				+
				+        tasks[i] = bench_task;
				+
				+        /* Submit the task */
				+        ret = starpu_task_submit(bench_task);
				+
				+        if (STARPU_UNLIKELY(ret == -ENODEV))
				+        {
				+            FPRINTF(stderr, "No worker may execute this task\n");
				+            exit(STARPU_TEST_SKIPPED);
				+        }
				+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
				+    }
				+
				+    starpu_data_unregister(handle);
				+
				+    starpu_task_wait_for_all();
				+
				+    double length_sum = 0.0;
				+
				+    for (i = 0; i < niter; i++)
				+    {
				+        struct starpu_task *done_task = tasks[i];
				+        struct starpu_profiling_task_info *info = done_task->profiling_info;
				+
				+        /* How long was the task execution ? */
				+        length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
				+
				+        /* We don't need the task structure anymore */
				+        starpu_task_destroy(done_task);
				+    }
				+
				+    /* Every task has been destroyed: release the array that tracked them */
				+    free(tasks);
				+
				+    double measured = length_sum / niter;
				+
				+    /* For each worker, print the predicted duration of every
				+     * implementation index next to the measured average */
				+    unsigned worker;
				+    for (worker = 0; worker < starpu_worker_get_count(); worker++)
				+    {
				+        struct starpu_profiling_worker_info worker_info;
				+        ret = starpu_profiling_worker_get_info(worker, &worker_info);
				+        STARPU_ASSERT(!ret);
				+
				+        char workername[128];
				+        starpu_worker_get_name(worker, workername, sizeof(workername));
				+        unsigned nimpl;
				+
				+        FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername);
				+
				+        for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				+        {
				+            FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f\n",
				+                    size, workername, nimpl,starpu_task_expected_length(task, starpu_worker_get_perf_archtype(worker, task->sched_ctx), nimpl), measured);
				+        }
				+    }
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+struct starpu_opencl_program opencl_program; /* Loaded and unloaded by main() when STARPU_USE_OPENCL is defined */
			
 
				+#endif
			
 
				+
			
 
				+/* Test driver. A first StarPU session (conf.calibrate = 2, "dmda"
				+ * policy) runs both codelets through test_memset() over a range of
				+ * sizes; a second session (conf.calibrate = 0) prints predicted versus
				+ * measured durations for a vector of 1234567 ints through
				+ * compare_performance(). Returns STARPU_TEST_SKIPPED when
				+ * starpu_initialize() reports -ENODEV. */
				+int main(int argc, char **argv)
				+{
				+    struct starpu_conf conf;
				+    starpu_data_handle_t handle;
				+    struct starpu_task *task;
				+    int ret;
				+    int size;
				+
				+    /* Enable profiling */
				+    starpu_profiling_status_set(1);
				+
				+    /* Calibration phase */
				+    starpu_conf_init(&conf);
				+    conf.sched_policy_name = "dmda";
				+    conf.calibrate = 2;
				+
				+    ret = starpu_initialize(&conf, &argc, &argv);
				+    if (ret == -ENODEV)
				+        return STARPU_TEST_SKIPPED;
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
				+
				+#ifdef STARPU_USE_OPENCL
				+    ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
				+            &opencl_program, NULL);
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
				+#endif
				+
				+    /* Use a linear regression */
				+    size = STARTlin;
				+    while (size < END)
				+    {
				+        test_memset(size, &memset_cl);
				+        size *= 2;
				+    }
				+
				+    /* Use a non-linear regression */
				+    size = START;
				+    while (size < END)
				+    {
				+        test_memset(size, &nl_memset_cl);
				+        size *= 2;
				+    }
				+
				+    ret = starpu_task_wait_for_all();
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
				+
				+    starpu_shutdown();
				+
				+    /* Test phase */
				+    starpu_conf_init(&conf);
				+    conf.sched_policy_name = "dmda";
				+    conf.calibrate = 0;
				+
				+    ret = starpu_initialize(&conf, &argc, &argv);
				+    if (ret == -ENODEV)
				+        return STARPU_TEST_SKIPPED;
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
				+
				+    /* Now create a dummy task just to estimate its duration according to the regression */
				+    size = 1234567;
				+    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
				+
				+    task = starpu_task_create();
				+    task->cl = &memset_cl;
				+    task->handles[0] = handle;
				+    task->destroy = 0;
				+
				+    FPRINTF(stdout, "\n ////linear regression results////\n");
				+    compare_performance(size, &memset_cl,task);
				+
				+    /* Reuse the same dummy task for the non-linear model */
				+    task->cl = &nl_memset_cl;
				+    FPRINTF(stdout, "\n ////non linear regression results////\n");
				+    compare_performance(size, &nl_memset_cl,task);
				+
				+    starpu_task_destroy(task);
				+    starpu_data_unregister(handle);
				+
				+#ifdef STARPU_USE_OPENCL
				+    ret = starpu_opencl_unload_opencl(&opencl_program);
				+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
				+#endif
				+    starpu_shutdown();
				+
				+    return EXIT_SUCCESS;
				+}
			
 
				+