Просмотр исходного кода

Permit the application to provide its own size base for performance models

Samuel Thibault лет назад: 14
Родитель
Commit
37862069c7

+ 9 - 1
doc/chapters/advanced-examples.texi

@@ -346,7 +346,7 @@ struct starpu_codelet cl = @{
 @end cartouche
 @end cartouche
 
 
 @item
 @item
-Measured at runtime and refined by regression (@code{STARPU_REGRESSION_*_BASED}
+Measured at runtime and refined by regression (@code{STARPU_*REGRESSION_BASED}
 model type). This still assumes performance regularity, but can work
 model type). This still assumes performance regularity, but can work
 with various data input sizes, by applying regression over observed
 with various data input sizes, by applying regression over observed
 execution times. STARPU_REGRESSION_BASED uses an a*n^b regression
 execution times. STARPU_REGRESSION_BASED uses an a*n^b regression
@@ -374,6 +374,14 @@ per architecture.
 
 
 @end itemize
 @end itemize
 
 
+For the @code{STARPU_HISTORY_BASED} and @code{STARPU_*REGRESSION_BASED} models,
+the total size of task data (both input and output) is used as an index by
+default. The @code{size_base} field of @code{struct starpu_perfmodel} however
+permits the application to override that, when for instance some of the data
+do not matter for task cost (e.g. mere reference table), or when using sparse
+structures (in which case it is the number of non-zeros which matters), or when
+there is some hidden parameter such as the number of iterations, etc.
+
 How to use schedulers which can benefit from such performance model is explained
 How to use schedulers which can benefit from such performance model is explained
 in @ref{Task scheduling policy}.
 in @ref{Task scheduling policy}.
 
 

+ 5 - 0
doc/chapters/basic-api.texi

@@ -1460,6 +1460,9 @@ used as file name to store the model.
 implementation number, and must return a task duration estimation in micro-seconds.
 implementation number, and must return a task duration estimation in micro-seconds.
 @item @code{per_arch}: Used by @code{STARPU_PER_ARCH}: array of @code{struct
 @item @code{per_arch}: Used by @code{STARPU_PER_ARCH}: array of @code{struct
 starpu_per_arch_perfmodel} structures.
 starpu_per_arch_perfmodel} structures.
+@item @code{size_base}: Used by @code{STARPU_HISTORY_BASED} and
+@code{STARPU_*REGRESSION_BASED}. If not NULL, takes a task and implementation
+number, and returns the size to be used as index for history and regression.
 @end table
 @end table
 @end deftp
 @end deftp
 
 
@@ -1479,6 +1482,8 @@ records all execution history measures.
 Used by @code{STARPU_HISTORY_REGRESION_BASED} and
 Used by @code{STARPU_HISTORY_REGRESION_BASED} and
 @code{STARPU_NL_REGRESSION_BASED}, contains the estimated factors of the
 @code{STARPU_NL_REGRESSION_BASED}, contains the estimated factors of the
 regression.
 regression.
+@item @code{size_base}: Same as in @code{struct starpu_perfmodel}, but per-arch, in
+case it depends on the architecture-specific implementation.
 @end table
 @end table
 @end deftp
 @end deftp
 
 

+ 7 - 0
examples/opt/pi/pi.c

@@ -61,6 +61,12 @@ static void cpu_kernel(void *descr[], void *cl_arg)
 	free(random_numbers);
 	free(random_numbers);
 }
 }
 
 
+/* The amount of work does not depend on the data size at all :) */
+static size_t size_base(struct starpu_task *task, unsigned nimpl)
+{
+	return NSHOT_PER_TASK;
+}
+
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
@@ -114,6 +120,7 @@ int main(int argc, char **argv)
 	static struct starpu_perfmodel model =
 	static struct starpu_perfmodel model =
 	{
 	{
 		.type = STARPU_HISTORY_BASED,
 		.type = STARPU_HISTORY_BASED,
+		.size_base = size_base,
 		.symbol = "monte_carlo_pi"
 		.symbol = "monte_carlo_pi"
 	};
 	};
 
 

+ 13 - 0
examples/opt/pi/pi_redux.c

@@ -189,6 +189,19 @@ static void pi_func_cuda(void *descr[], void *cl_arg __attribute__ ((unused)))
 }
 }
 #endif
 #endif
 
 
+/* The amount of work does not depend on the data size at all :) */
+static size_t size_base(struct starpu_task *task, unsigned nimpl)
+{
+	return NSHOT_PER_TASK;
+}
+
+static struct starpu_perfmodel model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.size_base = size_base,
+	.symbol = "monte_carlo_pi_redux"
+};
+
 static struct starpu_codelet pi_cl =
 static struct starpu_codelet pi_cl =
 {
 {
 	.where =
 	.where =

+ 3 - 0
include/starpu_perfmodel.h

@@ -135,6 +135,7 @@ struct starpu_per_arch_perfmodel
 {
 {
 	double (*cost_model)(struct starpu_buffer_descr *t) STARPU_DEPRECATED; /* returns expected duration in µs */
 	double (*cost_model)(struct starpu_buffer_descr *t) STARPU_DEPRECATED; /* returns expected duration in µs */
 	double (*cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl); /* returns expected duration in µs */
 	double (*cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl); /* returns expected duration in µs */
+	size_t (*size_base)(struct starpu_task *, enum starpu_perf_archtype arch, unsigned nimpl);
 
 
 	/* internal variables */
 	/* internal variables */
 	struct starpu_htbl32_node *history;
 	struct starpu_htbl32_node *history;
@@ -163,6 +164,8 @@ struct starpu_perfmodel
 	double (*cost_model)(struct starpu_buffer_descr *) STARPU_DEPRECATED;
 	double (*cost_model)(struct starpu_buffer_descr *) STARPU_DEPRECATED;
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 
 
+	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
+
 	/* per-architecture model */
 	/* per-architecture model */
 	struct starpu_per_arch_perfmodel per_arch[STARPU_NARCH_VARIATIONS][STARPU_MAXIMPLEMENTATIONS];
 	struct starpu_per_arch_perfmodel per_arch[STARPU_NARCH_VARIATIONS][STARPU_MAXIMPLEMENTATIONS];
 
 

+ 4 - 4
src/common/fxt.h

@@ -199,11 +199,11 @@ do {									\
 	}								\
 	}								\
 } while(0);
 } while(0);
 
 
-#define _STARPU_TRACE_END_CODELET_BODY(job, archtype)			\
+#define _STARPU_TRACE_END_CODELET_BODY(job, perf_arch, nimpl, archtype)			\
 do {									\
 do {									\
-	const size_t job_size = _starpu_job_get_data_size((job));	\
-	const uint32_t job_hash = _starpu_compute_buffers_footprint(job);\
-	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, job, (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
+	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));	\
+	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
+	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
 } while(0);
 } while(0);
 
 
 #define _STARPU_TRACE_START_CALLBACK(job)	\
 #define _STARPU_TRACE_START_CALLBACK(job)	\

+ 16 - 12
src/core/jobs.c

@@ -27,22 +27,26 @@
 #include <profiling/bound.h>
 #include <profiling/bound.h>
 #include <starpu_top.h>
 #include <starpu_top.h>
 
 
-size_t _starpu_job_get_data_size(struct _starpu_job *j)
+size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j)
 {
 {
-	size_t size = 0;
-
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 
 
-	unsigned nbuffers = task->cl->nbuffers;
-
-	unsigned buffer;
-	for (buffer = 0; buffer < nbuffers; buffer++)
-	{
-		starpu_data_handle_t handle = task->handles[buffer];
-		size += _starpu_data_get_size(handle);
+	if (model && model->per_arch[arch][nimpl].size_base) {
+		return model->per_arch[arch][nimpl].size_base(task, arch, nimpl);
+	} else if (model && model->size_base) {
+		return model->size_base(task, nimpl);
+	} else {
+		unsigned nbuffers = task->cl->nbuffers;
+		size_t size = 0;
+
+		unsigned buffer;
+		for (buffer = 0; buffer < nbuffers; buffer++)
+		{
+			starpu_data_handle_t handle = task->handles[buffer];
+			size += _starpu_data_get_size(handle);
+		}
+		return size;
 	}
 	}
-
-	return size;
 }
 }
 
 
 /* we need to identify each task to generate the DAG. */
 /* we need to identify each task to generate the DAG. */

+ 2 - 2
src/core/jobs.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009-2011  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
@@ -151,7 +151,7 @@ unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j, unsigned
 void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_already_locked);
 void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_already_locked);
 
 
 /* Get the sum of the size of the data accessed by the job. */
 /* Get the sum of the size of the data accessed by the job. */
-size_t _starpu_job_get_data_size(struct _starpu_job *j);
+size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j);
 
 
 /* Get a task from the local pool of tasks that were explicitly attributed to
 /* Get a task from the local pool of tasks that were explicitly attributed to
  * that worker. */
  * that worker. */

+ 9 - 9
src/core/perfmodel/perfmodel_history.c

@@ -893,7 +893,7 @@ void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
 double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl)
 double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl)
 {
 {
 	double exp = -1.0;
 	double exp = -1.0;
-	size_t size = _starpu_job_get_data_size(j);
+	size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
 	struct starpu_regression_model *regmodel;
 	struct starpu_regression_model *regmodel;
 
 
 	regmodel = &model->per_arch[arch][nimpl].regression;
 	regmodel = &model->per_arch[arch][nimpl].regression;
@@ -907,7 +907,7 @@ double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model
 double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
 double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
 {
 {
 	double exp = -1.0;
 	double exp = -1.0;
-	size_t size = _starpu_job_get_data_size(j);
+	size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
 	struct starpu_regression_model *regmodel;
 	struct starpu_regression_model *regmodel;
 
 
 	regmodel = &model->per_arch[arch][nimpl].regression;
 	regmodel = &model->per_arch[arch][nimpl].regression;
@@ -916,7 +916,7 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
 		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
 	else
 	else
 	{
 	{
-		uint32_t key = _starpu_compute_buffers_footprint(j);
+		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
 		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
 		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
 		struct starpu_htbl32_node *history = per_arch_model->history;
 		struct starpu_htbl32_node *history = per_arch_model->history;
 		struct starpu_history_entry *entry;
 		struct starpu_history_entry *entry;
@@ -945,7 +945,7 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 	struct starpu_history_entry *entry;
 	struct starpu_history_entry *entry;
 	struct starpu_htbl32_node *history;
 	struct starpu_htbl32_node *history;
 
 
-	uint32_t key = _starpu_compute_buffers_footprint(j);
+	uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
 
 
 	per_arch_model = &model->per_arch[arch][nimpl];
 	per_arch_model = &model->per_arch[arch][nimpl];
 
 
@@ -989,7 +989,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 			struct starpu_htbl32_node *history;
 			struct starpu_htbl32_node *history;
 			struct starpu_htbl32_node **history_ptr;
 			struct starpu_htbl32_node **history_ptr;
 			struct starpu_history_list **list;
 			struct starpu_history_list **list;
-			uint32_t key = _starpu_compute_buffers_footprint(j);
+			uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
 
 
 			history = per_arch_model->history;
 			history = per_arch_model->history;
 			history_ptr = &per_arch_model->history;
 			history_ptr = &per_arch_model->history;
@@ -1008,7 +1008,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 					entry->deviation = 0.0;
 					entry->deviation = 0.0;
 					entry->sum2 = measured*measured;
 					entry->sum2 = measured*measured;
 
 
-					entry->size = _starpu_job_get_data_size(j);
+					entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);
 
 
 					entry->footprint = key;
 					entry->footprint = key;
 					entry->nsample = 1;
 					entry->nsample = 1;
@@ -1037,7 +1037,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 			reg_model = &per_arch_model->regression;
 			reg_model = &per_arch_model->regression;
 
 
 			/* update the regression model */
 			/* update the regression model */
-			size_t job_size = _starpu_job_get_data_size(j);
+			size_t job_size = _starpu_job_get_data_size(model, arch, nimpl, j);
 			double logy, logx;
 			double logy, logx;
 			logx = log((double)job_size);
 			logx = log((double)job_size);
 			logy = log(measured);
 			logy = log(measured);
@@ -1069,11 +1069,11 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 		FILE * debug_file = per_arch_model->debug_file;
 		FILE * debug_file = per_arch_model->debug_file;
 
 
 		if (!j->footprint_is_computed)
 		if (!j->footprint_is_computed)
-			(void) _starpu_compute_buffers_footprint(j);
+			(void) _starpu_compute_buffers_footprint(model, arch, nimpl, j);
 
 
 		STARPU_ASSERT(j->footprint_is_computed);
 		STARPU_ASSERT(j->footprint_is_computed);
 
 
-		fprintf(debug_file, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(j), measured, task->predicted, task->predicted_transfer, cpuid);
+		fprintf(debug_file, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(model, arch, nimpl, j), measured, task->predicted, task->predicted_transfer, cpuid);
 		unsigned i;
 		unsigned i;
 
 
 		for (i = 0; i < task->cl->nbuffers; i++)
 		for (i = 0; i < task->cl->nbuffers; i++)

+ 17 - 9
src/datawizard/footprint.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,7 +18,7 @@
 #include <datawizard/footprint.h>
 #include <datawizard/footprint.h>
 #include <common/hash.h>
 #include <common/hash.h>
 
 
-uint32_t _starpu_compute_buffers_footprint(struct _starpu_job *j)
+uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j)
 {
 {
 	if (j->footprint_is_computed)
 	if (j->footprint_is_computed)
 		return j->footprint;
 		return j->footprint;
@@ -28,13 +28,21 @@ uint32_t _starpu_compute_buffers_footprint(struct _starpu_job *j)
 
 
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 
 
-	for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
-	{
-		starpu_data_handle_t handle = task->handles[buffer];
-
-		uint32_t handle_footprint = _starpu_data_get_footprint(handle);
-
-		footprint = _starpu_crc32_be(handle_footprint, footprint);
+	if (model && model->per_arch[arch][nimpl].size_base) {
+		size_t size = model->per_arch[arch][nimpl].size_base(task, arch, nimpl);
+		footprint = _starpu_crc32_be_n(&size, sizeof(size), footprint);
+	} else if (model && model->size_base) {
+		size_t size = model->size_base(task, nimpl);
+		footprint = _starpu_crc32_be_n(&size, sizeof(size), footprint);
+	} else {
+		for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
+		{
+			starpu_data_handle_t handle = task->handles[buffer];
+
+			uint32_t handle_footprint = _starpu_data_get_footprint(handle);
+
+			footprint = _starpu_crc32_be(handle_footprint, footprint);
+		}
 	}
 	}
 
 
 	j->footprint = footprint;
 	j->footprint = footprint;

+ 2 - 2
src/datawizard/footprint.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009-2011  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -24,7 +24,7 @@
 
 
 /* Compute the footprint that characterizes the job and cache it into the job
 /* Compute the footprint that characterizes the job and cache it into the job
  * structure. */
  * structure. */
-uint32_t _starpu_compute_buffers_footprint(struct _starpu_job *j);
+uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j);
 
 
 /* Compute the footprint that characterizes the layout of the data handle. */
 /* Compute the footprint that characterizes the layout of the data handle. */
 uint32_t _starpu_compute_data_footprint(starpu_data_handle_t handle);
 uint32_t _starpu_compute_data_footprint(starpu_data_handle_t handle);

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -60,7 +60,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 		func(task->interfaces, task->cl_arg);
 		func(task->interfaces, task->cl_arg);
 	}
 	}
 
 
-	_starpu_driver_end_job(cpu_args, j, &codelet_end, rank);
+	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank);
 
 
 	if (is_parallel_task)
 	if (is_parallel_task)
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);

+ 1 - 1
src/drivers/cuda/driver_cuda.c

@@ -221,7 +221,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 	STARPU_ASSERT(func);
 	STARPU_ASSERT(func);
 	func(task->interfaces, task->cl_arg);
 	func(task->interfaces, task->cl_arg);
 
 
-	_starpu_driver_end_job(args, j, &codelet_end, 0);
+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
 
 
 	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end);
 	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end);
 
 

+ 2 - 2
src/drivers/driver_common/driver_common.c

@@ -60,7 +60,7 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 	_STARPU_TRACE_START_CODELET_BODY(j);
 	_STARPU_TRACE_START_CODELET_BODY(j);
 }
 }
 
 
-void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_end, int rank)
+void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch, struct timespec *codelet_end, int rank)
 {
 {
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_codelet *cl = task->cl;
@@ -71,7 +71,7 @@ void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
 	unsigned calibrate_model = 0;
 	unsigned calibrate_model = 0;
 	enum starpu_perf_archtype archtype STARPU_ATTRIBUTE_UNUSED = args->perf_arch;
 	enum starpu_perf_archtype archtype STARPU_ATTRIBUTE_UNUSED = args->perf_arch;
 
 
-	_STARPU_TRACE_END_CODELET_BODY(j, archtype);
+	_STARPU_TRACE_END_CODELET_BODY(j, perf_arch, j->nimpl, archtype);
 
 
 	if (cl->model && cl->model->benchmarking)
 	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
 		calibrate_model = 1;

+ 1 - 1
src/drivers/driver_common/driver_common.h

@@ -25,7 +25,7 @@
 
 
 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
 			      struct timespec *codelet_start, int rank);
 			      struct timespec *codelet_start, int rank);
-void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
+void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch,
 			    struct timespec *codelet_end, int rank);
 			    struct timespec *codelet_end, int rank);
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
 					enum starpu_perf_archtype perf_arch,
 					enum starpu_perf_archtype perf_arch,

+ 1 - 1
src/drivers/opencl/driver_opencl.c

@@ -597,7 +597,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 	STARPU_ASSERT(func);
 	STARPU_ASSERT(func);
 	func(task->interfaces, task->cl_arg);
 	func(task->interfaces, task->cl_arg);
 
 
-	_starpu_driver_end_job(args, j, &codelet_end, 0);
+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
 
 
 	_starpu_driver_update_job_feedback(j, args, args->perf_arch,
 	_starpu_driver_update_job_feedback(j, args, args->perf_arch,
 							&codelet_start, &codelet_end);
 							&codelet_start, &codelet_end);

+ 2 - 2
src/profiling/bound.c

@@ -181,7 +181,7 @@ static void new_task(struct _starpu_job *j)
 	t->tag_id = j->task->tag_id;
 	t->tag_id = j->task->tag_id;
 	t->use_tag = j->task->use_tag;
 	t->use_tag = j->task->use_tag;
 	t->cl = j->task->cl;
 	t->cl = j->task->cl;
-	t->footprint = _starpu_compute_buffers_footprint(j);
+	t->footprint = _starpu_compute_buffers_footprint(NULL, 0, 0, j);
 	t->priority = j->task->priority;
 	t->priority = j->task->priority;
 	t->deps = NULL;
 	t->deps = NULL;
 	t->depsn = 0;
 	t->depsn = 0;
@@ -214,7 +214,7 @@ void _starpu_bound_record(struct _starpu_job *j)
 	{
 	{
 		struct bound_task_pool *tp;
 		struct bound_task_pool *tp;
 
 
-		_starpu_compute_buffers_footprint(j);
+		_starpu_compute_buffers_footprint(NULL, 0, 0, j);
 
 
 		if (last && last->cl == j->task->cl && last->footprint == j->footprint)
 		if (last && last->cl == j->task->cl && last->footprint == j->footprint)
 			tp = last;
 			tp = last;