лет назад: 14 · 37862069c7
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -346,7 +346,7 @@ struct starpu_codelet cl = @{
 
				 @end cartouche
			
 
				 
			
 
				 @item
			
 
				-Measured at runtime and refined by regression (@code{STARPU_REGRESSION_*_BASED}
			
 
				+Measured at runtime and refined by regression (@code{STARPU_*REGRESSION_BASED}
			
 
				 model type). This still assumes performance regularity, but can work
			
 
				 with various data input sizes, by applying regression over observed
			
 
				 execution times. STARPU_REGRESSION_BASED uses an a*n^b regression
			
@@ -374,6 +374,14 @@ per architecture.
 
				 
			
 
				 @end itemize
			
 
				 
			
 
				+For the @code{STARPU_HISTORY_BASED} and @code{STARPU_*REGRESSION_BASED} model types,
			
 
				+the total size of task data (both input and output) is used as an index by
			
 
				+default. The @code{size_base} field of @code{struct starpu_perfmodel} however
			
 
				+permits the application to override that, when for instance some of the data
			
 
				+do not matter for task cost (e.g. mere reference table), or when using sparse
			
 
				+structures (in which case it is the number of non-zeros which matters), or when
			
 
				+there is some hidden parameter such as the number of iterations, etc.
			
 
				+
			
 
				 How to use schedulers which can benefit from such performance model is explained
			
 
				 in @ref{Task scheduling policy}.
			
 
				 
			
--- a/doc/chapters/basic-api.texi
+++ b/doc/chapters/basic-api.texi
@@ -1460,6 +1460,9 @@ used as file name to store the model.
 
				 implementation number, and must return a task duration estimation in micro-seconds.
			
 
				 @item @code{per_arch}: Used by @code{STARPU_PER_ARCH}: array of @code{struct
			
 
				 starpu_per_arch_perfmodel} structures.
			
 
				+@item @code{size_base}: Used by @code{STARPU_HISTORY_BASED} and
			
 
				+@code{STARPU_*REGRESSION_BASED}. If not NULL, takes a task and implementation
			
 
				+number, and returns the size to be used as index for history and regression.
			
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
@@ -1479,6 +1482,8 @@ records all execution history measures.
 
				 Used by @code{STARPU_REGRESSION_BASED} and
			
 
				 @code{STARPU_NL_REGRESSION_BASED}, contains the estimated factors of the
			
 
				 regression.
			
 
				+@item @code{size_base}: Same as in @code{struct starpu_perfmodel}, but per-arch, in
			
 
				+case it depends on the architecture-specific implementation.
			
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
--- a/examples/opt/pi/pi.c
+++ b/examples/opt/pi/pi.c
@@ -61,6 +61,12 @@ static void cpu_kernel(void *descr[], void *cl_arg)
 
				 	free(random_numbers);
			
 
				 }
			
 
				 
			
 
				+/* The amount of work does not depend on the data size at all :) */
			
 
				+static size_t size_base(struct starpu_task *task, unsigned nimpl)
			
 
				+{
			
 
				+	return NSHOT_PER_TASK;
			
 
				+}
			
 
				+
			
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
@@ -114,6 +120,7 @@ int main(int argc, char **argv)
 
				 	static struct starpu_perfmodel model =
			
 
				 	{
			
 
				 		.type = STARPU_HISTORY_BASED,
			
 
				+		.size_base = size_base,
			
 
				 		.symbol = "monte_carlo_pi"
			
 
				 	};
			
 
				 
			
--- a/examples/opt/pi/pi_redux.c
+++ b/examples/opt/pi/pi_redux.c
@@ -189,6 +189,19 @@ static void pi_func_cuda(void *descr[], void *cl_arg __attribute__ ((unused)))
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/* The amount of work does not depend on the data size at all :) */
			
 
				+static size_t size_base(struct starpu_task *task, unsigned nimpl)
			
 
				+{
			
 
				+	return NSHOT_PER_TASK;
			
 
				+}
			
 
				+
			
 
				+static struct starpu_perfmodel model =
			
 
				+{
			
 
				+	.type = STARPU_HISTORY_BASED,
			
 
				+	.size_base = size_base,
			
 
				+	.symbol = "monte_carlo_pi_redux"
			
 
				+};
			
 
				+
			
 
				 static struct starpu_codelet pi_cl =
			
 
				 {
			
 
				 	.where =
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -135,6 +135,7 @@ struct starpu_per_arch_perfmodel
 
				 {
			
 
				 	double (*cost_model)(struct starpu_buffer_descr *t) STARPU_DEPRECATED; /* returns expected duration in µs */
			
 
				 	double (*cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl); /* returns expected duration in µs */
			
 
				+	size_t (*size_base)(struct starpu_task *, enum starpu_perf_archtype arch, unsigned nimpl);
			
 
				 
			
 
				 	/* internal variables */
			
 
				 	struct starpu_htbl32_node *history;
			
@@ -163,6 +164,8 @@ struct starpu_perfmodel
 
				 	double (*cost_model)(struct starpu_buffer_descr *) STARPU_DEPRECATED;
			
 
				 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
			
 
				 
			
 
				+	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
			
 
				+
			
 
				 	/* per-architecture model */
			
 
				 	struct starpu_per_arch_perfmodel per_arch[STARPU_NARCH_VARIATIONS][STARPU_MAXIMPLEMENTATIONS];
			
 
				 
			
--- a/src/common/fxt.h
+++ b/src/common/fxt.h
@@ -199,11 +199,11 @@ do {									\
 
				 	}								\
			
 
				 } while(0);
			
 
				 
			
 
				-#define _STARPU_TRACE_END_CODELET_BODY(job, archtype)			\
			
 
				+#define _STARPU_TRACE_END_CODELET_BODY(job, perf_arch, nimpl, archtype)			\
			
 
				 do {									\
			
 
				-	const size_t job_size = _starpu_job_get_data_size((job));	\
			
 
				-	const uint32_t job_hash = _starpu_compute_buffers_footprint(job);\
			
 
				-	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, job, (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
			
 
				+	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));	\
			
 
				+	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
			
 
				+	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
			
 
				 } while(0);
			
 
				 
			
 
				 #define _STARPU_TRACE_START_CALLBACK(job)	\
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -27,22 +27,26 @@
 
				 #include <profiling/bound.h>
			
 
				 #include <starpu_top.h>
			
 
				 
			
 
				-size_t _starpu_job_get_data_size(struct _starpu_job *j)
			
 
				+size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j)
			
 
				 {
			
 
				-	size_t size = 0;
			
 
				-
			
 
				 	struct starpu_task *task = j->task;
			
 
				 
			
 
				-	unsigned nbuffers = task->cl->nbuffers;
			
 
				-
			
 
				-	unsigned buffer;
			
 
				-	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				-	{
			
 
				-		starpu_data_handle_t handle = task->handles[buffer];
			
 
				-		size += _starpu_data_get_size(handle);
			
 
				+	if (model && model->per_arch[arch][nimpl].size_base) {
			
 
				+		return model->per_arch[arch][nimpl].size_base(task, arch, nimpl);
			
 
				+	} else if (model && model->size_base) {
			
 
				+		return model->size_base(task, nimpl);
			
 
				+	} else {
			
 
				+		unsigned nbuffers = task->cl->nbuffers;
			
 
				+		size_t size = 0;
			
 
				+
			
 
				+		unsigned buffer;
			
 
				+		for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				+		{
			
 
				+			starpu_data_handle_t handle = task->handles[buffer];
			
 
				+			size += _starpu_data_get_size(handle);
			
 
				+		}
			
 
				+		return size;
			
 
				 	}
			
 
				-
			
 
				-	return size;
			
 
				 }
			
 
				 
			
 
				 /* we need to identify each task to generate the DAG. */
			
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009-2011  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
@@ -151,7 +151,7 @@ unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j, unsigned
 
				 void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_already_locked);
			
 
				 
			
 
				 /* Get the sum of the size of the data accessed by the job. */
			
 
				-size_t _starpu_job_get_data_size(struct _starpu_job *j);
			
 
				+size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j);
			
 
				 
			
 
				 /* Get a task from the local pool of tasks that were explicitly attributed to
			
 
				  * that worker. */
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -893,7 +893,7 @@ void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
 
				 double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl)
			
 
				 {
			
 
				 	double exp = -1.0;
			
 
				-	size_t size = _starpu_job_get_data_size(j);
			
 
				+	size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
			
 
				 	struct starpu_regression_model *regmodel;
			
 
				 
			
 
				 	regmodel = &model->per_arch[arch][nimpl].regression;
			
@@ -907,7 +907,7 @@ double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model
 
				 double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
			
 
				 {
			
 
				 	double exp = -1.0;
			
 
				-	size_t size = _starpu_job_get_data_size(j);
			
 
				+	size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
			
 
				 	struct starpu_regression_model *regmodel;
			
 
				 
			
 
				 	regmodel = &model->per_arch[arch][nimpl].regression;
			
@@ -916,7 +916,7 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
				 		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
			
 
				 	else
			
 
				 	{
			
 
				-		uint32_t key = _starpu_compute_buffers_footprint(j);
			
 
				+		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
			
 
				 		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
			
 
				 		struct starpu_htbl32_node *history = per_arch_model->history;
			
 
				 		struct starpu_history_entry *entry;
			
@@ -945,7 +945,7 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 
				 	struct starpu_history_entry *entry;
			
 
				 	struct starpu_htbl32_node *history;
			
 
				 
			
 
				-	uint32_t key = _starpu_compute_buffers_footprint(j);
			
 
				+	uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
			
 
				 
			
 
				 	per_arch_model = &model->per_arch[arch][nimpl];
			
 
				 
			
@@ -989,7 +989,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 			struct starpu_htbl32_node *history;
			
 
				 			struct starpu_htbl32_node **history_ptr;
			
 
				 			struct starpu_history_list **list;
			
 
				-			uint32_t key = _starpu_compute_buffers_footprint(j);
			
 
				+			uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
			
 
				 
			
 
				 			history = per_arch_model->history;
			
 
				 			history_ptr = &per_arch_model->history;
			
@@ -1008,7 +1008,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 					entry->deviation = 0.0;
			
 
				 					entry->sum2 = measured*measured;
			
 
				 
			
 
				-					entry->size = _starpu_job_get_data_size(j);
			
 
				+					entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);
			
 
				 
			
 
				 					entry->footprint = key;
			
 
				 					entry->nsample = 1;
			
@@ -1037,7 +1037,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 			reg_model = &per_arch_model->regression;
			
 
				 
			
 
				 			/* update the regression model */
			
 
				-			size_t job_size = _starpu_job_get_data_size(j);
			
 
				+			size_t job_size = _starpu_job_get_data_size(model, arch, nimpl, j);
			
 
				 			double logy, logx;
			
 
				 			logx = log((double)job_size);
			
 
				 			logy = log(measured);
			
@@ -1069,11 +1069,11 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 		FILE * debug_file = per_arch_model->debug_file;
			
 
				 
			
 
				 		if (!j->footprint_is_computed)
			
 
				-			(void) _starpu_compute_buffers_footprint(j);
			
 
				+			(void) _starpu_compute_buffers_footprint(model, arch, nimpl, j);
			
 
				 
			
 
				 		STARPU_ASSERT(j->footprint_is_computed);
			
 
				 
			
 
				-		fprintf(debug_file, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(j), measured, task->predicted, task->predicted_transfer, cpuid);
			
 
				+		fprintf(debug_file, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(model, arch, nimpl, j), measured, task->predicted, task->predicted_transfer, cpuid);
			
 
				 		unsigned i;
			
 
				 
			
 
				 		for (i = 0; i < task->cl->nbuffers; i++)
			
--- a/src/datawizard/footprint.c
+++ b/src/datawizard/footprint.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009-2011  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -18,7 +18,7 @@
 
				 #include <datawizard/footprint.h>
			
 
				 #include <common/hash.h>
			
 
				 
			
 
				-uint32_t _starpu_compute_buffers_footprint(struct _starpu_job *j)
			
 
				+uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j)
			
 
				 {
			
 
				 	if (j->footprint_is_computed)
			
 
				 		return j->footprint;
			
@@ -28,13 +28,21 @@ uint32_t _starpu_compute_buffers_footprint(struct _starpu_job *j)
 
				 
			
 
				 	struct starpu_task *task = j->task;
			
 
				 
			
 
				-	for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
			
 
				-	{
			
 
				-		starpu_data_handle_t handle = task->handles[buffer];
			
 
				-
			
 
				-		uint32_t handle_footprint = _starpu_data_get_footprint(handle);
			
 
				-
			
 
				-		footprint = _starpu_crc32_be(handle_footprint, footprint);
			
 
				+	if (model && model->per_arch[arch][nimpl].size_base) {
			
 
				+		size_t size = model->per_arch[arch][nimpl].size_base(task, arch, nimpl);
			
 
				+		footprint = _starpu_crc32_be_n(&size, sizeof(size), footprint);
			
 
				+	} else if (model && model->size_base) {
			
 
				+		size_t size = model->size_base(task, nimpl);
			
 
				+		footprint = _starpu_crc32_be_n(&size, sizeof(size), footprint);
			
 
				+	} else {
			
 
				+		for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
			
 
				+		{
			
 
				+			starpu_data_handle_t handle = task->handles[buffer];
			
 
				+
			
 
				+			uint32_t handle_footprint = _starpu_data_get_footprint(handle);
			
 
				+
			
 
				+			footprint = _starpu_crc32_be(handle_footprint, footprint);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	j->footprint = footprint;
			
--- a/src/datawizard/footprint.h
+++ b/src/datawizard/footprint.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009-2011  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -24,7 +24,7 @@
 
				 
			
 
				 /* Compute the footprint that characterizes the job and cache it into the job
			
 
				  * structure. */
			
 
				-uint32_t _starpu_compute_buffers_footprint(struct _starpu_job *j);
			
 
				+uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j);
			
 
				 
			
 
				 /* Compute the footprint that characterizes the layout of the data handle. */
			
 
				 uint32_t _starpu_compute_data_footprint(starpu_data_handle_t handle);
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -60,7 +60,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 
				 		func(task->interfaces, task->cl_arg);
			
 
				 	}
			
 
				 
			
 
				-	_starpu_driver_end_job(cpu_args, j, &codelet_end, rank);
			
 
				+	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank);
			
 
				 
			
 
				 	if (is_parallel_task)
			
 
				 		_STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -221,7 +221,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
				 	STARPU_ASSERT(func);
			
 
				 	func(task->interfaces, task->cl_arg);
			
 
				 
			
 
				-	_starpu_driver_end_job(args, j, &codelet_end, 0);
			
 
				+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
			
 
				 
			
 
				 	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end);
			
 
				 
			
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -60,7 +60,7 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 
				 	_STARPU_TRACE_START_CODELET_BODY(j);
			
 
				 }
			
 
				 
			
 
				-void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_end, int rank)
			
 
				+void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch, struct timespec *codelet_end, int rank)
			
 
				 {
			
 
				 	struct starpu_task *task = j->task;
			
 
				 	struct starpu_codelet *cl = task->cl;
			
@@ -71,7 +71,7 @@ void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
 
				 	unsigned calibrate_model = 0;
			
 
				 	enum starpu_perf_archtype archtype STARPU_ATTRIBUTE_UNUSED = args->perf_arch;
			
 
				 
			
 
				-	_STARPU_TRACE_END_CODELET_BODY(j, archtype);
			
 
				+	_STARPU_TRACE_END_CODELET_BODY(j, perf_arch, j->nimpl, archtype);
			
 
				 
			
 
				 	if (cl->model && cl->model->benchmarking)
			
 
				 		calibrate_model = 1;
			
--- a/src/drivers/driver_common/driver_common.h
+++ b/src/drivers/driver_common/driver_common.h
@@ -25,7 +25,7 @@
 
				 
			
 
				 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
			
 
				 			      struct timespec *codelet_start, int rank);
			
 
				-void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
			
 
				+void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch,
			
 
				 			    struct timespec *codelet_end, int rank);
			
 
				 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
			
 
				 					enum starpu_perf_archtype perf_arch,
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -597,7 +597,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
				 	STARPU_ASSERT(func);
			
 
				 	func(task->interfaces, task->cl_arg);
			
 
				 
			
 
				-	_starpu_driver_end_job(args, j, &codelet_end, 0);
			
 
				+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
			
 
				 
			
 
				 	_starpu_driver_update_job_feedback(j, args, args->perf_arch,
			
 
				 							&codelet_start, &codelet_end);
			
--- a/src/profiling/bound.c
+++ b/src/profiling/bound.c
@@ -181,7 +181,7 @@ static void new_task(struct _starpu_job *j)
 
				 	t->tag_id = j->task->tag_id;
			
 
				 	t->use_tag = j->task->use_tag;
			
 
				 	t->cl = j->task->cl;
			
 
				-	t->footprint = _starpu_compute_buffers_footprint(j);
			
 
				+	t->footprint = _starpu_compute_buffers_footprint(NULL, 0, 0, j);
			
 
				 	t->priority = j->task->priority;
			
 
				 	t->deps = NULL;
			
 
				 	t->depsn = 0;
			
@@ -214,7 +214,7 @@ void _starpu_bound_record(struct _starpu_job *j)
 
				 	{
			
 
				 		struct bound_task_pool *tp;
			
 
				 
			
 
				-		_starpu_compute_buffers_footprint(j);
			
 
				+		_starpu_compute_buffers_footprint(NULL, 0, 0, j);
			
 
				 
			
 
				 		if (last && last->cl == j->task->cl && last->footprint == j->footprint)
			
 
				 			tp = last;