14 years ago · 9b7c782713
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -1358,15 +1358,10 @@ tries to minimize is @code{alpha * T_execution + beta * T_data_transfer}, where
 
																 accurate), and @code{T_data_transfer} is the estimated data transfer time. The
															
 
																 latter is however estimated based on bus calibration before execution start,
															
 
																 i.e. with an idle machine. You can force bus re-calibration by running
															
 
																-@code{starpu_calibrate_bus}. When StarPU manages several GPUs, such estimation
															
 
																-is not accurate any more. Beta can then be used to correct this by hand. For
															
 
																-instance, you can use @code{export STARPU_BETA=2} to double the transfer
															
 
																-time estimation, e.g. because there are two GPUs in the machine. This is of
															
 
																-course imprecise, but in practice, a rough estimation already gives the good
															
 
																-results that a precise estimation would give.
															
 
																-
															
 
																-Measuring the actual data transfer time is however on our TODO-list to
															
 
																-accurately estimate data transfer penalty without the need of a hand-tuned beta parameter.
															
 
																+@code{starpu_calibrate_bus}. The beta parameter defaults to 1, but it can be
															
 
																+worth trying to tweak it by using @code{export STARPU_BETA=2} for instance.
															
 
																+This is of course imprecise, but in practice, a rough estimation already gives
															
 
																+the good results that a precise estimation would give.
															
 
																 @node Power-based scheduling
															
 
																 @section Power-based scheduling
															
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -1028,6 +1028,7 @@ double _starpu_predict_transfer_time(unsigned src_node, unsigned dst_node, size_
 
																 {
															
 
																 	double bandwidth = bandwidth_matrix[src_node][dst_node];
															
 
																 	double latency = latency_matrix[src_node][dst_node];
															
 
																+	struct starpu_machine_topology_s *topology = &_starpu_get_machine_config()->topology;
															
 
																-	return latency + size/bandwidth;
															
 
																+	return latency + (size/bandwidth)*2*(topology->ncudagpus+topology->nopenclgpus);
															
 
																 }