|
@@ -454,12 +454,18 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
|
|
if( elapsed_flops != 0.0)
|
|
if( elapsed_flops != 0.0)
|
|
{
|
|
{
|
|
double curr_time = starpu_timing_now();
|
|
double curr_time = starpu_timing_now();
|
|
|
|
+ size_t elapsed_data_used = sc_w->elapsed_data[worker];
|
|
double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0;
|
|
double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0;
|
|
enum starpu_archtype arch = starpu_worker_get_type(worker);
|
|
enum starpu_archtype arch = starpu_worker_get_type(worker);
|
|
if(arch == STARPU_CUDA_WORKER)
|
|
if(arch == STARPU_CUDA_WORKER)
|
|
{
|
|
{
|
|
- double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
|
|
+
|
|
- elapsed_time += (elapsed_data_used / transfer_velocity) / 1000000 ;
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
double latency = starpu_get_latency_RAM_CUDA(worker);
|
|
double latency = starpu_get_latency_RAM_CUDA(worker);
|
|
|
|
|
|
elapsed_time += (elapsed_tasks * latency)/1000000;
|
|
elapsed_time += (elapsed_tasks * latency)/1000000;
|