浏览代码

Fix ispeed_lp: take the RAM-to-GPU bandwidth into account only when the GPU does not belong to the context, because in that case a forced data transfer may be needed.

Andra Hugo 12 年之前
父节点
当前提交
a0074841dc

+ 10 - 0
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -49,6 +49,16 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 			{
 				enum starpu_archtype arch = starpu_worker_get_type(worker);
 				velocity[s][w] = sched_ctx_hypervisor_get_velocity(sc_w, arch);
+				if(arch == STARPU_CUDA_WORKER)
+				{
+					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx);
+					if(!worker_in_ctx)
+					{
+						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker) / 1000;
+						velocity[s][w] = (velocity[s][w] * transfer_velocity) / (velocity[s][w] + transfer_velocity);
+					}
+				}
+
 			}
 			
 //			printf("v[w%d][s%d] = %lf\n",w, s, velocity[s][w]);

+ 8 - 2
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -454,12 +454,18 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
         if( elapsed_flops != 0.0)
         {
                 double curr_time = starpu_timing_now();
+		size_t elapsed_data_used = sc_w->elapsed_data[worker];
                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
  		enum starpu_archtype arch = starpu_worker_get_type(worker);
 		if(arch == STARPU_CUDA_WORKER)
 		{
-			double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
-			elapsed_time +=  (elapsed_data_used / transfer_velocity) / 1000000 ;
+/* 			unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx); */
+/* 			if(!worker_in_ctx) */
+/* 			{ */
+
+/* 				double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker); */
+/* 				elapsed_time +=  (elapsed_data_used / transfer_velocity) / 1000000 ; */
+/* 			} */
 			double latency = starpu_get_latency_RAM_CUDA(worker);
 //			printf("%d/%d: latency %lf elapsed_time before %lf ntasks %d\n", worker, sc_w->sched_ctx, latency, elapsed_time, elapsed_tasks);
 			elapsed_time += (elapsed_tasks * latency)/1000000;