Browse Source

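Minor cleanups to avoid some dereferencing: pass struct starpu_task * instead of struct job_s * to the perfmodel and dmda helpers (no more repeated j->task), only synchronize the CUDA thread before the codelet when BENCHMARK_COMM is set, and drop the stale MODEL_DEBUG comment markers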

Cédric Augonnet 16 years ago
parent
commit
1e41ce878c

+ 12 - 11
src/core/perfmodel/perfmodel.c

@@ -25,7 +25,7 @@
  * PER ARCH model
  */
 
-static double per_arch_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct job_s *j)
+static double per_arch_task_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_task *task)
 {
 	double exp = -1.0;
 	double (*per_arch_cost_model)(struct starpu_buffer_descr_t *);
@@ -48,7 +48,7 @@ static double per_arch_job_expected_length(struct starpu_perfmodel_t *model, enu
 	per_arch_cost_model = model->per_arch[arch].cost_model;
 
 	if (per_arch_cost_model)
-		exp = per_arch_cost_model(j->task->buffers);
+		exp = per_arch_cost_model(task->buffers);
 
 	return exp;
 }
@@ -57,13 +57,13 @@ static double per_arch_job_expected_length(struct starpu_perfmodel_t *model, enu
  * Common model
  */
 
-static double common_job_expected_length(struct starpu_perfmodel_t *model, uint32_t who, struct job_s *j)
+static double common_task_expected_length(struct starpu_perfmodel_t *model, uint32_t who, struct starpu_task *task)
 {
 	double exp;
 
 	if (model->cost_model) {
 		float alpha;
-		exp = model->cost_model(j->task->buffers);
+		exp = model->cost_model(task->buffers);
 		switch (who) {
 			case CORE:
 				alpha = CORE_ALPHA;
@@ -87,15 +87,16 @@ static double common_job_expected_length(struct starpu_perfmodel_t *model, uint3
 
 double job_expected_length(uint32_t who, struct job_s *j, enum starpu_perf_archtype arch)
 {
-	struct starpu_perfmodel_t *model = j->task->cl->model;
+	struct starpu_task *task = j->task;
+	struct starpu_perfmodel_t *model = task->cl->model;
 
 	if (model) {
 		switch (model->type) {
 			case PER_ARCH:
-				return per_arch_job_expected_length(model, arch, j);
+				return per_arch_task_expected_length(model, arch, task);
 
 			case COMMON:
-				return common_job_expected_length(model, who, j);
+				return common_task_expected_length(model, who, task);
 
 			case HISTORY_BASED:
 				return history_based_job_expected_length(model, arch, j);
@@ -113,19 +114,19 @@ double job_expected_length(uint32_t who, struct job_s *j, enum starpu_perf_archt
 }
 
 /* Data transfer performance modeling */
-double data_expected_penalty(struct jobq_s *q, struct job_s *j)
+double data_expected_penalty(struct jobq_s *q, struct starpu_task *task)
 {
 	uint32_t memory_node = q->memory_node;
-	unsigned nbuffers = j->task->cl->nbuffers;
+	unsigned nbuffers = task->cl->nbuffers;
 	unsigned buffer;
 
 	double penalty = 0.0;
 
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
-		starpu_data_handle handle = j->task->buffers[buffer].handle;
+		starpu_data_handle handle = task->buffers[buffer].handle;
 
-		if (j->task->buffers[buffer].mode == STARPU_W)
+		if (task->buffers[buffer].mode == STARPU_W)
 			break;
 
 		if (!is_data_present_or_requested(handle, memory_node))

+ 1 - 1
src/core/perfmodel/perfmodel.h

@@ -95,7 +95,7 @@ double regression_based_job_expected_length(struct starpu_perfmodel_t *model,
 void update_perfmodel_history(struct job_s *j, enum starpu_perf_archtype arch,
 				unsigned cpuid, double measured);
 
-double data_expected_penalty(struct jobq_s *q, struct job_s *j);
+double data_expected_penalty(struct jobq_s *q, struct starpu_task *task);
 
 void create_sampling_directory_if_needed(void);
 

+ 10 - 8
src/core/policies/deque-modeling-policy-data-aware.c

@@ -38,15 +38,15 @@ static job_t dmda_pop_task(struct jobq_s *q)
 	return j;
 }
 
-static void update_data_requests(struct jobq_s *q, struct job_s *j)
+static void update_data_requests(struct jobq_s *q, struct starpu_task *task)
 {
 	uint32_t memory_node = q->memory_node;
-	unsigned nbuffers = j->task->cl->nbuffers;
+	unsigned nbuffers = task->cl->nbuffers;
 	unsigned buffer;
 
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
-		starpu_data_handle handle = j->task->buffers[buffer].handle;
+		starpu_data_handle handle = task->buffers[buffer].handle;
 
 		set_data_requested_flag_if_needed(handle, memory_node);
 	}
@@ -73,6 +73,8 @@ static int _dmda_push_task(struct jobq_s *q __attribute__ ((unused)) , job_t j,
 	double model_best = 0.0;
 	double penality_best = 0.0;
 
+	struct starpu_task *task = j->task;
+
 	for (worker = 0; worker < nworkers; worker++)
 	{
 		fifo = queue_array[worker]->queue;
@@ -80,7 +82,7 @@ static int _dmda_push_task(struct jobq_s *q __attribute__ ((unused)) , job_t j,
 		fifo->exp_start = STARPU_MAX(fifo->exp_start, timing_now());
 		fifo->exp_end = STARPU_MAX(fifo->exp_end, timing_now());
 
-		if ((queue_array[worker]->who & j->task->cl->where) == 0)
+		if ((queue_array[worker]->who & task->cl->where) == 0)
 		{
 			/* no one on that queue may execute this task */
 			continue;
@@ -90,7 +92,7 @@ static int _dmda_push_task(struct jobq_s *q __attribute__ ((unused)) , job_t j,
 							j, queue_array[worker]->arch);
 
 		//local_data_penalty[worker] = 0;
-		local_data_penalty[worker] = data_expected_penalty(queue_array[worker], j);
+		local_data_penalty[worker] = data_expected_penalty(queue_array[worker], task);
 
 		if (local_task_length[worker] == -1.0)
 		{
@@ -118,7 +120,7 @@ static int _dmda_push_task(struct jobq_s *q __attribute__ ((unused)) , job_t j,
 		{
 			fifo = queue_array[worker]->queue;
 	
-			if ((queue_array[worker]->who & j->task->cl->where) == 0)
+			if ((queue_array[worker]->who & task->cl->where) == 0)
 			{
 				/* no one on that queue may execute this task */
 				continue;
@@ -164,10 +166,10 @@ static int _dmda_push_task(struct jobq_s *q __attribute__ ((unused)) , job_t j,
 	j->predicted = model_best;
 	j->penality = penality_best;
 
-	update_data_requests(queue_array[best], j);
+	update_data_requests(queue_array[best], task);
 	
 	if (use_prefetch)
-		prefetch_task_input_on_node(j->task, queue_array[best]->memory_node);
+		prefetch_task_input_on_node(task, queue_array[best]->memory_node);
 
 	if (prio) {
 		return fifo_push_prio_task(queue_array[best], j);

+ 2 - 5
src/drivers/cuda/driver_cuda.c

@@ -85,7 +85,7 @@ static int execute_job_on_cuda(job_t j, struct worker_s *args)
 		calibrate_model = 1;
 
 	/* we do not take communication into account when modeling the performance */
-	if (calibrate_model || BENCHMARK_COMM)
+	if (BENCHMARK_COMM)
 	{
 		cures = cudaThreadSynchronize();
 		if (STARPU_UNLIKELY(cures))
@@ -117,7 +117,7 @@ static int execute_job_on_cuda(job_t j, struct worker_s *args)
 	GET_TICK(codelet_start);
 	func(task->interface, task->cl_arg);
 
-	task->cl->per_worker_stats[args->workerid]++;
+	cl->per_worker_stats[args->workerid]++;
 
 	GET_TICK(codelet_end);
 
@@ -125,8 +125,6 @@ static int execute_job_on_cuda(job_t j, struct worker_s *args)
 
 	TRACE_END_CODELET_BODY(j);	
 
-//#ifdef MODEL_DEBUG
-	
 	if (calibrate_model || BENCHMARK_COMM)
 	{
 		double measured = timing_delay(&codelet_start, &codelet_end);
@@ -142,7 +140,6 @@ static int execute_job_on_cuda(job_t j, struct worker_s *args)
 		if (calibrate_model)
 			update_perfmodel_history(j, args->perf_arch, (unsigned)args->id, measured);
 	}
-//#endif
 
 	args->jobq->total_job_performed++;