il y a 16 ans · 487d630b25
--- a/include/starpu-perfmodel.h
+++ b/include/starpu-perfmodel.h
@@ -33,7 +33,7 @@ struct starpu_buffer_descr_t;
 
				 
			
 
				 /* on most systems we will consider one or two architectures as all accelerators
			
 
				    are likely to be identical */
			
 
				-#define NARCH_VARIATIONS	2
			
 
				+#define NARCH_VARIATIONS	3
			
 
				 
			
 
				 enum starpu_perf_archtype {
			
 
				 	STARPU_CORE_DEFAULT = 0,
			
--- a/include/starpu-task.h
+++ b/include/starpu-task.h
@@ -17,6 +17,7 @@
 
				 #ifndef __STARPU_TASK_H__
			
 
				 #define __STARPU_TASK_H__
			
 
				 
			
 
				+#include <errno.h>
			
 
				 #include <starpu_config.h>
			
 
				 
			
 
				 /* this is a randomly chosen value ... */
			
--- a/src/core/mechanisms/deque_queues.c
+++ b/src/core/mechanisms/deque_queues.c
@@ -155,6 +155,70 @@ job_t deque_pop_task(struct jobq_s *q)
 
				 	return j;
			
 
				 }
			
 
				 
			
 
				+struct job_list_s * deque_pop_every_task(struct jobq_s *q, uint32_t where)
			
 
				+{
			
 
				+	struct job_list_s *new_list, *old_list;
			
 
				+	/* Detach every queued job whose codelet `where` mask shares a bit with the `where` argument and return them as a new list; returns NULL when the queue is empty or nothing matched. */
			
 
				+	STARPU_ASSERT(q);
			
 
				+	struct deque_jobq_s *deque_queue = q->queue;
			
 
				+
			
 
				+	/* hold the queue's activity mutex for the whole scan; this call does not wait for work to arrive: an empty queue simply yields NULL */
			
 
				+	pthread_mutex_lock(&q->activity_mutex);
			
 
				+
			
 
				+	if (deque_queue->njobs == 0)
			
 
				+	{
			
 
				+		new_list = NULL;
			
 
				+	}
			
 
				+	else {
			
 
				+		/* the queue is not empty: move the matching jobs into a freshly allocated list */
			
 
				+		old_list = deque_queue->jobq;
			
 
				+		new_list = job_list_new();
			
 
				+
			
 
				+		unsigned new_list_size = 0;
			
 
				+
			
 
				+		job_itor_t i;
			
 
				+		job_t next_job;
			
 
				+		/* note that this starts at the _head_ of the list, so we put
			
 
				+ 		 * elements at the back of the new list */
			
 
				+		for(i = job_list_begin(old_list);
			
 
				+			i != job_list_end(old_list);
			
 
				+			i  = next_job)
			
 
				+		{
			
 
				+			next_job = job_list_next(i); /* fetched first because i may be erased from old_list below */
			
 
				+
			
 
				+			if (i->task->cl->where & where)
			
 
				+			{
			
 
				+				/* this element can be moved into the new list */
			
 
				+				new_list_size++;
			
 
				+				
			
 
				+				job_list_erase(old_list, i);
			
 
				+				job_list_push_back(new_list, i);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		if (new_list_size == 0)
			
 
				+		{
			
 
				+			/* nothing matched: release the empty list and report NULL instead */
			
 
				+			job_list_delete(new_list);
			
 
				+			new_list = NULL;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			deque_queue->njobs -= new_list_size;
			
 
				+	
			
 
				+			/* the jobs are already ours; the global counter is only updated now, so at worst
			
 
				+			 * other threads briefly believe some work remains and soon discover it does not */
			
 
				+			pthread_mutex_lock(sched_mutex);
			
 
				+			total_number_of_jobs -= new_list_size;
			
 
				+			pthread_mutex_unlock(sched_mutex);
			
 
				+		}
			
 
				+	}
			
 
				+	
			
 
				+	pthread_mutex_unlock(&q->activity_mutex);
			
 
				+
			
 
				+	return new_list;
			
 
				+}
			
 
				+
			
 
				 job_t deque_non_blocking_pop_task(struct jobq_s *q)
			
 
				 {
			
 
				 	job_t j = NULL;
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -118,6 +118,7 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel_t *model, unsigned
 
				 {
			
 
				 	parse_per_arch_model_file(f, &model->per_arch[STARPU_CORE_DEFAULT], scan_history);
			
 
				 	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_DEFAULT], scan_history);
			
 
				+	parse_per_arch_model_file(f, &model->per_arch[STARPU_GORDON_DEFAULT], scan_history);
			
 
				 }
			
 
				 
			
 
				 static void dump_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t *per_arch_model)
			
@@ -153,6 +154,7 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel_t *model)
 
				 {
			
 
				 	dump_per_arch_model_file(f, &model->per_arch[STARPU_CORE_DEFAULT]);
			
 
				 	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_DEFAULT]);
			
 
				+	dump_per_arch_model_file(f, &model->per_arch[STARPU_GORDON_DEFAULT]);
			
 
				 }
			
 
				 
			
 
				 static void initialize_per_arch_model(struct starpu_per_arch_perfmodel_t *per_arch_model)
			
@@ -165,6 +167,7 @@ static void initialize_model(struct starpu_perfmodel_t *model)
 
				 {
			
 
				 	initialize_per_arch_model(&model->per_arch[STARPU_CORE_DEFAULT]);
			
 
				 	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_DEFAULT]);
			
 
				+	initialize_per_arch_model(&model->per_arch[STARPU_GORDON_DEFAULT]);
			
 
				 }
			
 
				 
			
 
				 static struct starpu_model_list_t *registered_models = NULL;
			
@@ -208,6 +211,10 @@ void register_model(struct starpu_perfmodel_t *model)
 
				 	get_model_debug_path(model, "core", debugpath, 256);
			
 
				 	model->per_arch[STARPU_CORE_DEFAULT].debug_file = fopen(debugpath, "a+");
			
 
				 	STARPU_ASSERT(model->per_arch[STARPU_CORE_DEFAULT].debug_file);
			
 
				+
			
 
				+	get_model_debug_path(model, "gordon", debugpath, 256);
			
 
				+	model->per_arch[STARPU_GORDON_DEFAULT].debug_file = fopen(debugpath, "a+");
			
 
				+	STARPU_ASSERT(model->per_arch[STARPU_GORDON_DEFAULT].debug_file);
			
 
				 #endif
			
 
				 
			
 
				 	return;
			
@@ -249,6 +256,7 @@ void save_history_based_model(struct starpu_perfmodel_t *model)
 
				 	fclose(f);
			
 
				 
			
 
				 #ifdef DEBUG_MODEL
			
 
				+	fclose(model->gordon_debug_file);
			
 
				 	fclose(model->cuda_debug_file);
			
 
				 	fclose(model->core_debug_file);
			
 
				 #endif
			
--- a/src/core/policies/deque-modeling-policy.c
+++ b/src/core/policies/deque-modeling-policy.c
@@ -37,6 +37,31 @@ static job_t dm_pop_task(struct jobq_s *q)
 
				 	return j;
			
 
				 }
			
 
				 
			
 
				+static struct job_list_s *dm_pop_every_task(struct jobq_s *q, uint32_t where)
			
 
				+{
			
 
				+	struct job_list_s *new_list;
			
 
				+	/* Pop every eligible job through fifo_pop_every_task(), then remove each popped job's predicted duration from this queue's exp_len / exp_start / exp_end bookkeeping. */
			
 
				+	new_list = fifo_pop_every_task(q, where);
			
 
				+	if (new_list) {
			
 
				+		job_itor_t i;
			
 
				+		for(i = job_list_begin(new_list);
			
 
				+			i != job_list_end(new_list);
			
 
				+			i = job_list_next(i))
			
 
				+		{
			
 
				+			struct fifo_jobq_s *fifo = q->queue;
			
 
				+			double model = i->predicted;
			
 
				+			/* NOTE(review): exp_start and exp_end are overwritten on every iteration, so only the last job's prediction remains in exp_start; the /1000000 presumably rescales timing_now() to the unit of `predicted` - confirm both */
			
 
				+			fifo->exp_len -= model;
			
 
				+			fifo->exp_start = timing_now()/1000000 + model;
			
 
				+			fifo->exp_end = fifo->exp_start + fifo->exp_len;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return new_list; /* NULL when fifo_pop_every_task() returned NULL */
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				 static int _dm_push_task(struct jobq_s *q __attribute__ ((unused)), job_t j, unsigned prio)
			
 
				 {
			
 
				 	/* find the queue */
			
@@ -130,6 +155,7 @@ static struct jobq_s *init_dm_fifo(void)
 
				 	q->push_task = dm_push_task; 
			
 
				 	q->push_prio_task = dm_push_prio_task; 
			
 
				 	q->pop_task = dm_pop_task;
			
 
				+	q->pop_every_task = dm_pop_every_task;
			
 
				 	q->who = 0;
			
 
				 
			
 
				 	queue_array[nworkers++] = q;
			
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -234,12 +234,24 @@ static void gordon_callback_list_func(void *arg)
 
				 
			
 
				 //	fprintf(stderr, "gordon callback : push job j %p\n", task_wrapper->j);
			
 
				 
			
 
				+	unsigned task_cnt = 0;
			
 
				+
			
 
				 	/* XXX 0 was hardcoded */
			
 
				 	take_mutex(&terminated_list_mutexes[0]);
			
 
				 	while (!job_list_empty(wrapper_list))
			
 
				 	{
			
 
				 		job_t j = job_list_pop_back(wrapper_list);
			
 
				+
			
 
				+		struct gordon_ppu_job_s * gordon_task = &task_wrapper->gordon_job[task_cnt];
			
 
				+		
			
 
				+		if (j->task->cl->model && j->task->cl->model->benchmarking)
			
 
				+		{
			
 
				+			//fprintf(stderr, "gordon_task -> execution time %lx\n", gordon_task->measured);
			
 
				+			update_perfmodel_history(j, STARPU_GORDON_DEFAULT, gordon_task->measured);
			
 
				+		}
			
 
				+
			
 
				 		job_list_push_back(terminated_list, j);
			
 
				+		task_cnt++;
			
 
				 	}
			
 
				 
			
 
				 	/* the job list was allocated by the gordon driver itself */
			
@@ -338,6 +350,9 @@ int inject_task_list(struct job_list_s *list, struct worker_s *worker)
 
				 
			
 
				 		gordon_jobs[index].index = task->cl->gordon_func;
			
 
				 
			
 
				+		if (j->task->cl->model && j->task->cl->model->benchmarking)
			
 
				+			gordon_jobs[index].sampling = 1;
			
 
				+
			
 
				 		/* we should not hardcode the memory node ... XXX */
			
 
				 		unsigned memory_node = 0;
			
 
				 		starpu_to_gordon_buffers(j, &gordon_jobs[index], memory_node);