
add Beranger's ordo

Andra Hugo 10 years ago
commit 305100c749

+ 2 - 1
Makefile.am

@@ -74,7 +74,8 @@ versinclude_HEADERS = 				\
 	include/starpu_profiling.h		\
 	include/starpu_bound.h			\
 	include/starpu_scheduler.h		\
-	include/starpu_sched_component.h		\
+	include/schedulers/heteroprio.h		\
+	include/starpu_sched_component.h	\
 	include/starpu_sched_ctx.h		\
 	include/starpu_sched_ctx_hypervisor.h	\
 	include/starpu_top.h			\

+ 1 - 0
examples/Makefile.am

@@ -195,6 +195,7 @@ STARPU_EXAMPLES =				\
 	matvecmult/matvecmult			\
 	profiling/profiling			\
 	scheduler/dummy_sched			\
+	scheduler/heteroprio_test		\
 	sched_ctx/sched_ctx			\
 	sched_ctx/prio				\
 	sched_ctx/dummy_sched_with_ctx		\

+ 199 - 0
examples/scheduler/heteroprio_test.c

@@ -0,0 +1,199 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <schedulers/heteroprio.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+void initSchedulerCallback(){
+	// CPU uses 3 buckets
+	starpu_heteroprio_set_nb_prios(0, FSTARPU_CPU_IDX, 3);
+	// It uses direct mapping idx => idx
+	unsigned idx;
+	for(idx = 0; idx < 3; ++idx){
+		starpu_heteroprio_set_mapping(0, FSTARPU_CPU_IDX, idx, idx);
+		starpu_heteroprio_set_faster_arch(0, FSTARPU_CPU_IDX, idx);
+	}
+#ifdef STARPU_USE_OPENCL
+	// OpenCL is enabled and uses 2 buckets
+	starpu_heteroprio_set_nb_prios(0, FSTARPU_OPENCL_IDX, 2);
+	// OpenCL will first look to priority 2
+	starpu_heteroprio_set_mapping(0, FSTARPU_OPENCL_IDX, 0, 2);
+	// For this bucket OpenCL is the fastest
+	starpu_heteroprio_set_faster_arch(0, FSTARPU_OPENCL_IDX, 2);
+	// And CPU is 4 times slower
+	starpu_heteroprio_set_arch_slow_factor(0, FSTARPU_CPU_IDX, 2, 4.0f);
+
+	starpu_heteroprio_set_mapping(0, FSTARPU_OPENCL_IDX, 1, 1);
+	// We let the CPU as the fastest and tell that OpenCL is 1.7 times slower
+	starpu_heteroprio_set_arch_slow_factor(0, FSTARPU_OPENCL_IDX, 1, 1.7f);
+#endif
+}
+
+
+void callback_a_cpu(void *buffers[], void *cl_arg){
+	usleep(100000);
+	printf("COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stdout);
+}
+
+void callback_b_cpu(void *buffers[], void *cl_arg){
+	usleep(100000);
+	printf("COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stdout);
+}
+
+void callback_c_cpu(void *buffers[], void *cl_arg){
+	usleep(100000);
+	printf("COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stdout);
+}
+
+#ifdef STARPU_USE_OPENCL
+void callback_a_opencl(void *buffers[], void *cl_arg){
+	usleep(100000);
+	printf("COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stdout);
+}
+
+void callback_b_opencl(void *buffers[], void *cl_arg){
+	usleep(100000);
+	printf("COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stdout);
+}
+
+void callback_c_opencl(void *buffers[], void *cl_arg){
+	usleep(100000);
+	printf("COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stdout);
+}
+#endif
+
+int main(int argc, char** argv){
+	int ret;
+	struct starpu_conf conf;
+	ret = starpu_conf_init(&conf);
+	assert(ret == 0);
+
+	conf.sched_policy_name = "heteroprio";
+	conf.sched_policy_init = &initSchedulerCallback;
+
+	ret = starpu_init(&conf);
+	assert(ret == 0);
+
+	starpu_pause();
+
+	printf("Worker = %d\n",  starpu_worker_get_count());
+	printf("Worker CPU = %d\n", starpu_cpu_worker_get_count());
+#ifdef STARPU_USE_OPENCL
+	printf("Worker OpenCL = %d\n", starpu_cpu_worker_get_count());
+#endif
+
+	struct starpu_codelet codeleteA;
+	{
+		memset(&codeleteA, 0, sizeof(codeleteA));
+		codeleteA.nbuffers = 2;
+		codeleteA.modes[0] = STARPU_RW;
+		codeleteA.modes[1] = STARPU_RW;
+		codeleteA.name = "codeleteA";
+		codeleteA.where = STARPU_CPU;
+		codeleteA.cpu_funcs[0] = callback_a_cpu;
+#ifdef STARPU_USE_OPENCL
+		codeleteA.where |= STARPU_OPENCL;
+		codeleteA.opencl_funcs[0] = callback_a_opencl;
+#endif
+	}
+	struct starpu_codelet codeleteB;
+	{
+		memset(&codeleteB, 0, sizeof(codeleteB));
+		codeleteB.nbuffers = 2;
+		codeleteB.modes[0] = STARPU_RW;
+		codeleteB.modes[1] = STARPU_RW;
+		codeleteB.name = "codeleteB";
+		codeleteB.where = STARPU_CPU;
+		codeleteB.cpu_funcs[0] = callback_b_cpu;
+#ifdef STARPU_USE_OPENCL
+		codeleteB.where |= STARPU_OPENCL;
+		codeleteB.opencl_funcs[0] = callback_b_opencl;
+#endif
+	}
+	struct starpu_codelet codeleteC;
+	{
+		memset(&codeleteC, 0, sizeof(codeleteC));
+		codeleteC.nbuffers = 2;
+		codeleteC.modes[0] = STARPU_RW;
+		codeleteC.modes[1] = STARPU_RW;
+		codeleteC.name = "codeleteC";
+		codeleteC.where = STARPU_CPU;
+		codeleteC.cpu_funcs[0] = callback_c_cpu;
+#ifdef STARPU_USE_OPENCL
+		codeleteC.where |= STARPU_OPENCL;
+		codeleteC.opencl_funcs[0] = callback_c_opencl;
+#endif
+	}
+
+	const int nbHandles = 10;
+	printf("Nb handles = %d\n", nbHandles);
+
+	starpu_data_handle_t handles[nbHandles];
+	memset(handles, 0, sizeof(handles[0])*nbHandles);
+	int dataA[nbHandles];
+	int idx;
+	for(idx = 0; idx < nbHandles; ++idx){
+		dataA[idx] = idx;
+	}
+	int idxHandle;
+	for(idxHandle = 0; idxHandle < nbHandles; ++idxHandle){
+		starpu_variable_data_register(&handles[idxHandle], 0, (uintptr_t)&dataA[idxHandle], sizeof(dataA[idxHandle]));
+	}
+
+	const int nbTasks = 40;
+	printf("Submit %d tasks \n", nbTasks);
+
+	starpu_resume();
+	
+	int idxTask;
+	for(idxTask = 0; idxTask < nbTasks; ++idxTask){
+		starpu_insert_task(&codeleteA,
+				   STARPU_PRIORITY, 0,
+				   (STARPU_RW), handles[(idxTask*2)%nbHandles],
+				   (STARPU_RW), handles[(idxTask*3+1)%nbHandles],
+				   0);
+		starpu_insert_task(&codeleteB,
+				   STARPU_PRIORITY, 1,
+				   (STARPU_RW), handles[(idxTask*2 +1 )%nbHandles],
+				   (STARPU_RW), handles[(idxTask*2)%nbHandles],
+				   0);
+		starpu_insert_task(&codeleteC,
+				   STARPU_PRIORITY, 2,
+				   (STARPU_RW), handles[(idxTask)%nbHandles],
+				   (STARPU_RW), handles[(idxTask*idxTask)%nbHandles],
+				   0);
+	}
+
+	printf("Wait task\n");
+
+	starpu_task_wait_for_all();
+	starpu_pause();
+
+	printf("Release data\n");
+
+	for(idxHandle = 0 ; idxHandle < nbHandles ; ++idxHandle){
+		starpu_data_unregister(handles[idxHandle]);
+	}
+	
+	printf("Shutdown\n");
+
+
+	starpu_resume();
+	starpu_shutdown();
+
+	return 0;
+}
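
For reference, a minimal sketch (not part of the commit) of the lines one could add inside initSchedulerCallback when CUDA workers are compiled in; the calls and FSTARPU_CUDA_IDX come from include/schedulers/heteroprio.h below, and the bucket numbers are only illustrative:

	#ifdef STARPU_USE_CUDA
	/* CUDA uses 1 bucket and looks first at priority 2 */
	starpu_heteroprio_set_nb_prios(0, FSTARPU_CUDA_IDX, 1);
	starpu_heteroprio_set_mapping(0, FSTARPU_CUDA_IDX, 0, 2);
	/* CUDA is the fastest arch for bucket 2, and the CPU is 4 times slower there */
	starpu_heteroprio_set_faster_arch(0, FSTARPU_CUDA_IDX, 2);
	starpu_heteroprio_set_arch_slow_factor(0, FSTARPU_CPU_IDX, 2, 4.0f);
	#endif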

+ 83 - 0
include/schedulers/heteroprio.h

@@ -0,0 +1,83 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_SCHEDULER_HETEROPRIO_H__
+#define __STARPU_SCHEDULER_HETEROPRIO_H__
+
+#include <starpu.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define HETEROPRIO_MAX_PRIO 100
+/* #define FSTARPU_NB_TYPES 3 */
+/* #define FSTARPU_CPU_IDX 0 */
+/* #define FSTARPU_CUDA_IDX 1 */
+/* #define FSTARPU_OPENCL_IDX 2 */
+
+#define HETEROPRIO_MAX_PREFETCH 2
+#if HETEROPRIO_MAX_PREFETCH <= 0
+#error HETEROPRIO_MAX_PREFETCH == 1 means no prefetch, so HETEROPRIO_MAX_PREFETCH must be >= 1
+#endif
+
+enum FStarPUTypes{
+// First will be zero
+#ifdef STARPU_USE_CPU
+	FSTARPU_CPU_IDX, // = 0
+#endif
+#ifdef STARPU_USE_CUDA
+	FSTARPU_CUDA_IDX,
+#endif
+#ifdef STARPU_USE_OPENCL
+	FSTARPU_OPENCL_IDX,
+#endif
+// This will be the number of archs
+	FSTARPU_NB_TYPES
+};
+
+static const unsigned FStarPUTypesToArch[FSTARPU_NB_TYPES+1] = {
+#ifdef STARPU_USE_CPU
+	STARPU_CPU,
+#endif
+#ifdef STARPU_USE_CUDA
+	STARPU_CUDA,
+#endif
+#ifdef STARPU_USE_OPENCL
+	STARPU_OPENCL,
+#endif
+	0
+};
+
+
+/** Tell how many priorities there are for a given arch */
+void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum FStarPUTypes arch, unsigned max_prio);
+
+/** Set the mapping for a given arch prio=>bucket */
+void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum FStarPUTypes arch, unsigned source_prio, unsigned dest_bucket_id);
+
+/** Tell which arch is the fastest for the tasks of a bucket (optional) */
+void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum FStarPUTypes arch, unsigned bucket_id);
+
+/** Tell how slow an arch is for the tasks of a bucket (optional) */
+void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum FStarPUTypes arch, unsigned bucket_id, float slow_factor);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __STARPU_SCHEDULER_HETEROPRIO_H__ */
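
Note that the FSTARPU_*_IDX values are compacted to the archs compiled in, so an index shifts with the build configuration; FStarPUTypesToArch converts such an index back to the corresponding STARPU_* worker mask. A quick illustration (assuming a CPU-only build):

	/* here FSTARPU_CPU_IDX == 0 and FSTARPU_NB_TYPES == 1;
	 * in a CPU+CUDA+OpenCL build, FSTARPU_OPENCL_IDX would be 2 instead */
	unsigned mask = FStarPUTypesToArch[FSTARPU_CPU_IDX]; /* == STARPU_CPU */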

+ 2 - 0
include/starpu.h

@@ -126,6 +126,8 @@ struct starpu_conf
 	int global_sched_ctx_min_priority;
 	int global_sched_ctx_max_priority;
 
+	void (*sched_policy_init)(void);
+
 };
 
 int starpu_conf_init(struct starpu_conf *conf);
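
The new sched_policy_init field is forwarded to the initial scheduling context at starpu_init() time (see the src/core/workers.c hunk below); a minimal sketch of its use, mirroring examples/scheduler/heteroprio_test.c above:

	struct starpu_conf conf;
	starpu_conf_init(&conf);
	conf.sched_policy_name = "heteroprio";
	conf.sched_policy_init = &initSchedulerCallback; /* called while the policy is initialized */
	starpu_init(&conf);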

+ 3 - 0
include/starpu_sched_ctx.h

@@ -31,6 +31,7 @@ extern "C"
 #define STARPU_SCHED_CTX_HIERARCHY_LEVEL         (5<<16)
 #define STARPU_SCHED_CTX_NESTED                  (6<<16)
 #define STARPU_SCHED_CTX_AWAKE_WORKERS           (7<<16)
+#define STARPU_SCHED_CTX_POLICY_INIT             (8<<16)
 
 unsigned starpu_sched_ctx_create(int *workerids_ctx, int nworkers_ctx, const char *sched_ctx_name, ...);
 
@@ -138,6 +139,8 @@ void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_
 
 int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id);
 
+void (*starpu_sched_ctx_get_sched_policy_init(unsigned sched_ctx_id))(void);
+
 #ifdef STARPU_USE_SC_HYPERVISOR
 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id);
 #endif /* STARPU_USE_SC_HYPERVISOR */
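
STARPU_SCHED_CTX_POLICY_INIT is consumed in the vararg loop of starpu_sched_ctx_create() (see the src/core/sched_ctx.c hunks below). A sketch of per-context use; the worker IDs and the callback name are illustrative, and STARPU_SCHED_CTX_POLICY_NAME is the usual flag for selecting a policy by name:

	void my_heteroprio_init(void); /* calls the starpu_heteroprio_set_* functions */

	int workerids[2] = {0, 1};
	unsigned ctx = starpu_sched_ctx_create(workerids, 2, "heteroprio_ctx",
					       STARPU_SCHED_CTX_POLICY_NAME, "heteroprio",
					       STARPU_SCHED_CTX_POLICY_INIT, &my_heteroprio_init,
					       0);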

+ 1 - 0
src/Makefile.am

@@ -192,6 +192,7 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 	sched_policies/fifo_queues.c				\
 	sched_policies/parallel_heft.c				\
 	sched_policies/parallel_eager.c				\
+	sched_policies/heteroprio.c				\
 	drivers/driver_common/driver_common.c			\
 	drivers/disk/driver_disk.c				\
 	datawizard/memory_nodes.c				\

+ 17 - 4
src/core/sched_ctx.c

@@ -455,7 +455,9 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 						   int nworkers_ctx, unsigned is_initial_sched,
 						   const char *sched_ctx_name,
 						   int min_prio_set, int min_prio,
-						   int max_prio_set, int max_prio, unsigned awake_workers)
+						   int max_prio_set, int max_prio,
+						   unsigned awake_workers,
+						   void (*sched_policy_init)(void))
 {
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
 
@@ -499,7 +501,7 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 	sched_ctx->main_master = -1;
 	sched_ctx->perf_arch.devices = NULL;
 	sched_ctx->perf_arch.ndevices = 0;
-
+	sched_ctx->init_sched = sched_policy_init;
 	int w;
 	for(w = 0; w < nworkers; w++)
 	{
@@ -693,7 +695,7 @@ unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const
 	for(i = 0; i < nw; i++)
 		printf("%d ", workers[i]);
 	printf("\n");
-	sched_ctx = _starpu_create_sched_ctx(selected_policy, workers, nw, 0, sched_ctx_name, 0, 0, 0, 0, 1);
+	sched_ctx = _starpu_create_sched_ctx(selected_policy, workers, nw, 0, sched_ctx_name, 0, 0, 0, 0, 1, NULL);
 	sched_ctx->min_ncpus = min_ncpus;
 	sched_ctx->max_ncpus = max_ncpus;
 	sched_ctx->min_ngpus = min_ngpus;
@@ -723,6 +725,7 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 	unsigned hierarchy_level = 0;
 	unsigned nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS;
 	unsigned awake_workers = 0;
+	void (*init_sched)(void) = NULL;
 
 	va_start(varg_list, sched_ctx_name);
 	while ((arg_type = va_arg(varg_list, int)) != 0)
@@ -759,6 +762,10 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 		{
 			awake_workers = 1;
 		}
+		else if (arg_type == STARPU_SCHED_CTX_POLICY_INIT)
+		{
+			init_sched = va_arg(varg_list, void(*)(void));
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
@@ -768,7 +775,7 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 	va_end(varg_list);
 
 	struct _starpu_sched_ctx *sched_ctx = NULL;
-	sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers);
+	sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched);
 	sched_ctx->hierarchy_level = hierarchy_level;
 	sched_ctx->nesting_sched_ctx = nesting_sched_ctx;
 
@@ -2315,3 +2322,9 @@ int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id)
 
 	return -1;
 }
+
+void (*starpu_sched_ctx_get_sched_policy_init(unsigned sched_ctx_id))(void)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	return sched_ctx->init_sched;
+}
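
The declarator of the new getter reads as "function taking an unsigned and returning a pointer to a function returning void"; an equivalent spelling with a typedef, for readability (the typedef name is ours, not part of the patch):

	typedef void (*starpu_sched_policy_init_func)(void);

	starpu_sched_policy_init_func starpu_sched_ctx_get_sched_policy_init(unsigned sched_ctx_id);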

+ 4 - 1
src/core/sched_ctx.h

@@ -165,6 +165,9 @@ struct _starpu_sched_ctx
 	   the threads to sleep in order to replace them with other threads or leave
 	   them awake & use them in the parallel code*/
 	unsigned awake_workers;
+
+	/* function called when initializing the scheduler */
+	void (*init_sched)(void);
 };
 
 struct _starpu_machine_config;
@@ -175,7 +178,7 @@ void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config);
 /* allocate all structures belonging to a context */
 struct _starpu_sched_ctx*  _starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerid, int nworkerids, unsigned is_init_sched, const char *sched_name,
 						    int min_prio_set, int min_prio,
-						    int max_prio_set, int max_prio, unsigned awake_workers);
+						    int max_prio_set, int max_prio, unsigned awake_workers, void (*sched_policy_init)(void));
 
 /* delete all sched_ctx */
 void _starpu_delete_all_sched_ctxs();

+ 1 - 0
src/core/sched_policy.c

@@ -58,6 +58,7 @@ static struct starpu_sched_policy *predefined_policies[] =
 	&_starpu_sched_dmda_sorted_decision_policy,
 	&_starpu_sched_dmda_sorted_decision_policy,
 	&_starpu_sched_parallel_heft_policy,
 	&_starpu_sched_parallel_heft_policy,
 	&_starpu_sched_peager_policy,
 	&_starpu_sched_peager_policy,
+	&_starpu_sched_heteroprio_policy,
 	NULL
 	NULL
 };
 };
 
 

+ 1 - 0
src/core/sched_policy.h

@@ -71,6 +71,7 @@ extern struct starpu_sched_policy _starpu_sched_dmda_sorted_decision_policy;
 extern struct starpu_sched_policy _starpu_sched_eager_policy;
 extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy;
 extern struct starpu_sched_policy _starpu_sched_peager_policy;
+extern struct starpu_sched_policy _starpu_sched_heteroprio_policy;
 extern struct starpu_sched_policy _starpu_sched_modular_eager_policy;
 extern struct starpu_sched_policy _starpu_sched_modular_eager_prefetching_policy;
 extern struct starpu_sched_policy _starpu_sched_modular_prio_policy;

+ 1 - 1
src/core/workers.c

@@ -1258,7 +1258,7 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 	if (!is_a_sink)
 	{
 		struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(&config, config.conf->sched_policy_name);
-		_starpu_create_sched_ctx(selected_policy, NULL, -1, 1, "init", (config.conf->global_sched_ctx_min_priority != -1), config.conf->global_sched_ctx_min_priority, (config.conf->global_sched_ctx_min_priority != -1), config.conf->global_sched_ctx_max_priority, 1, config.conf->sched_policy_init);
+		_starpu_create_sched_ctx(selected_policy, NULL, -1, 1, "init", (config.conf->global_sched_ctx_min_priority != -1), config.conf->global_sched_ctx_min_priority, (config.conf->global_sched_ctx_min_priority != -1), config.conf->global_sched_ctx_max_priority, 1, config.conf->sched_policy_init);
 	}
 
 	_starpu_initialize_registered_performance_models();

+ 618 - 0
src/sched_policies/heteroprio.c

@@ -0,0 +1,618 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Distributed queues using performance modeling to assign tasks */
+
+#include <starpu_config.h>
+#include <starpu_scheduler.h>
+#include <schedulers/heteroprio.h>
+
+#include <common/fxt.h>
+#include <core/task.h>
+
+#include <sched_policies/fifo_queues.h>
+#include <limits.h>
+
+#ifdef HAVE_AYUDAME_H
+#include <Ayudame.h>
+#endif
+
+#ifndef DBL_MIN
+#define DBL_MIN __DBL_MIN__
+#endif
+
+#ifndef DBL_MAX
+#define DBL_MAX __DBL_MAX__
+#endif
+
+/* A bucket corresponds to a pair of priorities.
+ * When a task is pushed with a priority X, it will be stored
+ * into the bucket X.
+ * All the tasks stored in the fifo should be computable by the archs
+ * in valide_archs.
+ * For example if valide_archs = (STARPU_CPU|STARPU_CUDA),
+ * then task->cl->where should be at least (STARPU_CPU|STARPU_CUDA).
+ */
+struct _heteroprio_bucket{
+	/* The tasks of the current bucket */
+	struct _starpu_fifo_taskq* tasks_queue;
+	/* The archs allowed for the current bucket */
+	unsigned valide_archs;
+	/* The slow factors for any archs */
+	float slow_factors_per_index[FSTARPU_NB_TYPES];
+	/* The base arch for the slow factor (the fastest arch for the current tasks in the bucket) */
+	unsigned factor_base_arch_index;
+};
+
+/* Init a bucket */
+static void _heteroprio_bucket_init(struct _heteroprio_bucket* bucket){
+	memset(bucket, 0, sizeof(*bucket));
+	bucket->tasks_queue =  _starpu_create_fifo();
+}
+
+/* Release a bucket */
+static void _heteroprio_bucket_release(struct _heteroprio_bucket* bucket){
+	STARPU_ASSERT(_starpu_fifo_empty(bucket->tasks_queue) != 0);
+	_starpu_destroy_fifo(bucket->tasks_queue);
+}
+
+
+/* A worker is mainly composed of a fifo for the tasks
+ * and some direct access to worker properties.
+ * The fifo is implemented with an array:
+ * to read a task, access tasks_queue[tasks_queue_index];
+ * to write a task, access tasks_queue[(tasks_queue_index+tasks_queue_size)%HETEROPRIO_MAX_PREFETCH]
+ */
+/* ANDRA_MODIF: can use starpu fifo + starpu sched_mutex*/
+struct _heteroprio_worker_wrapper{
+	unsigned arch_type;
+	unsigned arch_index;
+	struct _starpu_fifo_taskq *tasks_queue;
+};
+
+struct _starpu_heteroprio_data
+{
+	starpu_pthread_mutex_t policy_mutex;
+	struct starpu_bitmap *waiters;
+	/* The bucket to store the tasks */
+	struct _heteroprio_bucket buckets[HETEROPRIO_MAX_PRIO];
+	/* The number of buckets for each arch */
+	unsigned nb_prio_per_arch_index[FSTARPU_NB_TYPES];
+	/* The mapping to the corresponding buckets */
+	unsigned prio_mapping_per_arch_index[FSTARPU_NB_TYPES][HETEROPRIO_MAX_PRIO];
+	/* The number of available tasks for a given arch (not prefetched) */
+	unsigned nb_remaining_tasks_per_arch_index[FSTARPU_NB_TYPES];
+	/* The total number of tasks in the bucket (not prefetched) */
+	unsigned total_tasks_in_buckets;
+	/* The total number of prefetched tasks for a given arch */
+	unsigned nb_prefetched_tasks_per_arch_index[FSTARPU_NB_TYPES];
+	/* The information for all the workers */
+	struct _heteroprio_worker_wrapper workers_heteroprio[STARPU_NMAXWORKERS];
+	/* The number of workers for a given arch */
+	unsigned nb_workers_per_arch_index[FSTARPU_NB_TYPES];
+};
+
+/** Tell how many priorities there are for a given arch */
+void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum FStarPUTypes arch, unsigned max_prio)
+{
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	STARPU_ASSERT(max_prio < HETEROPRIO_MAX_PRIO);
+
+	hp->nb_prio_per_arch_index[arch] = max_prio;
+}
+
+/** Set the mapping for a given arch prio=>bucket */
+void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum FStarPUTypes arch, unsigned source_prio, unsigned dest_bucket_id)
+{
+	STARPU_ASSERT(dest_bucket_id < HETEROPRIO_MAX_PRIO);
+
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	hp->prio_mapping_per_arch_index[arch][source_prio] = dest_bucket_id;
+
+	hp->buckets[dest_bucket_id].valide_archs |= FStarPUTypesToArch[arch];
+}
+
+/** Tell which arch is the fastest for the tasks of a bucket (optional) */
+void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum FStarPUTypes arch, unsigned bucket_id)
+{
+	STARPU_ASSERT(bucket_id < HETEROPRIO_MAX_PRIO);
+
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	hp->buckets[bucket_id].factor_base_arch_index = arch;
+
+	hp->buckets[bucket_id].slow_factors_per_index[arch] = 0;
+}
+
+/** Tell how slow an arch is for the tasks of a bucket (optional) */
+void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum FStarPUTypes arch, unsigned bucket_id, float slow_factor)
+{
+	STARPU_ASSERT(bucket_id < HETEROPRIO_MAX_PRIO);
+
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	hp->buckets[bucket_id].slow_factors_per_index[arch] = slow_factor;
+}
+
+static void initialize_heteroprio_policy(unsigned sched_ctx_id)
+{
+#ifdef STARPU_HAVE_HWLOC
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_TREE);
+#else
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
+#endif
+	/* Alloc the scheduler data  */
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)malloc(sizeof(struct _starpu_heteroprio_data));
+	memset(hp, 0, sizeof(*hp));
+
+	hp->waiters = starpu_bitmap_create();
+
+	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hp);
+
+	STARPU_PTHREAD_MUTEX_INIT(&hp->policy_mutex, NULL);
+
+	unsigned idx_prio;
+	for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio)
+		_heteroprio_bucket_init(&hp->buckets[idx_prio]);
+
+	/* Call the user-provided configuration callback, if any */
+	void (*init_sched)(void) = starpu_sched_ctx_get_sched_policy_init(sched_ctx_id);
+
+	if(init_sched)
+		init_sched();
+
+	/* Ensure that the information has been correctly filled */
+	unsigned check_all_archs[HETEROPRIO_MAX_PRIO];
+	memset(check_all_archs, 0, sizeof(unsigned)*HETEROPRIO_MAX_PRIO);
+	unsigned arch_index;
+	for(arch_index = 0; arch_index < FSTARPU_NB_TYPES; ++arch_index)
+	{
+		STARPU_ASSERT(hp->nb_prio_per_arch_index[arch_index] <= HETEROPRIO_MAX_PRIO);
+		
+		unsigned check_archs[HETEROPRIO_MAX_PRIO];
+		memset(check_archs, 0, sizeof(unsigned)*HETEROPRIO_MAX_PRIO);
+		
+		for(idx_prio = 0; idx_prio < hp->nb_prio_per_arch_index[arch_index]; ++idx_prio)
+		{
+			const unsigned mapped_prio = hp->prio_mapping_per_arch_index[arch_index][idx_prio];
+			STARPU_ASSERT(mapped_prio < HETEROPRIO_MAX_PRIO);
+			STARPU_ASSERT(hp->buckets[mapped_prio].slow_factors_per_index[arch_index] >= 0.0);
+			STARPU_ASSERT(hp->buckets[mapped_prio].valide_archs & FStarPUTypesToArch[arch_index]);
+			check_archs[mapped_prio]      = 1;
+			check_all_archs[mapped_prio] += 1;
+		}
+		for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio)
+		{
+			/* Ensure the current arch uses a bucket or someone else can use it */
+			STARPU_ASSERT(check_archs[idx_prio] == 1 || hp->buckets[idx_prio].valide_archs == 0
+				      || (hp->buckets[idx_prio].valide_archs & ~FStarPUTypesToArch[arch_index]) != 0);
+		}
+	}
+	/* Ensure that if valide_archs = (STARPU_CPU|STARPU_CUDA), then check_all_archs[] == 2, for example */
+
+	for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio)
+	{
+		unsigned nb_arch_on_bucket = 0;
+		for(arch_index = 0; arch_index < FSTARPU_NB_TYPES; ++arch_index)
+		{
+			if(hp->buckets[idx_prio].valide_archs & FStarPUTypesToArch[arch_index])
+			{
+				nb_arch_on_bucket += 1;
+			}
+		}
+		STARPU_ASSERT(check_all_archs[idx_prio] == nb_arch_on_bucket);
+	}
+}
+
+static void deinitialize_heteroprio_policy(unsigned sched_ctx_id)
+{
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	/* Ensure there are no more tasks */
+	STARPU_ASSERT(hp->total_tasks_in_buckets == 0);
+	unsigned arch_index;
+	for(arch_index = 0; arch_index < FSTARPU_NB_TYPES; ++arch_index){
+		STARPU_ASSERT(hp->nb_remaining_tasks_per_arch_index[arch_index] == 0);
+		STARPU_ASSERT(hp->nb_prefetched_tasks_per_arch_index[arch_index] == 0);
+	}
+
+	unsigned idx_prio;
+	for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio){
+		_heteroprio_bucket_release(&hp->buckets[idx_prio]);
+	}
+
+	starpu_bitmap_destroy(hp->waiters);
+
+	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
+	STARPU_PTHREAD_MUTEX_DESTROY(&hp->policy_mutex);
+	free(hp);
+}
+
+static void add_workers_heteroprio_policy(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
+{
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	int workerid;
+	unsigned i;
+	for (i = 0; i < nworkers; i++)
+	{
+		workerid = workerids[i];
+		memset(&hp->workers_heteroprio[workerid], 0, sizeof(hp->workers_heteroprio[workerid]));
+		/* if the worker has alreadry belonged to this context
+		   the queue and the synchronization variables have been already initialized */
+		if(hp->workers_heteroprio[workerid].tasks_queue == NULL)
+		{
+			hp->workers_heteroprio[workerid].tasks_queue = _starpu_create_fifo();
+			switch(starpu_worker_get_type(workerid))
+			{
+#ifdef STARPU_USE_CPU
+			case STARPU_CPU_WORKER:
+				hp->workers_heteroprio[workerid].arch_type = STARPU_CPU;
+				hp->workers_heteroprio[workerid].arch_index = FSTARPU_CPU_IDX;
+				break;
+#endif
+#ifdef STARPU_USE_CUDA
+			case STARPU_CUDA_WORKER:
+				hp->workers_heteroprio[workerid].arch_type = STARPU_CUDA;
+				hp->workers_heteroprio[workerid].arch_index = FSTARPU_CUDA_IDX;
+				break;
+#endif
+#ifdef STARPU_USE_OPENCL
+			case STARPU_OPENCL_WORKER:
+				hp->workers_heteroprio[workerid].arch_type = STARPU_OPENCL;
+				hp->workers_heteroprio[workerid].arch_index = FSTARPU_OPENCL_IDX;
+				break;
+#endif
+			default:
+				STARPU_ASSERT(0);
+			}
+		}
+		hp->nb_workers_per_arch_index[hp->workers_heteroprio[workerid].arch_index]++;
+
+	}
+}
+
+static void remove_workers_heteroprio_policy(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
+{
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	int workerid;
+	unsigned i;
+	for (i = 0; i < nworkers; i++)
+	{
+		workerid = workerids[i];
+		if(hp->workers_heteroprio[workerid].tasks_queue != NULL)
+		{
+			_starpu_destroy_fifo(hp->workers_heteroprio[workerid].tasks_queue);
+			hp->workers_heteroprio[workerid].tasks_queue = NULL;
+		}
+	}
+}
+
+/* Push a new task (simply store it and update counters) */
+static int push_task_heteroprio_policy(struct starpu_task *task)
+{
+	unsigned sched_ctx_id = task->sched_ctx;
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+
+	/* Only one worker at a time uses heteroprio */
+	STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex);
+
+	/* Retrieve the correct bucket */
+	STARPU_ASSERT(task->priority < HETEROPRIO_MAX_PRIO);
+	struct _heteroprio_bucket* bucket = &hp->buckets[task->priority];
+	/* Ensure that any worker that checks that list can compute the task */
+	STARPU_ASSERT(bucket->valide_archs
+	       && ((bucket->valide_archs ^ task->cl->where) & bucket->valide_archs) == 0);
+	/* save the task */
+	_starpu_fifo_push_back_task(bucket->tasks_queue,task);
+
+	/* Inc counters */
+	unsigned arch_index;
+	for(arch_index = 0; arch_index < FSTARPU_NB_TYPES; ++arch_index)
+	{
+		/* We test the archs of the bucket and not task->cl->where, since only the bucket's archs will look at this queue */
+		if(bucket->valide_archs & FStarPUTypesToArch[arch_index])
+			hp->nb_remaining_tasks_per_arch_index[arch_index] += 1;
+	}
+
+	hp->total_tasks_in_buckets += 1;
+
+	starpu_push_task_end(task);
+
+	/* if there are no tasks, workers block; wake the ones waiting for a task */
+	unsigned worker = 0;
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+
+	struct starpu_sched_ctx_iterator it;
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+	char dowake[STARPU_NMAXWORKERS] = { 0 };
+#endif
+
+	workers->init_iterator(workers, &it);
+	while(workers->has_next_master(workers, &it))
+	{
+		worker = workers->get_next_master(workers, &it);
+
+#ifdef STARPU_NON_BLOCKING_DRIVERS
+		if (!starpu_bitmap_get(hp->waiters, worker))
+			/* This worker is not waiting for a task */
+			continue;
+#endif
+
+		if (starpu_worker_can_execute_task_first_impl(worker, task, NULL))
+		{
+			/* It can execute this one, tell him! */
+#ifdef STARPU_NON_BLOCKING_DRIVERS
+			starpu_bitmap_unset(hp->waiters, worker);
+			/* We really woke at least somebody, no need to wake somebody else */
+			break;
+#else
+			dowake[worker] = 1;
+#endif
+		}
+	}
+	/* Let the task free */
+	STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex);
+
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+	/* Now that we have a list of potential workers, try to wake one */
+
+	workers->init_iterator(workers, &it);
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		if (dowake[worker])
+			if (starpu_wake_worker(worker))
+				break; // wake up a single worker
+	}
+#endif
+
+	return 0;
+}
+
+static struct starpu_task *pop_task_heteroprio_policy(unsigned sched_ctx_id)
+{
+	const unsigned workerid = starpu_worker_get_id();
+	struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	struct _heteroprio_worker_wrapper* worker = &hp->workers_heteroprio[workerid];
+
+	/* If there is no task available for this worker's arch, nothing in its local queue and nothing prefetched for this arch, just return NULL */
+	if ((hp->total_tasks_in_buckets == 0 || hp->nb_remaining_tasks_per_arch_index[worker->arch_index] == 0)
+            && worker->tasks_queue->ntasks == 0 && hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] == 0){
+		return NULL;
+	}
+
+#ifdef STARPU_NON_BLOCKING_DRIVERS
+	if (starpu_bitmap_get(hp->waiters, workerid)){
+		/* Nobody woke us, avoid bothering the mutex */
+		return NULL;
+	}
+#endif
+/* TODO beranger: check this out */
+	starpu_pthread_mutex_t *worker_sched_mutex;
+	starpu_pthread_cond_t *worker_sched_cond;
+	starpu_worker_get_sched_condition(workerid, &worker_sched_mutex, &worker_sched_cond);
+
+	/* Note: releasing this mutex before taking the victim mutex, to avoid interlock */
+	STARPU_PTHREAD_MUTEX_UNLOCK(worker_sched_mutex);
+
+	STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex);
+
+	/* keep track of the newly added tasks to perform real prefetch on the node */
+	unsigned nb_added_tasks = 0;
+
+	/* Check that some tasks are available for the current worker arch */
+	if( hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0 ){
+		/* Ideally we would like to fill the prefetch array */
+		unsigned nb_tasks_to_prefetch = (HETEROPRIO_MAX_PREFETCH-worker->tasks_queue->ntasks);
+		/* But there are maybe less tasks than that! */
+		if(nb_tasks_to_prefetch > hp->nb_remaining_tasks_per_arch_index[worker->arch_index]){
+			nb_tasks_to_prefetch = hp->nb_remaining_tasks_per_arch_index[worker->arch_index];
+		}
+		/* But if there are fewer tasks than workers, take at most one */
+		if(hp->nb_remaining_tasks_per_arch_index[worker->arch_index] < starpu_sched_ctx_get_nworkers(sched_ctx_id)){
+			if(worker->tasks_queue->ntasks == 0) 
+				nb_tasks_to_prefetch = 1;
+			else 
+				nb_tasks_to_prefetch = 0;
+		}
+
+		nb_added_tasks = nb_tasks_to_prefetch;
+
+		unsigned idx_prio, arch_index;
+		/* We iterate until we find all the tasks we need */
+		for(idx_prio = 0; nb_tasks_to_prefetch && idx_prio < hp->nb_prio_per_arch_index[worker->arch_index]; ++idx_prio)
+		{
+			/* Retrieve the bucket using the mapping */
+			struct _heteroprio_bucket* bucket = &hp->buckets[hp->prio_mapping_per_arch_index[worker->arch_index][idx_prio]];
+			/* Ensure we can compute task from this bucket */
+			STARPU_ASSERT(bucket->valide_archs & worker->arch_type);
+			/* Take nb_tasks_to_prefetch tasks if possible */
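+			/* A worker only drains this bucket when no faster arch was declared
+			 * for it, when the worker belongs to the faster arch itself, or when
+			 * the bucket holds at least slow_factor tasks per faster-arch worker,
+			 * i.e. when the faster arch alone could not absorb the backlog */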
+			while(!_starpu_fifo_empty(bucket->tasks_queue) && nb_tasks_to_prefetch && 
+			      (bucket->factor_base_arch_index == 0 || 
+			       worker->arch_index == bucket->factor_base_arch_index || 
+			       (((float)bucket->tasks_queue->ntasks)/((float)hp->nb_workers_per_arch_index[bucket->factor_base_arch_index])) >= bucket->slow_factors_per_index[worker->arch_index]))
+			{
+				struct starpu_task* task = _starpu_fifo_pop_local_task(bucket->tasks_queue);
+				STARPU_ASSERT(starpu_worker_can_execute_task(workerid, task, 0));
+				/* Save the task */
+				_starpu_fifo_push_task(worker->tasks_queue, task);
+
+				/* Update general counter */
+				hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] += 1;
+				hp->total_tasks_in_buckets -= 1;
+				
+				for(arch_index = 0; arch_index < FSTARPU_NB_TYPES; ++arch_index){
+					/* We test the archs of the bucket and not task->cl->where, since only the bucket's archs will look at this queue */
+					if(bucket->valide_archs & FStarPUTypesToArch[arch_index]){
+						hp->nb_remaining_tasks_per_arch_index[arch_index] -= 1;
+					}
+				}
+				/* Decrease the number of tasks to find */
+				nb_tasks_to_prefetch -= 1;
+				// TODO starpu_prefetch_task_input_on_node(task, workerid);
+			}
+		}
+		STARPU_ASSERT_MSG(nb_tasks_to_prefetch == 0, "%d tasks were not prefetched by worker %d\n", nb_tasks_to_prefetch, workerid);
+	}
+
+	struct starpu_task* task = NULL;
+
+	/* The worker has some tasks in its queue */
+	if(worker->tasks_queue->ntasks){
+		task = _starpu_fifo_pop_task(worker->tasks_queue, workerid);
+		hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] -= 1;
+	}
+	/* Otherwise look if we can steal some work */
+	else if(hp->nb_prefetched_tasks_per_arch_index[worker->arch_index]){
+		/* If HETEROPRIO_MAX_PREFETCH==1 it should not be possible to steal work */
+		STARPU_ASSERT(HETEROPRIO_MAX_PREFETCH != 1);
+		
+		struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+
+		struct starpu_sched_ctx_iterator it;
+
+		workers->init_iterator(workers, &it);
+		unsigned victim = workerid;
+		unsigned current_worker;
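+		/* advance the iterator up to our own position, so that stealing starts from our neighbours */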
+		while(workers->has_next_master(workers, &it))
+		{
+			current_worker = workers->get_next_master(workers, &it);
+			if(current_worker == victim)
+				break;
+		}
+		
+		/* circular loop: keep scanning the other workers until a task is stolen */
+		while(!task)
+		{
+			while(workers->has_next_master(workers, &it))
+			{
+				victim = workers->get_next_master(workers, &it);
+				if(victim == workerid)
+					continue;
+		
+				/* If it is the same arch and there is a task to steal */
+				if(hp->workers_heteroprio[victim].arch_index == worker->arch_index
+				   && hp->workers_heteroprio[victim].tasks_queue->ntasks){
+					starpu_pthread_mutex_t *victim_sched_mutex;
+					starpu_pthread_cond_t *victim_sched_cond;
+					starpu_worker_get_sched_condition(victim, &victim_sched_mutex, &victim_sched_cond);
+
+					/* ensure the worker is not currently prefetching its data */
+					STARPU_PTHREAD_MUTEX_LOCK(victim_sched_mutex);
+
+					if(hp->workers_heteroprio[victim].arch_index == worker->arch_index
+					   && hp->workers_heteroprio[victim].tasks_queue->ntasks){
+						/* steal the last added task */
+						task = starpu_task_list_pop_back(&hp->workers_heteroprio[victim].tasks_queue->taskq);
+						/* we steal a task update global counter */
+						hp->nb_prefetched_tasks_per_arch_index[hp->workers_heteroprio[victim].arch_index] -= 1;
+						
+						STARPU_PTHREAD_MUTEX_UNLOCK(victim_sched_mutex);
+						break;
+					}
+					STARPU_PTHREAD_MUTEX_UNLOCK(victim_sched_mutex);
+				}
+			}
+			/* a full pass without stealing anything: rewind the iterator and rescan */
+			workers->init_iterator(workers, &it);
+		}
+	}
+
+	if (!task){
+		/* Tell pushers that we are waiting for tasks */
+		starpu_bitmap_set(hp->waiters, workerid);
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(worker_sched_mutex);
+
+	if(task){
+		unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx_id);
+		if(child_sched_ctx != STARPU_NMAX_SCHED_CTXS){
+			starpu_sched_ctx_move_task_to_ctx(task, child_sched_ctx);
+			starpu_sched_ctx_revert_task_counters(sched_ctx_id, task->flops);
+			return NULL;
+		}
+	}
+
+	/* if we got a task (task), we may have some left in the queue (worker->tasks_queue->ntasks) that were freshly added (nb_added_tasks) */
+	if(task && worker->tasks_queue->ntasks && nb_added_tasks && starpu_get_prefetch_flag()){
+		const unsigned memory_node = starpu_worker_get_memory_node(workerid);
+
+		/* prefetch the new tasks that I own, but protect my node from work stealing during the prefetch */
+//		STARPU_PTHREAD_MUTEX_LOCK(&worker->ws_prefetch_mutex);
+		/* already protected - the lock of the worker is taken */
+
+		/* prefetch tasks but stop in case we know someone may steal a task from us */
+		/* while(nb_added_tasks && hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0){ */
+		/* 	/\* prefetch from closest to end task *\/ */
+		/* 	starpu_prefetch_task_input_on_node(worker->tasks_queue[(worker->tasks_queue_index+worker->tasks_queue_size-nb_added_tasks)%HETEROPRIO_MAX_PREFETCH], memory_node); */
+		/* 	nb_added_tasks -= 1; */
+		/* } */
+
+/* TODO beranger: check this out - is this how you planned to prefetch tasks? */
+		struct starpu_task *task_to_prefetch = NULL;
+		for (task_to_prefetch  = starpu_task_list_begin(&worker->tasks_queue->taskq);
+		     (task_to_prefetch != starpu_task_list_end(&worker->tasks_queue->taskq) && 
+		      nb_added_tasks && hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0);
+		     task_to_prefetch  = starpu_task_list_next(task_to_prefetch))
+		{
+			/* prefetch from closest to end task */
+			starpu_prefetch_task_input_on_node(task_to_prefetch, memory_node);
+			nb_added_tasks -= 1;
+		}
+
+
+//		STARPU_PTHREAD_MUTEX_UNLOCK(&worker->ws_prefetch_mutex);
+	}
+
+	return task;
+}
+
+struct starpu_sched_policy _starpu_sched_heteroprio_policy =
+{
+	.init_sched = initialize_heteroprio_policy,
+	.deinit_sched = deinitialize_heteroprio_policy,
+	.add_workers = add_workers_heteroprio_policy,
+	.remove_workers = remove_workers_heteroprio_policy,
+	.push_task = push_task_heteroprio_policy,
+	.simulate_push_task = NULL,
+	.push_task_notify = NULL,
+	.pop_task = pop_task_heteroprio_policy,
+	.pre_exec_hook = NULL,
+	.post_exec_hook = NULL,
+	.pop_every_task = NULL,
+	.policy_name = "heteroprio",
+	.policy_description = "heteroprio"
+};