11 years ago · 6fd68c0e07
--- a/examples/sched_ctx/parallel_code.c
+++ b/examples/sched_ctx/parallel_code.c
@@ -16,23 +16,49 @@
 
																  */
															
 
																 #include <starpu.h>
															
 
																+#include <omp.h>
															
 
																 #ifdef STARPU_QUICK_CHECK
															
 
																 #define NTASKS 64
															
 
																 #else
															
 
																-#define NTASKS 1000
															
 
																+#define NTASKS 10
															
 
																 #endif
															
 
																-int tasks_executed = 0;
															
 
																+int tasks_executed[2];
															
 
																 starpu_pthread_mutex_t mut;
															
 
																-static void sched_ctx_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
															
 
																+int parallel_code(int sched_ctx)
															
 
																 {
															
 
																-	starpu_pthread_mutex_lock(&mut);
															
 
																-	tasks_executed++;
															
 
																-	starpu_pthread_mutex_unlock(&mut);
															
 
																+	int i;
															
 
																+	int t = 0;
															
 
																+	int *cpuids = NULL;
															
 
																+	int ncpuids = 0;
															
 
																+	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
															
 
																+
															
 
																+//	printf("execute task of %d threads \n", ncpuids);
															
 
																+	omp_set_nested(1);
															
 
																+#pragma omp parallel num_threads(1)
															
 
																+	{
															
 
																+#pragma omp parallel num_threads(ncpuids)
															
 
																+		{
															
 
																+			starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
															
 
																+// 			printf("cpu = %d ctx%d nth = %d\n", sched_getcpu(), sched_ctx, omp_get_num_threads());
															
 
																+#pragma omp for
															
 
																+			for(i = 0; i < NTASKS; i++)
															
 
																+				t++;
															
 
																+		}
															
 
																+	}
															
 
																+	free(cpuids);
															
 
																+	return t;
															
 
																+}
															
 
																+
															
 
																+static void sched_ctx_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
															
 
																+{
															
 
																+	unsigned sched_ctx = (unsigned)arg;
															
 
																+	tasks_executed[sched_ctx-1] = parallel_code(sched_ctx);
															
 
																 }
															
 
																+
															
 
																 static struct starpu_codelet sched_ctx_codelet =
															
 
																 {
															
 
																 	.cpu_funcs = {sched_ctx_func, NULL},
															
@@ -43,15 +69,10 @@ static struct starpu_codelet sched_ctx_codelet =
 
																 	.name = "sched_ctx"
															
 
																 };
															
 
																-int parallel_code(int nprocs)
															
 
																+void *th(void* p)
															
 
																 {
															
 
																-	int i;
															
 
																-	int tasks = 0;
															
 
																-#pragma omp parallel for num_threads(nprocs)
															
 
																-	for (i = 0; i < NTASKS; i++) 
															
 
																-		tasks++;
															
 
																-
															
 
																-	return tasks;
															
 
																+	unsigned sched_ctx = (unsigned)p;
															
 
																+	tasks_executed[sched_ctx-1] = (int)starpu_sched_ctx_exec_parallel_code((void*)parallel_code, (void*)sched_ctx, sched_ctx); 
															
 
																 }
															
 
																 int main(int argc, char **argv)
															
@@ -67,12 +88,12 @@ int main(int argc, char **argv)
 
																 	starpu_pthread_mutex_init(&mut, NULL);
															
 
																 	int nprocs1 = 1;
															
 
																 	int nprocs2 = 1;
															
 
																-	int procs1[20], procs2[20];
															
 
																-	procs1[0] = 0;
															
 
																-	procs2[0] = 0;
															
 
																+	int *procs1, *procs2;
															
 
																 #ifdef STARPU_USE_CPU
															
 
																 	unsigned ncpus =  starpu_cpu_worker_get_count();
															
 
																+	procs1 = (int*)malloc(ncpus*sizeof(int));
															
 
																+	procs2 = (int*)malloc(ncpus*sizeof(int));
															
 
																 	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
															
 
																 	nprocs1 = ncpus/2;
															
@@ -80,14 +101,60 @@ int main(int argc, char **argv)
 
																 	int j, k = 0;
															
 
																 	for(j = nprocs1; j < nprocs1+nprocs2; j++)
															
 
																 		procs2[k++] = j;
															
 
																+#else
															
 
																+	procs1 = (int*)malloc(nprocs1*sizeof(int));
															
 
																+	procs2 = (int*)malloc(nprocs2*sizeof(int));
															
 
																+	procs1[0] = 0:
															
 
																+	procs2[0] = 0:
															
 
																+
															
 
																 #endif
															
 
																+	int p;
															
 
																+	for(p = 0; p <nprocs1; p++)
															
 
																+		printf("w %d in ctx 1 \n", procs1[p]);
															
 
																+
															
 
																+	for(p = 0; p <nprocs2; p++)
															
 
																+		printf("w %d in ctx 2 \n", procs2[p]);
															
 
																+
															
 
																 	/*create contexts however you want*/
															
 
																 	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0);
															
 
																 	unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0);
															
 
																 	/*indicate what to do with the resources when context 2 finishes (it depends on your application)*/
															
 
																-	starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1);
															
 
																+//	starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1);
															
 
																+
															
 
																+	int nprocs3 = nprocs1/2;
															
 
																+	int nprocs4 = nprocs1/2;
															
 
																+	int nprocs5 = nprocs2/2;
															
 
																+	int nprocs6 = nprocs2/2;
															
 
																+	int procs3[nprocs3];
															
 
																+	int procs4[nprocs4];
															
 
																+	int procs5[nprocs5];
															
 
																+	int procs6[nprocs6];
															
 
																+
															
 
																+	k = 0;
															
 
																+	for(j = 0; j < nprocs3; j++)
															
 
																+		procs3[k++] = procs1[j];
															
 
																+	k = 0;
															
 
																+	for(j = nprocs3; j < nprocs3+nprocs4; j++)
															
 
																+		procs4[k++] = procs1[j];
															
 
																+
															
 
																+	k = 0;
															
 
																+	for(j = 0; j < nprocs5; j++)
															
 
																+		procs5[k++] = procs2[j];
															
 
																+	k = 0;
															
 
																+	for(j = nprocs5; j < nprocs5+nprocs6; j++)
															
 
																+		procs6[k++] = procs2[j];
															
 
																+
															
 
																+	int master3 = starpu_sched_ctx_book_workers_for_task(sched_ctx1, procs3, nprocs3);
															
 
																+	int master4 = starpu_sched_ctx_book_workers_for_task(sched_ctx1, procs4, nprocs4);
															
 
																+
															
 
																+	int master5 = starpu_sched_ctx_book_workers_for_task(sched_ctx2, procs5, nprocs5);
															
 
																+	int master6 = starpu_sched_ctx_book_workers_for_task(sched_ctx2, procs6, nprocs6);
															
 
																+
															
 
																+/* 	int master1 = starpu_sched_ctx_book_workers_for_task(sched_ctx1, procs1, nprocs1); */
															
 
																+/* 	int master2 = starpu_sched_ctx_book_workers_for_task(sched_ctx2, procs2, nprocs2); */
															
 
																+
															
 
																 	int i;
															
 
																 	for (i = 0; i < ntasks; i++)
															
@@ -95,7 +162,7 @@ int main(int argc, char **argv)
 
																 		struct starpu_task *task = starpu_task_create();
															
 
																 		task->cl = &sched_ctx_codelet;
															
 
																-		task->cl_arg = NULL;
															
 
																+		task->cl_arg = sched_ctx1;
															
 
																 		/*submit tasks to context*/
															
 
																 		ret = starpu_task_submit_to_ctx(task,sched_ctx1);
															
@@ -103,23 +170,49 @@ int main(int argc, char **argv)
 
																 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
 
																 	}
															
 
																+	for (i = 0; i < ntasks; i++)
															
 
																+	{
															
 
																+		struct starpu_task *task = starpu_task_create();
															
 
																+
															
 
																+		task->cl = &sched_ctx_codelet;
															
 
																+		task->cl_arg = sched_ctx2;
															
 
																+
															
 
																+		/*submit tasks to context*/
															
 
																+		ret = starpu_task_submit_to_ctx(task,sched_ctx2);
															
 
																+
															
 
																+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
 
																+	}
															
 
																+
															
 
																+
															
 
																 	/* tell starpu when you finished submitting tasks to this context
															
 
																 	   in order to allow moving resources from this context to the inheritor one
															
 
																 	   when its corresponding tasks finished executing */
															
 
																-	starpu_sched_ctx_finished_submit(sched_ctx1);
															
 
																-	/* execute an openmp code */
															
 
																-	int ret_ntasks = (int)starpu_sched_ctx_exec_parallel_code((void*)parallel_code, (void*)nprocs2, sched_ctx2);
															
 
																-	starpu_sched_ctx_finished_submit(sched_ctx2);
															
 
																 	/* wait for all tasks at the end*/
															
 
																 	starpu_task_wait_for_all();
															
 
																+/* 	starpu_sched_ctx_unbook_workers_for_task(sched_ctx1, master1); */
															
 
																+/* 	starpu_sched_ctx_unbook_workers_for_task(sched_ctx2, master2); */
															
 
																+
															
 
																+	starpu_sched_ctx_unbook_workers_for_task(sched_ctx1, master3);
															
 
																+	starpu_sched_ctx_unbook_workers_for_task(sched_ctx1, master4);
															
 
																+
															
 
																+	starpu_sched_ctx_unbook_workers_for_task(sched_ctx2, master5);
															
 
																+	starpu_sched_ctx_unbook_workers_for_task(sched_ctx2, master6);
															
 
																+
															
 
																+	pthread_t mp[2];
															
 
																+	pthread_create(&mp[0], NULL, th, sched_ctx1);
															
 
																+	pthread_create(&mp[1], NULL, th, sched_ctx2);
															
 
																+
															
 
																+	pthread_join(mp[0], NULL);
															
 
																+	pthread_join(mp[1], NULL);
															
 
																+
															
 
																 	starpu_sched_ctx_delete(sched_ctx1);
															
 
																 	starpu_sched_ctx_delete(sched_ctx2);
															
 
																-	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed, ntasks);
															
 
																-	printf("ctx%d: tasks openmp executed %d out of %d\n", sched_ctx2, ret_ntasks, NTASKS);
															
 
																+	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS);
															
 
																+	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS);
															
 
																 	starpu_shutdown();
															
 
																 	return 0;
															
--- a/include/starpu_sched_ctx.h
+++ b/include/starpu_sched_ctx.h
@@ -116,6 +116,15 @@ void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ct
 
																 void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority);
															
 
																 unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id);
															
 
																+
															
 
																+void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids);
															
 
																+
															
 
																+void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid);
															
 
																+
															
 
																+int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers);
															
 
																+
															
 
																+void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master);
															
 
																+
															
 
																 #ifdef STARPU_USE_SC_HYPERVISOR
															
 
																 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id);
															
 
																 #endif /* STARPU_USE_SC_HYPERVISOR */
															
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -219,23 +219,22 @@ static void _starpu_sched_ctx_free_scheduling_data(struct _starpu_sched_ctx *sch
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																 static void _starpu_sched_ctx_create_hwloc_tree(struct _starpu_sched_ctx *sched_ctx)
															
 
																 {
															
 
																-	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
															
 
																 	sched_ctx->hwloc_workers_set = hwloc_bitmap_alloc();
															
 
																 	struct starpu_worker_collection *workers = sched_ctx->workers;
															
 
																-	int worker;
															
 
																+	struct _starpu_worker *worker;
															
 
																 	struct starpu_sched_ctx_iterator it;
															
 
																 	if(workers->init_iterator)
															
 
																 		workers->init_iterator(workers, &it);
															
 
																 	while(workers->has_next(workers, &it))
															
 
																 	{
															
 
																-		worker = workers->get_next(workers, &it);
															
 
																-		if(!starpu_worker_is_combined_worker(worker))
															
 
																+		worker = _starpu_get_worker_struct(workers->get_next(workers, &it));
															
 
																+		if(!starpu_worker_is_combined_worker(worker->workerid))
															
 
																 		{
															
 
																 			hwloc_bitmap_or(sched_ctx->hwloc_workers_set,
															
 
																 					sched_ctx->hwloc_workers_set,
															
 
																-					config->workers[worker].hwloc_cpu_set);
															
 
																+					worker->hwloc_cpu_set);
															
 
																 		}
															
 
																 	}
															
@@ -1496,7 +1495,7 @@ static void _starpu_sched_ctx_bind_thread_to_ctx_cpus(unsigned sched_ctx_id)
 
																         {
															
 
																 		hwloc_bitmap_t set = sched_ctx->hwloc_workers_set;
															
 
																                 int ret;
															
 
																-		
															
 
																+		int current_worker_id = starpu_worker_get_id();
															
 
																                 ret = hwloc_set_cpubind (config->topology.hwtopology, set,
															
 
																                                          HWLOC_CPUBIND_THREAD);
															
 
																 		if (ret)
															
@@ -1509,10 +1508,11 @@ static void _starpu_sched_ctx_bind_thread_to_ctx_cpus(unsigned sched_ctx_id)
 
																 #else
															
 
																 #warning no sched ctx CPU binding support
															
 
																 #endif
															
 
																+
															
 
																 	return;
															
 
																 }
															
 
																-void _starpu_sched_ctx_rebind_thread_to_its_cpu(unsigned cpuid)
															
 
																+void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid)
															
 
																 {
															
 
																 	struct _starpu_machine_config *config = _starpu_get_machine_config();
															
@@ -1527,7 +1527,7 @@ void _starpu_sched_ctx_rebind_thread_to_its_cpu(unsigned cpuid)
 
																 	if (support->cpubind->set_thisthread_cpubind)
															
 
																 	{
															
 
																 		hwloc_obj_t obj = hwloc_get_obj_by_depth (config->topology.hwtopology,
															
 
																-							  config->cpu_depth, cpuid);
															
 
																+							  config->pu_depth, cpuid);
															
 
																 		hwloc_bitmap_t set = obj->cpuset;
															
 
																 		int ret;
															
@@ -1569,27 +1569,30 @@ void _starpu_sched_ctx_rebind_thread_to_its_cpu(unsigned cpuid)
 
																 }
															
 
																-static void _starpu_sched_ctx_get_workers_to_sleep(unsigned sched_ctx_id)
															
 
																+static void _starpu_sched_ctx_get_workers_to_sleep(unsigned sched_ctx_id, int *workerids, int nworkers, int master)
															
 
																 {
															
 
																+	int current_worker_id = starpu_worker_get_id();
															
 
																+	
															
 
																 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
															
 
																-	struct starpu_worker_collection *workers = sched_ctx->workers;
															
 
																-	struct starpu_sched_ctx_iterator it;
															
 
																+	int w;
															
 
																 	struct _starpu_worker *worker = NULL;
															
 
																-	if(workers->init_iterator)
															
 
																-		workers->init_iterator(workers, &it);
															
 
																-
															
 
																-	while(workers->has_next(workers, &it))
															
 
																+	for(w = 0; w < nworkers; w++)
															
 
																 	{
															
 
																-		worker = _starpu_get_worker_struct(workers->get_next(workers, &it));
															
 
																-		STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
															
 
																+		worker = _starpu_get_worker_struct(workerids[w]);
															
 
																+		worker->master = master;
															
 
																+		if(current_worker_id == -1 || worker->workerid != current_worker_id)
															
 
																+			STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
															
 
																 		worker->parallel_sect = 1;
															
 
																-		STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
															
 
																+		if(current_worker_id == -1 || worker->workerid != current_worker_id)
															
 
																+			STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
															
 
																 	}
															
 
																-	while(workers->has_next(workers, &it))
															
 
																+	int workerid;
															
 
																+	for(w = 0; w < nworkers; w++)
															
 
																 	{
															
 
																-		workers->get_next(workers, &it);
															
 
																-		sem_wait(&sched_ctx->parallel_code_sem);
															
 
																+		workerid = workerids[w];
															
 
																+		if(current_worker_id == -1 || workerid != current_worker_id)
															
 
																+			sem_wait(&sched_ctx->parallel_code_sem);
															
 
																 	}
															
 
																 	return;
															
 
																 }
															
@@ -1608,30 +1611,41 @@ void _starpu_sched_ctx_signal_worker_blocked(int workerid)
 
																 	return;
															
 
																 }
															
 
																-static void _starpu_sched_ctx_wake_up_workers(unsigned sched_ctx_id)
															
 
																+static void _starpu_sched_ctx_wake_up_workers(unsigned sched_ctx_id, int master)
															
 
																 {
															
 
																+	int current_worker_id = starpu_worker_get_id();
															
 
																 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
															
 
																-
															
 
																 	struct starpu_worker_collection *workers = sched_ctx->workers;
															
 
																-	struct starpu_sched_ctx_iterator it;
															
 
																 	struct _starpu_worker *worker = NULL;
															
 
																+
															
 
																+	struct starpu_sched_ctx_iterator it;
															
 
																 	if(workers->init_iterator)
															
 
																 		workers->init_iterator(workers, &it);
															
 
																 	while(workers->has_next(workers, &it))
															
 
																 	{
															
 
																 		worker = _starpu_get_worker_struct(workers->get_next(workers, &it));
															
 
																-		STARPU_PTHREAD_MUTEX_LOCK(&worker->parallel_sect_mutex);
															
 
																-		STARPU_PTHREAD_COND_SIGNAL(&worker->parallel_sect_cond);
															
 
																-		STARPU_PTHREAD_MUTEX_UNLOCK(&worker->parallel_sect_mutex);
															
 
																+		if(worker->master == master)
															
 
																+		{
															
 
																+			if(current_worker_id == -1 || worker->workerid != current_worker_id)
															
 
																+			{
															
 
																+				STARPU_PTHREAD_MUTEX_LOCK(&worker->parallel_sect_mutex);
															
 
																+				STARPU_PTHREAD_COND_SIGNAL(&worker->parallel_sect_cond);
															
 
																+				STARPU_PTHREAD_MUTEX_UNLOCK(&worker->parallel_sect_mutex);
															
 
																+			}
															
 
																+			else
															
 
																+				worker->parallel_sect = 0;
															
 
																+			worker->master = -1;
															
 
																+		}
															
 
																 	}
															
 
																 	return;
															
 
																 }
															
 
																 void* starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void* param, unsigned sched_ctx_id)
															
 
																 {
															
 
																-	/* get starpu workers to sleep */
															
 
																-	_starpu_sched_ctx_get_workers_to_sleep(sched_ctx_id);
															
 
																+	int *workerids;
															
 
																+	int nworkers = starpu_sched_ctx_get_workers_list(sched_ctx_id, &workerids);
															
 
																+	int master = starpu_sched_ctx_book_workers_for_task(sched_ctx_id, workerids, nworkers);
															
 
																 	/* bind current thread on all workers of the context */
															
 
																 	_starpu_sched_ctx_bind_thread_to_ctx_cpus(sched_ctx_id);
															
@@ -1640,7 +1654,82 @@ void* starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void* param, uns
 
																 	void* ret = func(param);
															
 
																 	/* wake up starpu workers */
															
 
																-	_starpu_sched_ctx_wake_up_workers(sched_ctx_id);
															
 
																+	starpu_sched_ctx_unbook_workers_for_task(sched_ctx_id, master);
															
 
																 	return ret;
															
 
																 }
															
 
																+
															
 
																+void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids)
															
 
																+{
															
 
																+	int current_worker_id = starpu_worker_get_id();
															
 
																+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
															
 
																+	struct starpu_worker_collection *workers = sched_ctx->workers;
															
 
																+
															
 
																+	(*cpuids) = (int*)malloc(workers->nworkers*sizeof(int));
															
 
																+	int w = 0;
															
 
																+
															
 
																+	struct _starpu_worker *worker = NULL;
															
 
																+	struct starpu_sched_ctx_iterator it;
															
 
																+	int workerid;
															
 
																+	if(workers->init_iterator)
															
 
																+		workers->init_iterator(workers, &it);
															
 
																+
															
 
																+	while(workers->has_next(workers, &it))
															
 
																+	{
															
 
																+		workerid = workers->get_next(workers, &it);
															
 
																+		worker = _starpu_get_worker_struct(workerid);
															
 
																+		if(worker->master == current_worker_id || workerid == current_worker_id)
															
 
																+			(*cpuids)[w++] = starpu_worker_get_bindid(workerid);
															
 
																+	}
															
 
																+	*ncpuids = w;
															
 
																+	return;
															
 
																+}
															
 
																+
															
 
																+int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers)
															
 
																+{
															
 
																+	int current_worker_id = starpu_worker_get_id();
															
 
																+
															
 
																+	int final_workerids[nworkers];
															
 
																+	int nfinal_workerids = 0;
															
 
																+	int w;
															
 
																+	int master = -1;
															
 
																+	for(w = 0; w < nworkers; w++)
															
 
																+	{
															
 
																+		if(current_worker_id == -1)
															
 
																+		{
															
 
																+			final_workerids[nfinal_workerids++] = workerids[w];
															
 
																+			if(nfinal_workerids == nworkers - 1)
															
 
																+			{
															
 
																+				master = workerids[nfinal_workerids];
															
 
																+				break;
															
 
																+			}
															
 
																+		}
															
 
																+		else
															
 
																+		{
															
 
																+			if(workerids[w] != current_worker_id)
															
 
																+				final_workerids[nfinal_workerids++] = workerids[w];
															
 
																+			else
															
 
																+			{
															
 
																+				if(nfinal_workerids == nworkers - 1)
															
 
																+				{
															
 
																+					master = workerids[nfinal_workerids];
															
 
																+					break;
															
 
																+				}
															
 
																+				else
															
 
																+					master = current_worker_id;
															
 
																+			}	
															
 
																+		}
															
 
																+	}
															
 
																+	/* get starpu workers to sleep */
															
 
																+	_starpu_sched_ctx_get_workers_to_sleep(sched_ctx_id, final_workerids, nfinal_workerids, master);
															
 
																+
															
 
																+	/* bind current thread on all workers of the context */
															
 
																+//	_starpu_sched_ctx_bind_thread_to_ctx_cpus(sched_ctx_id);
															
 
																+	return master;
															
 
																+}
															
 
																+
															
 
																+void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master)
															
 
																+{
															
 
																+	/* wake up starpu workers */
															
 
																+	_starpu_sched_ctx_wake_up_workers(sched_ctx_id, master);
															
 
																+}
															
--- a/src/core/sched_ctx.h
+++ b/src/core/sched_ctx.h
@@ -180,9 +180,6 @@ starpu_pthread_rwlock_t* _starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched
 
																    (if it is the last one awake in a context he should better keep awake) */
															
 
																 unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker);
															
 
																-/*rebind each thread on its cpu after finishing a parallel code */
															
 
																-void _starpu_sched_ctx_rebind_thread_to_its_cpu(unsigned cpuid);
															
 
																-
															
 
																 /* let the appl know that the worker blocked to execute parallel code */
															
 
																 void _starpu_sched_ctx_signal_worker_blocked(int workerid);
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -269,6 +269,7 @@ static int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch,
 
																 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
															
 
																 {
															
 
																+	if(config.workers[workerid].parallel_sect) return 0;
															
 
																 	/* TODO: check that the task operand sizes will fit on that device */
															
 
																 	return (task->cl->where & config.workers[workerid].worker_mask) &&
															
 
																 		_starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
															
@@ -448,6 +449,7 @@ static void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu
 
																 	workerarg->reverse_phase[1] = 0;
															
 
																 	workerarg->pop_ctx_priority = 1;
															
 
																 	workerarg->sched_mutex_locked = 0;
															
 
																+	workerarg->master = -1;
															
 
																 	/* cpu_set/hwloc_cpu_set initialized in topology.c */
															
 
																 }
															
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -117,6 +117,8 @@ LIST_TYPE(_starpu_worker,
 
																 	/* flag to know if sched_mutex is locked or not */
															
 
																 	unsigned sched_mutex_locked;
															
 
																+	
															
 
																+	int master;
															
 
																 #ifdef __GLIBC__
															
 
																 	cpu_set_t cpu_set;
															
 
																 #endif /* __GLIBC__ */
															
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -203,7 +203,7 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int wor
 
																 		STARPU_PTHREAD_MUTEX_LOCK(&args->parallel_sect_mutex);
															
 
																 		_starpu_sched_ctx_signal_worker_blocked(args->workerid);
															
 
																 		STARPU_PTHREAD_COND_WAIT(&args->parallel_sect_cond, &args->parallel_sect_mutex);
															
 
																-		_starpu_sched_ctx_rebind_thread_to_its_cpu(args->bindid);
															
 
																+		starpu_sched_ctx_bind_current_thread_to_cpuid(args->bindid);
															
 
																 		STARPU_PTHREAD_MUTEX_UNLOCK(&args->parallel_sect_mutex);
															
 
																 		args->parallel_sect = 0;
															
 
																 	}
															
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -719,10 +719,6 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 
																 		while(workers->has_next(workers, &it))
															
 
																 		{
															
 
																 			worker = workers->get_next(workers, &it);
															
 
																-			if (worker >= nworkers_ctx)
															
 
																-				/* This is a just-added worker, discard it */
															
 
																-				continue;
															
 
																-
															
 
																 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
															
 
																 			{
															
 
																 				if (!starpu_worker_can_execute_task(worker, task, nimpl))
															
@@ -730,8 +726,6 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 
																 					/* no one on that queue may execute this task */
															
 
																 					continue;
															
 
																 				}
															
 
																-
															
 
																-
															
 
																 				fitness[worker_ctx][nimpl] = dt->alpha*(exp_end[worker_ctx][nimpl] - best_exp_end)
															
 
																 					+ dt->beta*(local_data_penalty[worker_ctx][nimpl])
															
 
																 					+ dt->_gamma*(local_power[worker_ctx][nimpl]);