New modular-heteroprio scheduler

Samuel Thibault, 5 years ago
parent commit 8cd00d737f

+ 1 - 0
ChangeLog

@@ -43,6 +43,7 @@ New features:
   * starpufft: Support 3D.
   * New modular-eager-prio scheduler.
   * Add 'ready' heuristic to modular schedulers.
+  * New modular-heteroprio scheduler.
 
 Changes:
   * Modification in the Native Fortran interface of the functions

+ 4 - 0
doc/doxygen/chapters/320_scheduling.doxy

@@ -238,6 +238,10 @@ modular-heft-prio is similar to \c modular-heft, but only decides the memory
 node, not the exact worker, just pushing tasks to one central queue per memory
 node.
 
+- Heteroprio Scheduler: \n
+Maps tasks to workers similarly to HEFT, but first assigns strongly-accelerated tasks
+to GPUs, then less-accelerated tasks to CPUs.
+
 To use one of these schedulers, one can set the environment variable \ref STARPU_SCHED.
 
 \section StaticScheduling Static Scheduling
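
For instance, to try the new scheduler on an existing application, it can also be selected by name at initialization time. A minimal sketch using the public starpu_conf API (the application body is elided and hypothetical):

#include <starpu.h>

int main(void)
{
	struct starpu_conf conf;
	starpu_conf_init(&conf);

	/* Select the scheduler by name; equivalent to running the
	 * application with STARPU_SCHED=modular-heteroprio set. */
	conf.sched_policy_name = "modular-heteroprio";

	if (starpu_init(&conf) != 0)
		return 1;

	/* ... declare data and submit tasks as usual ... */

	starpu_shutdown();
	return 0;
}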

+ 1 - 1
examples/cholesky/cholesky.sh

@@ -17,7 +17,7 @@
 
 ROOT=${0%.sh}
 #[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS=`$(dirname $0)/../../tools/starpu_sched_display`
-[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS="dmdas modular-heft modular-heft-prio dmdap dmdar dmda dmdasd prio lws"
+[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS="dmdas modular-heft2 modular-heft modular-heft-prio modular-heteroprio dmdap dmdar dmda dmdasd prio lws"
 [ -n "$STARPU_HOSTNAME" ] || export STARPU_HOSTNAME=mirage
 unset MALLOC_PERTURB_

+ 10 - 0
include/starpu_sched_component.h

@@ -553,6 +553,16 @@ int starpu_sched_component_is_heft(struct starpu_sched_component *component);
 /** @} */
 
 /**
+   @name Resource-mapping Heteroprio Component API
+   @{
+*/
+
+struct starpu_sched_component * starpu_sched_component_heteroprio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data * params) STARPU_ATTRIBUTE_MALLOC;
+int starpu_sched_component_is_heteroprio(struct starpu_sched_component *component);
+
+/** @} */
+
+/**
   @name Special-purpose Best_Implementation Component API
   @{
 */
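
As a hint of intended usage, a custom modular policy can hand the creation function above to the simple-scheduler helper, which builds the surrounding tree of queues. A sketch mirroring the modular_heteroprio.c policy added below (the function name is hypothetical and the flag set is a trimmed-down variant of the one used there):

#include <starpu_sched_component.h>

/* Hypothetical init function for a scheduling context: lets the
 * simple-scheduler helper wire a FIFO above and per-worker FIFOs
 * below a heteroprio decision component. */
static void init_my_heteroprio(unsigned sched_ctx_id)
{
	starpu_sched_component_initialize_simple_scheduler(
			(starpu_sched_component_create_t) starpu_sched_component_heteroprio_create,
			NULL, /* default MCT parameters */
			STARPU_SCHED_SIMPLE_DECIDE_WORKERS |
			STARPU_SCHED_SIMPLE_PERFMODEL |
			STARPU_SCHED_SIMPLE_FIFO_ABOVE |
			STARPU_SCHED_SIMPLE_FIFOS_BELOW,
			sched_ctx_id);
}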

+ 2 - 0
src/Makefile.am

@@ -287,6 +287,7 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 	sched_policies/component_eager_calibration.c				\
 	sched_policies/component_mct.c				\
 	sched_policies/component_heft.c				\
+	sched_policies/component_heteroprio.c				\
 	sched_policies/component_best_implementation.c		\
 	sched_policies/component_perfmodel_select.c				\
 	sched_policies/component_composed.c				\
@@ -303,6 +304,7 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 	sched_policies/modular_parallel_heft.c			\
 	sched_policies/modular_heft.c				\
 	sched_policies/modular_heft_prio.c			\
+	sched_policies/modular_heteroprio.c			\
 	sched_policies/modular_heft2.c				\
 	sched_policies/modular_ws.c				\
 	sched_policies/modular_ez.c

+ 1 - 0
src/core/sched_policy.c

@@ -70,6 +70,7 @@ static struct starpu_sched_policy *predefined_policies[] =
 	&_starpu_sched_modular_heft_policy,
 	&_starpu_sched_modular_heft_prio_policy,
 	&_starpu_sched_modular_heft2_policy,
+	&_starpu_sched_modular_heteroprio_policy,
 	&_starpu_sched_modular_parallel_heft_policy,
 	&_starpu_sched_eager_policy,
 	&_starpu_sched_prio_policy,

+ 1 - 0
src/core/sched_policy.h

@@ -107,6 +107,7 @@ extern struct starpu_sched_policy _starpu_sched_modular_ws_policy;
 extern struct starpu_sched_policy _starpu_sched_modular_heft_policy;
 extern struct starpu_sched_policy _starpu_sched_modular_heft_prio_policy;
 extern struct starpu_sched_policy _starpu_sched_modular_heft2_policy;
+extern struct starpu_sched_policy _starpu_sched_modular_heteroprio_policy;
 extern struct starpu_sched_policy _starpu_sched_modular_parallel_heft_policy;
 extern struct starpu_sched_policy _starpu_sched_graph_test_policy;
 extern struct starpu_sched_policy _starpu_sched_tree_heft_hierarchical_policy;

+ 478 - 0
src/sched_policies/component_heteroprio.c

@@ -0,0 +1,478 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013,2017,2018                           Inria
+ * Copyright (C) 2014-2018                                CNRS
+ * Copyright (C) 2013-2019                                Université de Bordeaux
+ * Copyright (C) 2013                                     Simon Archipoff
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Heteroprio, which sorts tasks by acceleration factor into buckets, and makes
+ * GPUs take accelerated tasks first and CPUs take non-accelerated tasks first */
+
+#include <starpu_sched_component.h>
+#include "prio_deque.h"
+#include <starpu_perfmodel.h>
+#include "helper_mct.h"
+#include <float.h>
+#include <core/sched_policy.h>
+#include <core/task.h>
+
+/* Approximation ratio for acceleration factor bucketing
+ * We will put tasks with +-10% similar acceleration into the same bucket. */
+#define APPROX 0.10
+
+struct _starpu_heteroprio_data
+{
+	/* This is an array of priority queues.
+	 * The array is sorted by acceleration factor, most accelerated first */
+	struct _starpu_prio_deque **bucket;
+	float *accel;
+	unsigned naccel;
+
+	/* This contains tasks which are not supported on all archs. */
+	struct _starpu_prio_deque no_accel;
+
+	/* This protects all queues */
+	starpu_pthread_mutex_t mutex;
+
+	struct _starpu_mct_data *mct_data;
+};
+
+static int heteroprio_push(struct starpu_sched_component *component, struct _starpu_heteroprio_data *data, enum starpu_worker_archtype archtype, int front)
+{
+	struct starpu_task *task = NULL;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	int j, ret;
+	double acceleration = INFINITY;
+
+	struct _starpu_mct_data * d = data->mct_data;
+
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	if (front)
+		/* Pick up accelerated tasks first */
+		for (j = 0; j < (int) data->naccel; j++)
+		{
+			task = _starpu_prio_deque_pop_task(data->bucket[j]);
+			if (task)
+				break;
+		}
+	else
+		/* Pick up accelerated tasks last */
+		for (j = (int) data->naccel-1; j >= 0; j--)
+		{
+			task = _starpu_prio_deque_pop_task(data->bucket[j]);
+			if (task)
+				break;
+		}
+
+	if (task)
+	{
+		acceleration = data->accel[j];
+		//fprintf(stderr, "for %s thus %s, found task %p in bucket %d: %f\n", starpu_worker_get_type_as_string(archtype), front?"front":"back", task, j, acceleration);
+	}
+
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	if (!task)
+		return 1;
+
+	/* TODO: we might want to preferentially pick a task whose data is already on some GPU */
+
+	struct starpu_sched_component * best_component;
+
+	/* Estimated task duration for each child */
+	double estimated_lengths[component->nchildren];
+	/* Estimated transfer duration for each child */
+	double estimated_transfer_length[component->nchildren];
+	/* Estimated transfer+task termination for each child */
+	double estimated_ends_with_task[component->nchildren];
+
+	/* Minimum transfer+task termination on all children */
+	double min_exp_end_with_task;
+	/* Maximum transfer+task termination on all children */
+	double max_exp_end_with_task;
+
+	unsigned suitable_components[component->nchildren];
+	unsigned nsuitable_components;
+
+	nsuitable_components = starpu_mct_compute_execution_times(component, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			suitable_components);
+
+	/* Entering critical section to make sure no two workers
+	   make scheduling decisions at the same time */
+	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
+
+	starpu_mct_compute_expected_times(component, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			estimated_ends_with_task,
+			&min_exp_end_with_task, &max_exp_end_with_task,
+			suitable_components, nsuitable_components);
+
+	/* And now find out which worker suits best for this task,
+	 * including data transfer */
+	int best_icomponent = starpu_mct_get_best_component(d, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			estimated_ends_with_task,
+			min_exp_end_with_task, max_exp_end_with_task,
+			suitable_components, nsuitable_components);
+
+	best_component = component->children[best_icomponent];
+
+	int idworker;
+	for(idworker = starpu_bitmap_first(best_component->workers);
+		idworker != -1;
+		idworker = starpu_bitmap_next(best_component->workers, idworker))
+	{
+		if (starpu_worker_get_type(idworker) == archtype)
+			break;
+	}
+
+	if (idworker == -1)
+	{
+		STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+	}
+	else
+	{
+		/* Ok, we do have a worker there of that type, try to push it there. */
+		STARPU_ASSERT(!starpu_sched_component_is_worker(best_component));
+		starpu_sched_task_break(task);
+		ret = starpu_sched_component_push_task(component,best_component,task);
+
+		/* I can now exit the critical section: Pushing the task above ensures that its execution
+		   time will be taken into account for subsequent scheduling decisions */
+		STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+		if (!ret)
+		{
+			//fprintf(stderr, "pushed %p to %d\n", task, best_icomponent);
+			/* Great! */
+			return 0;
+		}
+	}
+
+	/* No such kind of worker there, or it refused our task, abort */
+
+	//fprintf(stderr, "could not push %p to %d actually\n", task, best_icomponent);
+	/* Could not push to child actually, push that one back */
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	for (j = 0; j < (int) data->naccel; j++) {
+		if (acceleration == data->accel[j])
+		{
+			_starpu_prio_deque_push_front_task(data->bucket[j], task);
+			break;
+		}
+	}
+	STARPU_ASSERT(j != (int) data->naccel);
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	//fprintf(stderr, "finished pushing to %d\n", archtype);
+
+	return 1;
+}
+
+static int heteroprio_push_heft(struct starpu_sched_component *component, struct _starpu_heteroprio_data *data, struct starpu_task *task)
+{
+	struct _starpu_mct_data * d = data->mct_data;
+	int ret;
+
+	struct starpu_sched_component * best_component;
+
+	/* Estimated task duration for each child */
+	double estimated_lengths[component->nchildren];
+	/* Estimated transfer duration for each child */
+	double estimated_transfer_length[component->nchildren];
+	/* Estimated transfer+task termination for each child */
+	double estimated_ends_with_task[component->nchildren];
+
+	/* Minimum transfer+task termination on all children */
+	double min_exp_end_with_task;
+	/* Maximum transfer+task termination on all children */
+	double max_exp_end_with_task;
+
+	unsigned suitable_components[component->nchildren];
+	unsigned nsuitable_components;
+
+	nsuitable_components = starpu_mct_compute_execution_times(component, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			suitable_components);
+
+	/* If no suitable components were found, it means that the perfmodel of
+	 * the task had been purged since it has been pushed on the mct component.
+	 * We should send a push_fail message to its parent so that it will
+	 * be able to reschedule the task properly. */
+	if(nsuitable_components == 0)
+		return 1;
+
+	/* Entering critical section to make sure no two workers
+	   make scheduling decisions at the same time */
+	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
+
+	starpu_mct_compute_expected_times(component, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			estimated_ends_with_task,
+			&min_exp_end_with_task, &max_exp_end_with_task,
+			suitable_components, nsuitable_components);
+
+	/* And now find out which worker suits best for this task,
+	 * including data transfer */
+	int best_icomponent = starpu_mct_get_best_component(d, task,
+			estimated_lengths,
+			estimated_transfer_length,
+			estimated_ends_with_task,
+			min_exp_end_with_task, max_exp_end_with_task,
+			suitable_components, nsuitable_components);
+
+	/* If no best component is found, it means that the perfmodel of
+	 * the task had been purged since it has been pushed on the mct component.
+	 * We should send a push_fail message to its parent so that it will
+	 * be able to reschedule the task properly. */
+	if(best_icomponent == -1)
+	{
+		STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+		return 1;
+	}
+
+	best_component = component->children[best_icomponent];
+
+	STARPU_ASSERT(!starpu_sched_component_is_worker(best_component));
+	ret = starpu_sched_component_push_task(component,best_component,task);
+	STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
+
+	return ret;
+}
+
+static int heteroprio_progress_one(struct starpu_sched_component *component)
+{
+	struct _starpu_heteroprio_data * data = component->data;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	struct starpu_task *task;
+
+	struct _starpu_prio_deque * no_accel = &data->no_accel;
+	STARPU_COMPONENT_MUTEX_LOCK(mutex);
+	task = _starpu_prio_deque_pop_task(no_accel);
+	STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+
+	if (task) {
+		if (heteroprio_push_heft(component, data, task))
+		{
+			/* Could not push to child actually, push that one back */
+			STARPU_COMPONENT_MUTEX_LOCK(mutex);
+			_starpu_prio_deque_push_front_task(no_accel, task);
+			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+		}
+	}
+
+	/* Note: this hardcodes acceleration order */
+	if (!heteroprio_push(component, data, STARPU_CUDA_WORKER, 1))
+		return 0;
+	if (!heteroprio_push(component, data, STARPU_OPENCL_WORKER, 1))
+		return 0;
+	if (!heteroprio_push(component, data, STARPU_MIC_WORKER, 1))
+		return 0;
+	if (!heteroprio_push(component, data, STARPU_MPI_MS_WORKER, 0))
+		return 0;
+	if (!heteroprio_push(component, data, STARPU_CPU_WORKER, 0))
+		return 0;
+
+	return 1;
+}
+
+/* Try to push some tasks below */
+static void heteroprio_progress(struct starpu_sched_component *component)
+{
+	STARPU_ASSERT(component && starpu_sched_component_is_heteroprio(component));
+	while (!heteroprio_progress_one(component))
+		;
+}
+
+static int heteroprio_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	STARPU_ASSERT(component && task && starpu_sched_component_is_heteroprio(component));
+	struct _starpu_heteroprio_data * data = component->data;
+	starpu_pthread_mutex_t * mutex = &data->mutex;
+	unsigned nimpl;
+
+	double min_expected = INFINITY, max_expected = -INFINITY;
+	double acceleration;
+
+	/* Compute acceleration between best-performing arch and least-performing arch */
+	int workerid;
+	for(workerid = starpu_bitmap_first(component->workers_in_ctx);
+	    workerid != -1;
+	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
+	{
+		unsigned impl_mask;
+		if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask))
+			break;
+
+		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, task->sched_ctx);
+		double min_arch = INFINITY;
+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
+			if (!(impl_mask & (1U << nimpl)))
+				continue;
+			double expected = starpu_task_expected_length(task, perf_arch, nimpl);
+			if (expected < min_arch)
+				min_arch = expected;
+		}
+		STARPU_ASSERT(min_arch != INFINITY);
+		if (min_arch < min_expected)
+			min_expected = min_arch;
+		if (min_arch > max_expected)
+			max_expected = min_arch;
+	}
+
+	if (workerid == -1) {
+		/* All archs can run it */
+		STARPU_ASSERT(min_expected != INFINITY);
+		STARPU_ASSERT(max_expected != -INFINITY);
+		acceleration = max_expected / min_expected;
+
+		//fprintf(stderr,"%s: acceleration %f\n", starpu_task_get_name(task), acceleration);
+
+		STARPU_COMPONENT_MUTEX_LOCK(mutex);
+		unsigned i, j;
+		/* Try to find a bucket with similar acceleration */
+		for (i = 0; i < data->naccel; i++) {
+			if (acceleration >= data->accel[i] * (1 - APPROX) &&
+			    acceleration <= data->accel[i] * (1 + APPROX))
+				break;
+		}
+
+		if (i == data->naccel) {
+			/* Didn't find it, add one */
+			data->naccel++;
+
+			float *newaccel = malloc(data->naccel * sizeof(*newaccel));
+			struct _starpu_prio_deque **newbuckets = malloc(data->naccel * sizeof(*newbuckets));
+			struct _starpu_prio_deque *newbucket = malloc(sizeof(*newbucket));
+			_starpu_prio_deque_init(newbucket);
+			int inserted = 0;
+
+			for (j = 0; j < data->naccel-1; j++) {
+				if (!inserted && acceleration > data->accel[j]) {
+					/* Insert the new bucket here */
+					i = j;
+					newbuckets[j] = newbucket;
+					newaccel[j] = acceleration;
+					inserted = 1;
+				}
+				newbuckets[j+inserted] = data->bucket[j];
+				newaccel[j+inserted] = data->accel[j];
+			}
+			if (!inserted) {
+				/* Insert it last */
+				newbuckets[data->naccel-1] = newbucket;
+				newaccel[data->naccel-1] = acceleration;
+			}
+			free(data->bucket);
+			free(data->accel);
+			data->bucket = newbuckets;
+			data->accel = newaccel;
+		}
+#if 0
+		fprintf(stderr,"buckets:");
+		for (j = 0; j < data->naccel; j++) {
+			fprintf(stderr, " %f", data->accel[j]);
+		}
+		fprintf(stderr,"\ninserting %p %f to %d\n", task, acceleration, i);
+#endif
+		_starpu_prio_deque_push_back_task(data->bucket[i],task);
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+	} else {
+		/* Not all archs can run it, will resort to HEFT strategy */
+		acceleration = INFINITY;
+		//fprintf(stderr,"%s: some archs can't do it\n", starpu_task_get_name(task));
+		struct _starpu_prio_deque * no_accel = &data->no_accel;
+		STARPU_COMPONENT_MUTEX_LOCK(mutex);
+		_starpu_prio_deque_push_back_task(no_accel,task);
+		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
+	}
+
+	heteroprio_progress(component);
+
+	return 0;
+}
+
+static int heteroprio_can_push(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED)
+{
+	heteroprio_progress(component);
+	int ret = 0;
+	unsigned j;
+	for(j=0; j < component->nparents; j++)
+	{
+		if(component->parents[j] == NULL)
+			continue;
+		else
+		{
+			ret = component->parents[j]->can_push(component->parents[j], component);
+			if(ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+static void heteroprio_component_deinit_data(struct starpu_sched_component * component)
+{
+	STARPU_ASSERT(starpu_sched_component_is_heteroprio(component));
+	struct _starpu_heteroprio_data * d = component->data;
+	struct _starpu_mct_data * mct_d = d->mct_data;
+	unsigned i;
+	for (i = 0; i < d->naccel; i++)
+	{
+		_starpu_prio_deque_destroy(d->bucket[i]);
+		free(d->bucket[i]);
+	}
+	free(d->bucket);
+	free(d->accel);
+	_starpu_prio_deque_destroy(&d->no_accel);
+	STARPU_PTHREAD_MUTEX_DESTROY(&d->mutex);
+	STARPU_PTHREAD_MUTEX_DESTROY(&mct_d->scheduling_mutex);
+	free(mct_d);
+	free(d);
+}
+
+int starpu_sched_component_is_heteroprio(struct starpu_sched_component * component)
+{
+	return component->push_task == heteroprio_push_task;
+}
+
+struct starpu_sched_component * starpu_sched_component_heteroprio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data * params)
+{
+	struct starpu_sched_component * component = starpu_sched_component_create(tree, "heteroprio");
+	struct _starpu_mct_data *mct_data = starpu_mct_init_parameters(params);
+	struct _starpu_heteroprio_data *data;
+	_STARPU_MALLOC(data, sizeof(*data));
+
+	data->bucket = NULL;
+	data->accel = NULL;
+	data->naccel = 0;
+	_starpu_prio_deque_init(&data->no_accel);
+	STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL);
+	data->mct_data = mct_data;
+	STARPU_PTHREAD_MUTEX_INIT(&mct_data->scheduling_mutex,NULL);
+	component->data = data;
+
+	component->push_task = heteroprio_push_task;
+	component->can_push = heteroprio_can_push;
+	component->deinit_data = heteroprio_component_deinit_data;
+
+	return component;
+}
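
To make the +-APPROX bucketing above concrete, here is a standalone sketch of the find-or-insert test, using hypothetical acceleration values (buckets kept sorted most accelerated first, as in _starpu_heteroprio_data):

#include <stdio.h>

#define APPROX 0.10

/* Hypothetical bucket accelerations, sorted most accelerated first. */
static float accel[] = { 20.0f, 10.0f, 1.5f };
static const unsigned naccel = sizeof(accel) / sizeof(accel[0]);

/* Return the index of a bucket within +-10% of 'acceleration',
 * or naccel when a new bucket would have to be inserted. */
static unsigned find_bucket(double acceleration)
{
	unsigned i;
	for (i = 0; i < naccel; i++)
		if (acceleration >= accel[i] * (1 - APPROX) &&
		    acceleration <= accel[i] * (1 + APPROX))
			break;
	return i;
}

int main(void)
{
	/* A task taking 100ms on CPU and 9.5ms on GPU has acceleration
	 * factor 100/9.5 ~= 10.5, which lands in the 10.0 bucket. */
	printf("10.5 -> bucket %u\n", find_bucket(100.0 / 9.5));
	/* Acceleration 5.0 matches no bucket: a new one gets inserted. */
	printf("5.0  -> bucket %u (== naccel: insert new)\n", find_bucket(5.0));
	return 0;
}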

+ 2 - 1
src/sched_policies/modular_ez.c

@@ -201,7 +201,8 @@ void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_c
 	/* Take default ntasks_threshold */
 	unsigned ntasks_threshold;
 	if (starpu_sched_component_is_heft(decision_component) ||
-	    starpu_sched_component_is_mct(decision_component)) {
+	    starpu_sched_component_is_mct(decision_component) ||
+	    starpu_sched_component_is_heteroprio(decision_component)) {
 		/* These need more queueing to allow CPUs to take some share of the work */
 		ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_HEFT;
 	} else {

+ 58 - 0
src/sched_policies/modular_heteroprio.c

@@ -0,0 +1,58 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2015,2017                           Inria
+ * Copyright (C) 2014,2015,2017                           CNRS
+ * Copyright (C) 2013-2015,2017,2018-2019                 Université de Bordeaux
+ * Copyright (C) 2013                                     Simon Archipoff
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_sched_component.h>
+#include <starpu_scheduler.h>
+#include <float.h>
+#include <limits.h>
+
+static void initialize_heteroprio_center_policy(unsigned sched_ctx_id)
+{
+	starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_heteroprio_create, NULL,
+			STARPU_SCHED_SIMPLE_DECIDE_WORKERS |
+			STARPU_SCHED_SIMPLE_PERFMODEL |
+			STARPU_SCHED_SIMPLE_FIFO_ABOVE |
+			STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO |
+			STARPU_SCHED_SIMPLE_FIFOS_BELOW |
+			STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO |
+			STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY |
+			STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id);
+}
+
+static void deinitialize_heteroprio_center_policy(unsigned sched_ctx_id)
+{
+	struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	starpu_sched_tree_destroy(t);
+}
+
+struct starpu_sched_policy _starpu_sched_modular_heteroprio_policy =
+{
+	.init_sched = initialize_heteroprio_center_policy,
+	.deinit_sched = deinitialize_heteroprio_center_policy,
+	.add_workers = starpu_sched_tree_add_workers,
+	.remove_workers = starpu_sched_tree_remove_workers,
+	.push_task = starpu_sched_tree_push_task,
+	.pop_task = starpu_sched_tree_pop_task,
+	.pre_exec_hook = starpu_sched_component_worker_pre_exec_hook,
+	.post_exec_hook = starpu_sched_component_worker_post_exec_hook,
+	.pop_every_task = NULL,
+	.policy_name = "modular-heteroprio",
+	.policy_description = "heteroprio modular policy",
+	.worker_type = STARPU_WORKER_LIST,
+};

+ 1 - 1
tests/microbenchs/parallel_independent_heterogeneous_tasks.sh

@@ -17,6 +17,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-gemm random peager heteroprio graph_test"
+XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
 
 test_scheds parallel_independent_heterogeneous_tasks

+ 1 - 1
tests/microbenchs/parallel_independent_homogeneous_tasks.sh

@@ -17,6 +17,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-gemm random peager heteroprio graph_test"
+XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
 
 test_scheds parallel_independent_homogeneous_tasks