|
@@ -2,6 +2,7 @@
|
|
*
|
|
*
|
|
* Copyright (C) 2010, 2011 Université de Bordeaux 1
|
|
* Copyright (C) 2010, 2011 Université de Bordeaux 1
|
|
* Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
|
|
* Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
|
|
|
|
+ * Copyright (C) 2011 Télécom-SudParis
|
|
*
|
|
*
|
|
* StarPU is free software; you can redistribute it and/or modify
|
|
* StarPU is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License as published by
|
|
* it under the terms of the GNU Lesser General Public License as published by
|
|
@@ -328,59 +329,67 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, struct starpu_
|
|
/* A priori, we know all estimations */
|
|
/* A priori, we know all estimations */
|
|
int unknown = 0;
|
|
int unknown = 0;
|
|
|
|
|
|
|
|
+ unsigned best_impl = 0;
|
|
|
|
+ unsigned nimpl;
|
|
unsigned nworkers = sched_ctx->nworkers_in_ctx;
|
|
unsigned nworkers = sched_ctx->nworkers_in_ctx;
|
|
for (worker_in_ctx = 0; worker_in_ctx < nworkers; worker_in_ctx++)
|
|
for (worker_in_ctx = 0; worker_in_ctx < nworkers; worker_in_ctx++)
|
|
{
|
|
{
|
|
- worker = sched_ctx->workerid[worker_in_ctx];
|
|
|
|
- double exp_end;
|
|
|
|
|
|
+ for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
|
|
|
|
+ {
|
|
|
|
+ worker = sched_ctx->workerid[worker_in_ctx];
|
|
|
|
+ double exp_end;
|
|
|
|
|
|
- fifo = dt->queue_array[worker_in_ctx];
|
|
|
|
|
|
+ fifo = dt->queue_array[worker_in_ctx];
|
|
|
|
|
|
- /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
|
- fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
|
- fifo->exp_end = fifo->exp_start + fifo->exp_len;
|
|
|
|
|
|
+ /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
|
+ fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
|
+ fifo->exp_end = fifo->exp_start + fifo->exp_len;
|
|
|
|
|
|
- if (!starpu_worker_may_execute_task(worker, task))
|
|
|
|
- {
|
|
|
|
- /* no one on that queue may execute this task */
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
|
|
+ if (!starpu_worker_may_execute_task(worker, task, nimpl))
|
|
|
|
+ {
|
|
|
|
+ /* no one on that queue may execute this task */
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
|
|
- enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
|
|
|
|
- double local_length = starpu_task_expected_length(task, perf_arch);
|
|
|
|
- double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
|
|
|
|
-
|
|
|
|
- if (ntasks_best == -1
|
|
|
|
- || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
|
|
|
|
- || (!calibrating && local_length == -1.0) /* Not calibrating but this worker is being calibrated */
|
|
|
|
- || (calibrating && local_length == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
|
|
|
|
- ) {
|
|
|
|
- ntasks_best_end = ntasks_end;
|
|
|
|
- ntasks_best = worker;
|
|
|
|
- }
|
|
|
|
|
|
+ enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
|
|
|
|
+ double local_length = starpu_task_expected_length(task, perf_arch, nimpl);
|
|
|
|
+ double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
|
|
|
|
|
|
- if (local_length == -1.0)
|
|
|
|
- /* we are calibrating, we want to speed-up calibration time
|
|
|
|
- * so we privilege non-calibrated tasks (but still
|
|
|
|
- * greedily distribute them to avoid dumb schedules) */
|
|
|
|
- calibrating = 1;
|
|
|
|
|
|
+ //_STARPU_DEBUG("Scheduler dm: task length (%lf) worker (%u) kernel (%u) \n", local_length,worker,nimpl);
|
|
|
|
|
|
- if (local_length <= 0.0)
|
|
|
|
- /* there is no prediction available for that task
|
|
|
|
- * with that arch yet, so switch to a greedy strategy */
|
|
|
|
- unknown = 1;
|
|
|
|
|
|
+ if (ntasks_best == -1
|
|
|
|
+ || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
|
|
|
|
+ || (!calibrating && local_length == -1.0) /* Not calibrating but this worker is being calibrated */
|
|
|
|
+ || (calibrating && local_length == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
|
|
|
|
+ ) {
|
|
|
|
+ ntasks_best_end = ntasks_end;
|
|
|
|
+ ntasks_best = worker;
|
|
|
|
+ }
|
|
|
|
|
|
- if (unknown)
|
|
|
|
- continue;
|
|
|
|
|
|
+ if (local_length == -1.0)
|
|
|
|
+ /* we are calibrating, we want to speed-up calibration time
|
|
|
|
+ * so we privilege non-calibrated tasks (but still
|
|
|
|
+ * greedily distribute them to avoid dumb schedules) */
|
|
|
|
+ calibrating = 1;
|
|
|
|
|
|
- exp_end = fifo->exp_start + fifo->exp_len + local_length;
|
|
|
|
|
|
+ if (local_length <= 0.0)
|
|
|
|
+ /* there is no prediction available for that task
|
|
|
|
+ * with that arch yet, so switch to a greedy strategy */
|
|
|
|
+ unknown = 1;
|
|
|
|
|
|
- if (best == -1 || exp_end < best_exp_end)
|
|
|
|
- {
|
|
|
|
- /* a better solution was found */
|
|
|
|
- best_exp_end = exp_end;
|
|
|
|
- best = worker;
|
|
|
|
- model_best = local_length;
|
|
|
|
|
|
+ if (unknown)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ exp_end = fifo->exp_start + fifo->exp_len + local_length;
|
|
|
|
+
|
|
|
|
+ if (best == -1 || exp_end < best_exp_end)
|
|
|
|
+ {
|
|
|
|
+ /* a better solution was found */
|
|
|
|
+ best_exp_end = exp_end;
|
|
|
|
+ best = worker;
|
|
|
|
+ model_best = local_length;
|
|
|
|
+ best_impl = nimpl;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -390,6 +399,11 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, struct starpu_
|
|
}
|
|
}
|
|
|
|
|
|
_starpu_increment_nsubmitted_tasks_of_worker(best);
|
|
_starpu_increment_nsubmitted_tasks_of_worker(best);
|
|
|
|
+
|
|
|
|
+ //_STARPU_DEBUG("Scheduler dm: kernel (%u)\n", best_impl);
|
|
|
|
+
|
|
|
|
+ _starpu_get_job_associated_to_task(task)->nimpl = 0;//best_impl;
|
|
|
|
+
|
|
/* we should now have the best worker in variable "best" */
|
|
/* we should now have the best worker in variable "best" */
|
|
return push_task_on_best_worker(task, best, model_best, prio, sched_ctx);
|
|
return push_task_on_best_worker(task, best, model_best, prio, sched_ctx);
|
|
}
|
|
}
|
|
@@ -426,82 +440,89 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, struct starp
|
|
/* A priori, we know all estimations */
|
|
/* A priori, we know all estimations */
|
|
int unknown = 0;
|
|
int unknown = 0;
|
|
|
|
|
|
|
|
+ unsigned best_impl = 0;
|
|
|
|
+ unsigned nimpl=0;
|
|
for (worker_in_ctx = 0; worker_in_ctx < nworkers_in_ctx; worker_in_ctx++)
|
|
for (worker_in_ctx = 0; worker_in_ctx < nworkers_in_ctx; worker_in_ctx++)
|
|
{
|
|
{
|
|
worker = sched_ctx->workerid[worker_in_ctx];
|
|
worker = sched_ctx->workerid[worker_in_ctx];
|
|
|
|
+ for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
|
|
|
|
+ {
|
|
|
|
+ fifo = dt->queue_array[worker_in_ctx];
|
|
|
|
|
|
- fifo = dt->queue_array[worker_in_ctx];
|
|
|
|
|
|
+ /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
|
+ fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
|
+ fifo->exp_end = fifo->exp_start + fifo->exp_len;
|
|
|
|
+ if (fifo->exp_end > max_exp_end)
|
|
|
|
+ max_exp_end = fifo->exp_end;
|
|
|
|
|
|
- /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
|
- fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
|
- fifo->exp_end = fifo->exp_start + fifo->exp_len;
|
|
|
|
- if (fifo->exp_end > max_exp_end)
|
|
|
|
- max_exp_end = fifo->exp_end;
|
|
|
|
|
|
+ if (!starpu_worker_may_execute_task(worker, task, nimpl))
|
|
|
|
+ {
|
|
|
|
+ /* no one on that queue may execute this task */
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
|
|
- if (!starpu_worker_may_execute_task(worker, task))
|
|
|
|
- {
|
|
|
|
- /* no one on that queue may execute this task */
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
|
|
+ enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
|
|
|
|
+ local_task_length[worker_in_ctx] = starpu_task_expected_length(task, perf_arch);
|
|
|
|
|
|
- enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
|
|
|
|
- local_task_length[worker_in_ctx] = starpu_task_expected_length(task, perf_arch);
|
|
|
|
|
|
+ //_STARPU_DEBUG("Scheduler dmda: task length (%lf) worker (%u) kernel (%u) \n", local_task_length[worker],worker,nimpl);
|
|
|
|
|
|
- unsigned memory_node = starpu_worker_get_memory_node(worker);
|
|
|
|
- local_data_penalty[worker_in_ctx] = starpu_task_expected_data_transfer_time(memory_node, task);
|
|
|
|
|
|
+ unsigned memory_node = starpu_worker_get_memory_node(worker);
|
|
|
|
+ local_data_penalty[worker_in_ctx] = starpu_task_expected_data_transfer_time(memory_node, task);
|
|
|
|
|
|
- double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
|
|
|
|
|
|
+ double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
|
|
|
|
|
|
- if (ntasks_best == -1
|
|
|
|
- || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
|
|
|
|
- || (!calibrating && local_task_length[worker] == -1.0) /* Not calibrating but this worker is being calibrated */
|
|
|
|
- || (calibrating && local_task_length[worker] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
|
|
|
|
- ) {
|
|
|
|
- ntasks_best_end = ntasks_end;
|
|
|
|
- ntasks_best = worker;
|
|
|
|
- }
|
|
|
|
|
|
+ if (ntasks_best == -1
|
|
|
|
+ || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
|
|
|
|
+ || (!calibrating && local_task_length[worker] == -1.0) /* Not calibrating but this worker is being calibrated */
|
|
|
|
+ || (calibrating && local_task_length[worker] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
|
|
|
|
+ ) {
|
|
|
|
+ ntasks_best_end = ntasks_end;
|
|
|
|
+ ntasks_best = worker;
|
|
|
|
+ }
|
|
|
|
|
|
- if (local_task_length[worker_in_ctx] == -1.0)
|
|
|
|
- /* we are calibrating, we want to speed-up calibration time
|
|
|
|
- * so we privilege non-calibrated tasks (but still
|
|
|
|
- * greedily distribute them to avoid dumb schedules) */
|
|
|
|
- calibrating = 1;
|
|
|
|
|
|
+ if (local_task_length[worker_in_ctx] == -1.0)
|
|
|
|
+ /* we are calibrating, we want to speed-up calibration time
|
|
|
|
+ * so we privilege non-calibrated tasks (but still
|
|
|
|
+ * greedily distribute them to avoid dumb schedules) */
|
|
|
|
+ calibrating = 1;
|
|
|
|
|
|
- if (local_task_length[worker_in_ctx] <= 0.0)
|
|
|
|
- /* there is no prediction available for that task
|
|
|
|
- * with that arch yet, so switch to a greedy strategy */
|
|
|
|
- unknown = 1;
|
|
|
|
|
|
+ if (local_task_length[worker_in_ctx] <= 0.0)
|
|
|
|
+ /* there is no prediction available for that task
|
|
|
|
+ * with that arch yet, so switch to a greedy strategy */
|
|
|
|
+ unknown = 1;
|
|
|
|
|
|
- if (unknown)
|
|
|
|
- continue;
|
|
|
|
|
|
+ if (unknown)
|
|
|
|
+ continue;
|
|
|
|
|
|
- exp_end[worker_in_ctx] = fifo->exp_start + fifo->exp_len + local_task_length[worker_in_ctx];
|
|
|
|
|
|
+ exp_end[worker_in_ctx] = fifo->exp_start + fifo->exp_len + local_task_length[worker_in_ctx];
|
|
|
|
|
|
- if (exp_end[worker_in_ctx] < best_exp_end)
|
|
|
|
- {
|
|
|
|
- /* a better solution was found */
|
|
|
|
- best_exp_end = exp_end[worker_in_ctx];
|
|
|
|
- }
|
|
|
|
|
|
+ if (exp_end[worker_in_ctx] < best_exp_end)
|
|
|
|
+ {
|
|
|
|
+ /* a better solution was found */
|
|
|
|
+ best_exp_end = exp_end[worker_in_ctx];
|
|
|
|
+ best_impl = nimpl;
|
|
|
|
+ }
|
|
|
|
|
|
- local_power[worker_in_ctx] = starpu_task_expected_power(task, perf_arch);
|
|
|
|
- if (local_power[worker_in_ctx] == -1.0)
|
|
|
|
- local_power[worker_in_ctx] = 0.;
|
|
|
|
- }
|
|
|
|
|
|
+ local_power[worker_in_ctx] = starpu_task_expected_power(task, perf_arch, nimpl);
|
|
|
|
+ if (local_power[worker_in_ctx] == -1.0)
|
|
|
|
+ local_power[worker_in_ctx] = 0.;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
- if (unknown)
|
|
|
|
- forced_best = ntasks_best;
|
|
|
|
|
|
+ if (unknown)
|
|
|
|
+ forced_best = ntasks_best;
|
|
|
|
|
|
- double best_fitness = -1;
|
|
|
|
|
|
+ double best_fitness = -1;
|
|
|
|
|
|
- if (forced_best == -1)
|
|
|
|
- {
|
|
|
|
|
|
+ if (forced_best == -1)
|
|
|
|
+ {
|
|
for (worker_in_ctx = 0; worker_in_ctx < nworkers_in_ctx; worker_in_ctx++)
|
|
for (worker_in_ctx = 0; worker_in_ctx < nworkers_in_ctx; worker_in_ctx++)
|
|
{
|
|
{
|
|
worker = sched_ctx->workerid[worker_in_ctx];
|
|
worker = sched_ctx->workerid[worker_in_ctx];
|
|
|
|
|
|
- fifo = dt->queue_array[worker_in_ctx];
|
|
|
|
|
|
+ fifo = dt->queue_array[worker_in_ctx];
|
|
|
|
|
|
- if (!starpu_worker_may_execute_task(worker, task))
|
|
|
|
|
|
+ if (!starpu_worker_may_execute_task(worker, task, 0))
|
|
{
|
|
{
|
|
/* no one on that queue may execute this task */
|
|
/* no one on that queue may execute this task */
|
|
continue;
|
|
continue;
|
|
@@ -546,6 +567,10 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, struct starp
|
|
//penality_best = local_data_penalty[best];
|
|
//penality_best = local_data_penalty[best];
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+
|
|
|
|
+ //_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
|
|
|
|
+ _starpu_get_job_associated_to_task(task)->nimpl = best_impl;
|
|
|
|
+
|
|
/* we should now have the best worker in variable "best" */
|
|
/* we should now have the best worker in variable "best" */
|
|
return push_task_on_best_worker(task, best, model_best, prio, sched_ctx);
|
|
return push_task_on_best_worker(task, best, model_best, prio, sched_ctx);
|
|
}
|
|
}
|
|
@@ -559,18 +584,12 @@ static int dmda_push_sorted_task(struct starpu_task *task, unsigned sched_ctx_id
|
|
static int dm_push_task(struct starpu_task *task, unsigned sched_ctx_id)
|
|
static int dm_push_task(struct starpu_task *task, unsigned sched_ctx_id)
|
|
{
|
|
{
|
|
struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
|
|
struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
|
|
- if (task->priority > 0)
|
|
|
|
- return _dm_push_task(task, 1, sched_ctx);
|
|
|
|
-
|
|
|
|
return _dm_push_task(task, 0, sched_ctx);
|
|
return _dm_push_task(task, 0, sched_ctx);
|
|
}
|
|
}
|
|
|
|
|
|
static int dmda_push_task(struct starpu_task *task, unsigned sched_ctx_id)
|
|
static int dmda_push_task(struct starpu_task *task, unsigned sched_ctx_id)
|
|
{
|
|
{
|
|
struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
|
|
struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
|
|
- if (task->priority > 0)
|
|
|
|
- return _dmda_push_task(task, 1, sched_ctx);
|
|
|
|
-
|
|
|
|
return _dmda_push_task(task, 0, sched_ctx);
|
|
return _dmda_push_task(task, 0, sched_ctx);
|
|
}
|
|
}
|
|
|
|
|