| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161 |
- /* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2011, 2012, 2013 CNRS
- * Copyright (C) 2010-2015 Université de Bordeaux
- * Copyright (C) 2011 Télécom-SudParis
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
- /*
- * Record which kinds of tasks have been executed, to later on compute an upper
- * bound of the performance that could have theoretically been achieved
- */
- #include <starpu.h>
- #include <starpu_config.h>
- #include <profiling/bound.h>
- #include <core/jobs.h>
- #include <core/workers.h>
- #ifdef STARPU_HAVE_GLPK_H
- #include <glpk.h>
- #endif /* STARPU_HAVE_GLPK_H */
- /* TODO: output duration between starpu_bound_start and starpu_bound_stop */
- /* TODO: compute critical path and introduce it in the LP */
- /*
- * Record without dependencies: just count each kind of task
- *
- * The linear programming problem will just have as variables:
- * - the number of tasks of kind `t' executed by worker `w'
- * - the total duration
- *
- * and the constraints will be:
- * - the time taken by each worker to complete its assigned tasks is lower than
- * the total duration.
- * - the total numer of tasks of a given kind is equal to the number run by the
- * application.
- */
- struct bound_task_pool
- {
- /* Which codelet has been executed */
- struct starpu_codelet *cl;
- /* Task footprint key (for history-based perfmodel) */
- uint32_t footprint;
- /* Number of tasks of this kind */
- unsigned long n;
- /* Other task kinds */
- struct bound_task_pool *next;
- };
- /*
- * Record with dependencies: each task is recorded separately
- *
- * The linear programming problem will have as variables:
- * - The start time of each task
- * - The completion time of each tag
- * - The total duration
- * - For each task and for each worker, whether the task is executing on that worker.
- * - For each pair of task, which task is scheduled first.
- *
- * and the constraints will be:
- * - All task start time plus duration are less than total duration
- * - Each task is executed on exactly one worker.
- * - Each task starts after all its task dependencies finish.
- * - Each task starts after all its tag dependencies finish.
- * - For each task pair and each worker, if both tasks are executed by that worker,
- * one is started after the other's completion.
- */
- struct task_dep
- {
- /* Task this depends on */
- struct bound_task *dep;
- /* Data transferred between tasks (i.e. implicit data dep size) */
- size_t size;
- };
- struct bound_task
- {
- /* Unique ID */
- unsigned long id;
- /* Tag ID, if any */
- starpu_tag_t tag_id;
- int use_tag;
- /* Which codelet has been executed */
- struct starpu_codelet *cl;
- /* Task footprint key */
- uint32_t footprint;
- /* Task priority */
- int priority;
- /* Tasks this one depends on */
- struct task_dep *deps;
- int depsn;
- /* Estimated duration */
- double** duration[STARPU_NARCH];
- /* Other tasks */
- struct bound_task *next;
- };
- struct bound_tag_dep
- {
- starpu_tag_t tag;
- starpu_tag_t dep_tag;
- struct bound_tag_dep *next;
- };
- static struct bound_task_pool *task_pools, *last;
- static struct bound_task *tasks;
- static struct bound_tag_dep *tag_deps;
- int _starpu_bound_recording;
- static int recorddeps;
- static int recordprio;
- static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
- /* Initialization */
- void starpu_bound_start(int deps, int prio)
- {
- struct bound_task_pool *tp;
- struct bound_task *t;
- struct bound_tag_dep *td;
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- tp = task_pools;
- task_pools = NULL;
- last = NULL;
- t = tasks;
- tasks = NULL;
- td = tag_deps;
- tag_deps = NULL;
- _starpu_bound_recording = 1;
- recorddeps = deps;
- recordprio = prio;
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- while (tp != NULL)
- {
- struct bound_task_pool *next = tp->next;
- free(tp);
- tp = next;
- }
- while (t != NULL)
- {
- struct bound_task *next = t->next;
- free(t);
- t = next;
- }
- while (td != NULL)
- {
- struct bound_tag_dep *next = td->next;
- free(td);
- td = next;
- }
- }
- /* Whether we will include it in the computation */
- static int good_job(struct _starpu_job *j)
- {
- /* No codelet, nothing to measure */
- if (j->exclude_from_dag)
- return 0;
- if (!j->task->cl)
- return 0;
- /* No performance model, no time duration estimation */
- if (!j->task->cl->model)
- return 0;
- /* Only support history based */
- if (j->task->cl->model->type != STARPU_HISTORY_BASED
- && j->task->cl->model->type != STARPU_NL_REGRESSION_BASED)
- return 0;
- return 1;
- }
- static double** initialize_arch_duration(int maxdevid, unsigned* maxncore_table)
- {
- int devid, maxncore;
- double ** arch_model = malloc(sizeof(*arch_model)*(maxdevid+1));
- arch_model[maxdevid] = NULL;
- for(devid=0; devid<maxdevid; devid++)
- {
- if(maxncore_table != NULL)
- maxncore = maxncore_table[devid];
- else
- maxncore = 1;
- arch_model[devid] = calloc(maxncore+1,sizeof(*arch_model[devid]));
- }
- return arch_model;
- }
- static void initialize_duration(struct bound_task *task)
- {
- struct _starpu_machine_config *conf = _starpu_get_machine_config();
- task->duration[STARPU_CPU_WORKER] = initialize_arch_duration(1,&conf->topology.ncpus);
- task->duration[STARPU_CUDA_WORKER] = initialize_arch_duration(conf->topology.ncudagpus,NULL);
- task->duration[STARPU_OPENCL_WORKER] = initialize_arch_duration(conf->topology.nopenclgpus,NULL);
- task->duration[STARPU_MIC_WORKER] = initialize_arch_duration(conf->topology.nmicdevices,conf->topology.nmiccores);
- task->duration[STARPU_SCC_WORKER] = initialize_arch_duration(conf->topology.nsccdevices,NULL);
- }
- /* Create a new task (either because it has just been submitted, or a
- * dependency was added before submission) */
- static void new_task(struct _starpu_job *j)
- {
- struct bound_task *t;
- if (j->bound_task)
- return;
- t = (struct bound_task *) malloc(sizeof(*t));
- memset(t, 0, sizeof(*t));
- t->id = j->job_id;
- t->tag_id = j->task->tag_id;
- t->use_tag = j->task->use_tag;
- t->cl = j->task->cl;
- t->footprint = _starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, STARPU_CPU_WORKER, 0, j);
- t->priority = j->task->priority;
- t->deps = NULL;
- t->depsn = 0;
- initialize_duration(t);
- t->next = tasks;
- j->bound_task = t;
- tasks = t;
- }
- /* A new task was submitted, record it */
- void _starpu_bound_record(struct _starpu_job *j)
- {
- if (!_starpu_bound_recording)
- return;
- if (!good_job(j))
- return;
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- /* Re-check, this time with mutex held */
- if (!_starpu_bound_recording)
- {
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- return;
- }
- if (recorddeps)
- {
- new_task(j);
- }
- else
- {
- struct bound_task_pool *tp;
- _starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, STARPU_CPU_WORKER, 0, j);
- if (last && last->cl == j->task->cl && last->footprint == j->footprint)
- tp = last;
- else
- for (tp = task_pools; tp; tp = tp->next)
- if (tp->cl == j->task->cl && tp->footprint == j->footprint)
- break;
- if (!tp)
- {
- tp = (struct bound_task_pool *) malloc(sizeof(*tp));
- tp->cl = j->task->cl;
- tp->footprint = j->footprint;
- tp->n = 0;
- tp->next = task_pools;
- task_pools = tp;
- }
- /* One more task of this kind */
- tp->n++;
- }
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- }
- /* A tag dependency was emitted, record it */
- void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
- {
- struct bound_tag_dep *td;
- if (!_starpu_bound_recording || !recorddeps)
- return;
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- /* Re-check, this time with mutex held */
- if (!_starpu_bound_recording || !recorddeps)
- {
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- return;
- }
- td = (struct bound_tag_dep *) malloc(sizeof(*td));
- td->tag = id;
- td->dep_tag = dep_id;
- td->next = tag_deps;
- tag_deps = td;
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- }
- /* A task dependency was emitted, record it */
- void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j)
- {
- struct bound_task *t;
- int i;
- if (!_starpu_bound_recording || !recorddeps)
- return;
- if (!good_job(j) || !good_job(dep_j))
- return;
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- /* Re-check, this time with mutex held */
- if (!_starpu_bound_recording || !recorddeps)
- {
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- return;
- }
- new_task(j);
- new_task(dep_j);
- t = j->bound_task;
- for (i = 0; i < t->depsn; i++)
- if (t->deps[i].dep == dep_j->bound_task)
- break;
- if (i == t->depsn)
- {
- /* Not already there, add */
- t->deps = (struct task_dep *) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
- t->deps[t->depsn-1].dep = dep_j->bound_task;
- t->deps[t->depsn-1].size = 0; /* We don't have data information in that case */
- }
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- }
- /* Look for job with id ID among our tasks */
- static struct bound_task *find_job(unsigned long id)
- {
- struct bound_task *t;
- for (t = tasks; t; t = t->next)
- if (t->id == id)
- return t;
- return NULL;
- }
- /* Job J depends on previous job of id ID (which is already finished) */
- void _starpu_bound_job_id_dep(starpu_data_handle_t handle, struct _starpu_job *j, unsigned long id)
- {
- struct bound_task *t, *dep_t;
- int i;
- if (!_starpu_bound_recording || !recorddeps)
- return;
- if (!good_job(j))
- return;
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- /* Re-check, this time with mutex held */
- if (!_starpu_bound_recording || !recorddeps)
- {
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- return;
- }
- new_task(j);
- dep_t = find_job(id);
- if (!dep_t)
- {
- fprintf(stderr,"dependency %lu not found !\n", id);
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- return;
- }
- t = j->bound_task;
- for (i = 0; i < t->depsn; i++)
- if (t->deps[i].dep == dep_t)
- {
- /* Found, just add size */
- t->deps[i].size += _starpu_data_get_size(handle);
- break;
- }
- if (i == t->depsn)
- {
- /* Not already there, add */
- t->deps = (struct task_dep *) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
- t->deps[t->depsn-1].dep = dep_t;
- t->deps[t->depsn-1].size = _starpu_data_get_size(handle);
- }
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- }
- void starpu_bound_stop(void)
- {
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- _starpu_bound_recording = 0;
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- }
- /* Compute all tasks times on all workers */
- static void _starpu_get_tasks_times(int nw, int nt, double *times)
- {
- struct bound_task_pool *tp;
- int w, t;
- for (w = 0; w < nw; w++)
- {
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- struct _starpu_job j =
- {
- .footprint = tp->footprint,
- .footprint_is_computed = 1,
- };
- struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
- double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j, j.nimpl);
- if (isnan(length))
- times[w*nt+t] = NAN;
- else
- times[w*nt+t] = length / 1000.;
- }
- }
- }
- /* Return whether PARENT is an ancestor of CHILD */
- static int ancestor(struct bound_task *child, struct bound_task *parent)
- {
- int i;
- for (i = 0; i < child->depsn; i++)
- {
- if (parent == child->deps[i].dep)
- return 1;
- if (ancestor(child->deps[i].dep, parent))
- return -1;
- }
- return 0;
- }
- /* Print bound recording in .dot format */
- void starpu_bound_print_dot(FILE *output)
- {
- struct bound_task *t;
- struct bound_tag_dep *td;
- int i;
- if (!recorddeps)
- {
- fprintf(output, "Not supported\n");
- return;
- }
- fprintf(output, "strict digraph bounddeps {\n");
- for (t = tasks; t; t = t->next)
- {
- fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, _starpu_codelet_get_model_name(t->cl));
- for (i = 0; i < t->depsn; i++)
- fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i].dep->id, t->id);
- }
- for (td = tag_deps; td; td = td->next)
- fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag);
- fprintf(output, "}\n");
- }
- /*
- * Print bound system in lp_solve format
- *
- * When dependencies are enabled, you can check the set of tasks and deps that
- * were recorded by using tools/lp2paje and vite.
- */
- void starpu_bound_print_lp(FILE *output)
- {
- int nt; /* Number of different kinds of tasks */
- int nw; /* Number of different workers */
- int t;
- int w, w2; /* worker */
- unsigned n, n2;
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- nw = starpu_worker_get_count();
- if (!nw)
- /* Make llvm happy about the VLA below */
- return;
- if (recorddeps)
- {
- struct bound_task *t1, *t2;
- struct bound_tag_dep *td;
- int i;
- nt = 0;
- for (t1 = tasks; t1; t1 = t1->next)
- {
- if (t1->cl->model->type != STARPU_HISTORY_BASED &&
- t1->cl->model->type != STARPU_NL_REGRESSION_BASED)
- /* TODO: */
- fprintf(stderr, "Warning: task %s uses a perf model which is neither history nor non-linear regression-based, support for such model is not implemented yet, system will not be solvable.\n", _starpu_codelet_get_model_name(t1->cl));
- struct _starpu_job j =
- {
- .footprint = t1->footprint,
- .footprint_is_computed = 1,
- };
- for (w = 0; w < nw; w++)
- {
- struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
- if (_STARPU_IS_ZERO(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores]))
- {
- double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl);
- if (isnan(length))
- /* Avoid problems with binary coding of doubles */
- t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores] = NAN;
- else
- t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores] = length / 1000.;
- }
- }
- nt++;
- }
- if (!nt)
- return;
- fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
- fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
- fprintf(output, "/* We want to minimize total execution time (ms) */\n");
- fprintf(output, "min: tmax;\n\n");
- fprintf(output, "/* Number of tasks */\n");
- fprintf(output, "nt = %d;\n", nt);
- fprintf(output, "/* Number of workers */\n");
- fprintf(output, "nw = %d;\n", nw);
- fprintf(output, "/* The total execution time is the maximum of all task completion times (ms) */\n");
- for (t1 = tasks; t1; t1 = t1->next)
- fprintf(output, "c%lu <= tmax;\n", t1->id);
- fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
- for (t1 = tasks; t1; t1 = t1->next)
- {
- for (w = 0; w < nw; w++)
- {
- struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
- if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores]))
- fprintf(output, " +t%luw%d", t1->id, w);
- }
- fprintf(output, " = 1;\n");
- }
- fprintf(output, "\n/* Completion time is start time plus computation time */\n");
- fprintf(output, "/* According to where the task is indeed executed */\n");
- for (t1 = tasks; t1; t1 = t1->next)
- {
- fprintf(output, "/* %s %x */\tc%lu = s%lu", _starpu_codelet_get_model_name(t1->cl), (unsigned) t1->footprint, t1->id, t1->id);
- for (w = 0; w < nw; w++)
- {
- struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
- if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores]))
- fprintf(output, " + %f t%luw%d", t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores], t1->id, w);
- }
- fprintf(output, ";\n");
- }
- fprintf(output, "\n/* Each task starts after all its task dependencies finish and data is transferred. */\n");
- fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n");
- for (t1 = tasks; t1; t1 = t1->next)
- for (i = 0; i < t1->depsn; i++)
- {
- fprintf(output, "/* %lu bytes transferred */\n", (unsigned long) t1->deps[i].size);
- fprintf(output, "s%lu >= c%lu", t1->id, t1->deps[i].dep->id);
- /* Transfer time: pick up one source node and a worker on it */
- for (n = 0; n < starpu_memory_nodes_get_count(); n++)
- for (w = 0; w < nw; w++)
- if (starpu_worker_get_memory_node(w) == n)
- {
- /* pick up another destination node and a worker on it */
- for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++)
- if (n2 != n)
- {
- for (w2 = 0; w2 < nw; w2++)
- if (starpu_worker_get_memory_node(w2) == n2)
- {
- /* If predecessor is on worker w and successor
- * on worker w2 on different nodes, we need to
- * transfer the data. */
- fprintf(output, " + d_t%luw%ut%luw%u", t1->deps[i].dep->id, w, t1->id, w2);
- }
- }
- }
- fprintf(output, ";\n");
- /* Transfer time: pick up one source node and a worker on it */
- for (n = 0; n < starpu_memory_nodes_get_count(); n++)
- for (w = 0; w < nw; w++)
- if (starpu_worker_get_memory_node(w) == n)
- {
- /* pick up another destination node and a worker on it */
- for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++)
- if (n2 != n)
- {
- for (w2 = 0; w2 < nw; w2++)
- if (starpu_worker_get_memory_node(w2) == n2)
- {
- /* The data transfer is at least 0ms */
- fprintf(output, "d_t%luw%ut%luw%u >= 0;\n", t1->deps[i].dep->id, w, t1->id, w2);
- /* The data transfer from w to w2 only happens if tasks run there */
- fprintf(output, "d_t%luw%ut%luw%u >= %f - 2e5 + 1e5 t%luw%u + 1e5 t%luw%u;\n",
- t1->deps[i].dep->id, w, t1->id, w2,
- starpu_transfer_predict(n, n2, t1->deps[i].size)/1000.,
- t1->deps[i].dep->id, w, t1->id, w2);
- }
- }
- }
- }
- fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */\n");
- for (t1 = tasks; t1; t1 = t1->next)
- if (t1->use_tag)
- {
- for (w = 0; w < nw; w++)
- fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
- }
- fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n");
- for (td = tag_deps; td; td = td->next)
- fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag);
- /* TODO: factorize ancestor calls */
- fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
- fprintf(output, " one is started after the other's completion */\n");
- for (t1 = tasks; t1; t1 = t1->next)
- {
- for (t2 = t1->next; t2; t2 = t2->next)
- {
- if (!ancestor(t1, t2) && !ancestor(t2, t1))
- {
- for (w = 0; w < nw; w++)
- {
- struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
- if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores]))
- {
- fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
- t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
- fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
- t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id);
- }
- }
- }
- }
- }
- #if 0
- /* Doesn't help at all to actually express what "after" means */
- for (t1 = tasks; t1; t1 = t1->next)
- for (t2 = t1->next; t2; t2 = t2->next)
- if (!ancestor(t1, t2) && !ancestor(t2, t1))
- {
- fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
- fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
- }
- #endif
- if (recordprio)
- {
- fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
- fprintf(output, " * more prioritized first */\n");
- for (t1 = tasks; t1; t1 = t1->next)
- {
- for (t2 = t1->next; t2; t2 = t2->next)
- {
- if (!ancestor(t1, t2) && !ancestor(t2, t1)
- && t1->priority != t2->priority)
- {
- if (t1->priority > t2->priority)
- {
- /* Either t2 is scheduled before t1, but then it
- needs to be scheduled before some t dep finishes */
- /* One of the t1 deps to give the maximum start time for t2 */
- if (t1->depsn > 1)
- {
- for (i = 0; i < t1->depsn; i++)
- fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
- fprintf(output, " = 1;\n");
- }
- for (i = 0; i < t1->depsn; i++)
- {
- fprintf(output, "c%lu - s%lu >= ", t1->deps[i].dep->id, t2->id);
- if (t1->depsn > 1)
- /* Only checks this when it's this dependency that is chosen */
- fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i);
- else
- fprintf(output, "-1e5");
- /* Only check this if t1 is after t2 */
- fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id);
- fprintf(output, ";\n");
- }
- /* Or t2 is scheduled after t1 is. */
- fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
- }
- else
- {
- /* Either t1 is scheduled before t2, but then it
- needs to be scheduled before some t2 dep finishes */
- /* One of the t2 deps to give the maximum start time for t1 */
- if (t2->depsn > 1)
- {
- for (i = 0; i < t2->depsn; i++)
- fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
- fprintf(output, " = 1;\n");
- }
- for (i = 0; i < t2->depsn; i++)
- {
- fprintf(output, "c%lu - s%lu >= ", t2->deps[i].dep->id, t1->id);
- if (t2->depsn > 1)
- /* Only checks this when it's this dependency that is chosen */
- fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i);
- /* Only check this if t2 is after t1 */
- fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id);
- }
- /* Or t1 is scheduled after t2 is. */
- fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
- }
- }
- }
- }
- }
- for (t1 = tasks; t1; t1 = t1->next)
- for (t2 = t1->next; t2; t2 = t2->next)
- if (!ancestor(t1, t2) && !ancestor(t2, t1))
- {
- fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
- if (recordprio && t1->priority != t2->priority)
- {
- if (t1->priority > t2->priority)
- {
- if (t1->depsn > 1)
- for (i = 0; i < t1->depsn; i++)
- fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
- }
- else
- {
- if (t2->depsn > 1)
- for (i = 0; i < t2->depsn; i++)
- fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
- }
- }
- }
- for (t1 = tasks; t1; t1 = t1->next)
- for (w = 0; w < nw; w++)
- fprintf(output, "bin t%luw%d;\n", t1->id, w);
- }
- else
- {
- struct bound_task_pool *tp;
- nt = 0;
- for (tp = task_pools; tp; tp = tp->next)
- nt++;
- if (!nt)
- return;
- {
- double times[nw*nt];
- _starpu_get_tasks_times(nw, nt, times);
- fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
- fprintf(output, "/* We want to minimize total execution time (ms) */\n");
- fprintf(output, "min: tmax;\n\n");
- fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
- for (w = 0; w < nw; w++)
- {
- char name[32];
- starpu_worker_get_name(w, name, sizeof(name));
- fprintf(output, "/* worker %s */\n0", name);
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- if (!isnan(times[w*nt+t]))
- fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
- }
- fprintf(output, " <= tmax;\n");
- }
- fprintf(output, "\n");
- fprintf(output, "/* And we have to have computed exactly all tasks */\n");
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- int got_one = 0;
- fprintf(output, "/* task %s key %x */\n0", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
- for (w = 0; w < nw; w++)
- {
- if (isnan(times[w*nt+t]))
- fprintf(stderr, "Warning: task %s has no performance measurement for worker %d.\n", _starpu_codelet_get_model_name(tp->cl), w);
- else
- {
- got_one = 1;
- fprintf(output, "\t+w%dt%dn", w, t);
- }
- }
- fprintf(output, " = %lu;\n", tp->n);
- if (!got_one)
- fprintf(stderr, "Warning: task %s has no performance measurement for any worker, system will not be solvable!\n", _starpu_codelet_get_model_name(tp->cl));
- /* Show actual values */
- fprintf(output, "/*");
- for (w = 0; w < nw; w++)
- fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]);
- fprintf(output, "\t*/\n\n");
- }
- fprintf(output, "/* Optionally tell that tasks can not be divided */\n");
- fprintf(output, "/* int ");
- int first = 1;
- for (w = 0; w < nw; w++)
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- if (!first)
- fprintf(output, ",");
- else
- first = 0;
- fprintf(output, "w%dt%dn", w, t);
- }
- fprintf(output, "; */\n");
- }
- }
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- }
- /*
- * Print bound system in MPS output format
- */
- void starpu_bound_print_mps(FILE *output)
- {
- struct bound_task_pool * tp;
- int nt; /* Number of different kinds of tasks */
- int nw; /* Number of different workers */
- int t, w;
- if (recorddeps)
- {
- fprintf(output, "Not supported\n");
- return;
- }
- nw = starpu_worker_get_count();
- if (!nw)
- /* Make llvm happy about the VLA below */
- return;
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- nt = 0;
- for (tp = task_pools; tp; tp = tp->next)
- nt++;
- if (!nt)
- {
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- return;
- }
- {
- double times[nw*nt];
- _starpu_get_tasks_times(nw, nt, times);
- fprintf(output, "NAME StarPU theoretical bound\n");
- fprintf(output, "*\nROWS\n");
- fprintf(output, "* We want to minimize total execution time (ms)\n");
- fprintf(output, " N TMAX\n");
- fprintf(output, "* Which is the maximum of all worker execution times (ms)\n");
- for (w = 0; w < nw; w++)
- {
- char name[32];
- starpu_worker_get_name(w, name, sizeof(name));
- fprintf(output, "* worker %s\n", name);
- fprintf(output, " L W%d\n", w);
- }
- fprintf(output, "*\n* And we have to have computed exactly all tasks\n*\n");
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
- fprintf(output, " E T%d\n", t);
- }
- fprintf(output, "*\nCOLUMNS\n*\n");
- fprintf(output, "*\n* Execution times and completion of all tasks\n*\n");
- for (w = 0; w < nw; w++)
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- if (!isnan(times[w*nt+t]))
- {
- char name[9];
- snprintf(name, sizeof(name), "W%dT%d", w, t);
- fprintf(output," %-8s W%-7d %12f\n", name, w, times[w*nt+t]);
- fprintf(output," %-8s T%-7d %12d\n", name, t, 1);
- }
- fprintf(output, "*\n* Total execution time\n*\n");
- for (w = 0; w < nw; w++)
- fprintf(output," TMAX W%-2d %12d\n", w, -1);
- fprintf(output," TMAX TMAX %12d\n", 1);
- fprintf(output, "*\nRHS\n*\n");
- fprintf(output, "*\n* Total number of tasks\n*\n");
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- fprintf(output," NT%-2d T%-7d %12lu\n", t, t, tp->n);
- fprintf(output, "ENDATA\n");
- }
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- }
- /*
- * Solve bound system thanks to GNU Linear Programming Kit backend
- */
- #ifdef STARPU_HAVE_GLPK_H
- static glp_prob *_starpu_bound_glp_resolve(int integer)
- {
- struct bound_task_pool * tp;
- int nt; /* Number of different kinds of tasks */
- int nw; /* Number of different workers */
- int t, w;
- glp_prob *lp;
- int ret;
- nw = starpu_worker_get_count();
- if (!nw)
- /* Make llvm happy about the VLA below */
- return NULL;
- nt = 0;
- for (tp = task_pools; tp; tp = tp->next)
- nt++;
- if (!nt)
- return NULL;
- lp = glp_create_prob();
- glp_set_prob_name(lp, "StarPU theoretical bound");
- glp_set_obj_dir(lp, GLP_MIN);
- glp_set_obj_name(lp, "total execution time");
- {
- double times[nw*nt];
- int ne =
- nw * (nt+1) /* worker execution time */
- + nt * nw
- + 1; /* glp dumbness */
- int n = 1;
- int ia[ne], ja[ne];
- double ar[ne];
- _starpu_get_tasks_times(nw, nt, times);
- /* Variables: number of tasks i assigned to worker j, and tmax */
- glp_add_cols(lp, nw*nt+1);
- #define colnum(w, t) ((t)*nw+(w)+1)
- glp_set_obj_coef(lp, nw*nt+1, 1.);
- for (w = 0; w < nw; w++)
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- char name[32];
- snprintf(name, sizeof(name), "w%dt%dn", w, t);
- glp_set_col_name(lp, colnum(w, t), name);
- if (integer)
- glp_set_col_kind(lp, colnum(w, t), GLP_IV);
- glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
- }
- glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
- /* Total worker execution time */
- glp_add_rows(lp, nw);
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- int someone = 0;
- for (w = 0; w < nw; w++)
- if (!isnan(times[w*nt+t]))
- someone = 1;
- if (!someone)
- {
- /* This task does not have any performance model at all, abort */
- glp_delete_prob(lp);
- return NULL;
- }
- }
- for (w = 0; w < nw; w++)
- {
- char name[32], title[64];
- starpu_worker_get_name(w, name, sizeof(name));
- snprintf(title, sizeof(title), "worker %s", name);
- glp_set_row_name(lp, w+1, title);
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- ia[n] = w+1;
- ja[n] = colnum(w, t);
- if (isnan(times[w*nt+t]))
- ar[n] = 1000000000.;
- else
- ar[n] = times[w*nt+t];
- n++;
- }
- /* tmax */
- ia[n] = w+1;
- ja[n] = nw*nt+1;
- ar[n] = -1;
- n++;
- glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
- }
- /* Total task completion */
- glp_add_rows(lp, nt);
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- char name[32], title[64];
- starpu_worker_get_name(w, name, sizeof(name));
- snprintf(title, sizeof(title), "task %s key %x", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
- glp_set_row_name(lp, nw+t+1, title);
- for (w = 0; w < nw; w++)
- {
- ia[n] = nw+t+1;
- ja[n] = colnum(w, t);
- ar[n] = 1;
- n++;
- }
- glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
- }
- STARPU_ASSERT(n == ne);
- glp_load_matrix(lp, ne-1, ia, ja, ar);
- }
- glp_smcp parm;
- glp_init_smcp(&parm);
- parm.msg_lev = GLP_MSG_OFF;
- ret = glp_simplex(lp, &parm);
- if (ret)
- {
- glp_delete_prob(lp);
- lp = NULL;
- return NULL;
- }
- if (integer)
- {
- glp_iocp iocp;
- glp_init_iocp(&iocp);
- iocp.msg_lev = GLP_MSG_OFF;
- glp_intopt(lp, &iocp);
- }
- return lp;
- }
- #endif /* STARPU_HAVE_GLPK_H */
- /* Print the computed bound as well as the optimized distribution of tasks */
- void starpu_bound_print(FILE *output, int integer STARPU_ATTRIBUTE_UNUSED)
- {
- #ifdef STARPU_HAVE_GLPK_H
- if (recorddeps)
- {
- fprintf(output, "Not supported\n");
- return;
- }
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- glp_prob *lp = _starpu_bound_glp_resolve(integer);
- if (lp)
- {
- struct bound_task_pool * tp;
- int t, w;
- int nw; /* Number of different workers */
- double tmax;
- nw = starpu_worker_get_count();
- if (integer)
- tmax = glp_mip_obj_val(lp);
- else
- tmax = glp_get_obj_val(lp);
- fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
- {
- fprintf(output, "%s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
- for (w = 0; w < nw; w++)
- if (integer)
- fprintf(output, "\tw%dt%dn %f", w, t, glp_mip_col_val(lp, colnum(w, t)));
- else
- fprintf(output, "\tw%dt%dn %f", w, t, glp_get_col_prim(lp, colnum(w, t)));
- fprintf(output, "\n");
- }
- glp_delete_prob(lp);
- }
- else
- {
- fprintf(stderr, "Simplex failed\n");
- }
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- #else /* STARPU_HAVE_GLPK_H */
- fprintf(output, "Please rebuild StarPU with glpk installed.\n");
- #endif /* STARPU_HAVE_GLPK_H */
- }
- /* Compute and return the bound */
- void starpu_bound_compute(double *res, double *integer_res STARPU_ATTRIBUTE_UNUSED, int integer STARPU_ATTRIBUTE_UNUSED)
- {
- #ifdef STARPU_HAVE_GLPK_H
- double ret;
- if (recorddeps)
- {
- *res = 0.;
- return;
- }
- STARPU_PTHREAD_MUTEX_LOCK(&mutex);
- glp_prob *lp = _starpu_bound_glp_resolve(integer);
- if (lp)
- {
- ret = glp_get_obj_val(lp);
- if (integer)
- *integer_res = glp_mip_obj_val(lp);
- glp_delete_prob(lp);
- }
- else
- ret = 0.;
- STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
- *res = ret;
- #else /* STARPU_HAVE_GLPK_H */
- *res = 0.;
- #endif /* STARPU_HAVE_GLPK_H */
- }
|