Add Heteroprio documentation (including a simple example)

Berenger Bramas, 6 years ago
commit da69ff5021
1 changed file with 75 additions and 0 deletions

+ 75 - 0
doc/doxygen/chapters/320_scheduling.doxy

@@ -63,6 +63,10 @@ also takes into account priorities.
 The <b>prio</b> scheduler also uses a central task queue, but sorts tasks by
 priority specified by the programmer (between -5 and 5).
 
+The <b>heteroprio</b> scheduler uses different priorities for the different processing units.
+This scheduler must be configured in order to work correctly and to achieve high
+performance, as described in the corresponding section.
+
 \section DMTaskSchedulingPolicy Performance Model-Based Task Scheduling Policies
 
 If (<b>and only if</b>) your application <b>codelets have performance models</b> (\ref
@@ -255,4 +259,75 @@ contexts, otherwise the workers' corresponding scheduling structures may not be
 the execution of the application may deadlock. Moreover, the hypervisor should not be used when
 statically scheduling tasks.
 
+\section ConfiguringHeteroprio Configuring Heteroprio
+
+Within Heteroprio, one priority per processing unit type is assigned to each task, so that a task has several
+priorities. Each worker pops the task that has the highest priority for the hardware type it uses,
+such as CPU or CUDA. Therefore, the priorities have to be used to manage the critical path,
+but also to promote the consumption of tasks by the most appropriate workers.
+
+The tasks are stored inside buckets, where each bucket corresponds to a priority set. Each
+worker then uses an indirection array to know the order in which it should access the buckets. Moreover,
+all the tasks inside a bucket must be compatible with (at least) all the processing units that may access it.
+
+As an example, consider the following code, where we have 5 types of tasks.
+CPU workers can compute all of them, but CUDA workers can only execute
+tasks of types 0 and 1, and are expected to run 20 and 30 times
+faster than the CPU, respectively.
+\code{.c}
+// In the file that initializes StarPU
+#include <starpu_heteroprio.h>
+
+////////////////////////////////////////////////////
+
+// Before calling starpu_init
+struct starpu_conf conf;
+starpu_conf_init(&conf);
+// Inform StarPU to use Heteroprio
+conf.sched_policy_name = "heteroprio";
+// Register the function that will initialize the priorities in Heteroprio
+// (init_heteroprio is a function to be implemented by the application)
+conf.sched_policy_init = &init_heteroprio;
+// Do other things with conf if needed, then init StarPU
+starpu_init(&conf);
+
+////////////////////////////////////////////////////
+
+void init_heteroprio(unsigned ctx) {
+  // CPU uses 5 buckets and visits them in the natural order
+  starpu_heteroprio_set_nb_prios(ctx, STARPU_CPU_IDX, 5);
+  // It uses direct mapping idx => idx
+  for(unsigned idx = 0; idx < 5; ++idx){
+    starpu_heteroprio_set_mapping(ctx, STARPU_CPU_IDX, idx, idx);
+    // If there is no CUDA worker, we tell Heteroprio that the CPU is the fastest for this bucket
+    starpu_heteroprio_set_faster_arch(ctx, STARPU_CPU_IDX, idx);
+  }
+  
+  if(starpu_cuda_worker_get_count()){
+    // CUDA is enabled and uses 2 buckets
+    starpu_heteroprio_set_nb_prios(ctx, STARPU_CUDA_IDX, 2);
+    // CUDA will first look at bucket 1
+    starpu_heteroprio_set_mapping(ctx, STARPU_CUDA_IDX, 0, 1);
+    // CUDA will then look at bucket 0
+    starpu_heteroprio_set_mapping(ctx, STARPU_CUDA_IDX, 1, 0);
+
+    // For bucket 1 CUDA is the fastest
+    starpu_heteroprio_set_faster_arch(ctx, STARPU_CUDA_IDX, 1);
+    // And CPU is 30 times slower
+    starpu_heteroprio_set_arch_slow_factor(ctx, STARPU_CPU_IDX, 1, 30.0f);
+    
+    // For bucket 0 CUDA is the fastest
+    starpu_heteroprio_set_faster_arch(ctx, STARPU_CUDA_IDX, 0);
+    // And CPU is 20 times slower
+    starpu_heteroprio_set_arch_slow_factor(ctx, STARPU_CPU_IDX, 0, 20.0f);
+  }
+}
+\endcode
+
+Then, when a task is inserted, <b>the priority of the task is used to
+select the bucket in which it has to be stored</b>.
+So, in the given example, the priority of a task will be between 0 and 4 inclusive.
+However, tasks of priorities 0-1 must provide both CPU and CUDA kernels, while
+tasks of priorities 2-4 must provide (at least) CPU kernels.
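As an illustration (this fragment is not part of the original documentation, and the codelet `cl` is assumed to be defined elsewhere with the kernels required by its bucket), a task of priority 2 could then be submitted as follows:

```c
#include <starpu.h>

// Hypothetical codelet providing (at least) a CPU kernel,
// as required for tasks of priorities 2-4 in this configuration
extern struct starpu_codelet cl;

void submit_prio2_task(void)
{
  struct starpu_task *task = starpu_task_create();
  task->cl = &cl;
  // Heteroprio stores this task in bucket 2, which (in the example
  // configuration) only CPU workers will visit
  task->priority = 2;
  starpu_task_submit(task);
}
```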
+
 */