@c -*-texinfo-*-

@c This file is part of the StarPU Handbook.
@c Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
@c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
@c See the file starpu.texi for copying conditions.

@menu
* Defining a new data interface::  
* Multiformat Data Interface::  
* Task Bundles::                
* Task Lists::                  
* Defining a new scheduling policy::  
* Expert mode::                 
@end menu

@node Defining a new data interface
@section Defining a new data interface

@menu
* Data Interface API::  Data Interface API
* An example of data interface::        An example of data interface
@end menu

@node Data Interface API
@subsection Data Interface API

@deftp {Data Type} {struct starpu_data_copy_methods}
Defines the per-interface data transfer methods.
@table @asis
@item @code{int @{ram,cuda,opencl,spu@}_to_@{ram,cuda,opencl,spu@}(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);}
These sixteen functions define how to copy data from the @var{src_interface}
interface on the @var{src_node} node to the @var{dst_interface} interface
on the @var{dst_node} node. They return 0 on success.
@item @code{int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);}
Define how to copy data from the @var{src_interface} interface on the
@var{src_node} node (in RAM) to the @var{dst_interface} interface on the
@var{dst_node} node (on a CUDA device), using the given @var{stream}. Return 0
on success.
@item @code{int (*cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);}
Define how to copy data from the @var{src_interface} interface on the
@var{src_node} node (on a CUDA device) to the @var{dst_interface} interface on the
@var{dst_node} node (in RAM), using the given @var{stream}. Return 0
on success.
@item @code{int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);}
Define how to copy data from the @var{src_interface} interface on the
@var{src_node} node (on a CUDA device) to the @var{dst_interface} interface on
the @var{dst_node} node (on another CUDA device), using the given @var{stream}.
Return 0 on success.
@item @code{int (*ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);}
Define how to copy data from the @var{src_interface} interface on the
@var{src_node} node (in RAM) to the @var{dst_interface} interface on the
@var{dst_node} node (on an OpenCL device), using @var{event}, a pointer to a
cl_event. Return 0 on success.
@item @code{int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);}
Define how to copy data from the @var{src_interface} interface on the
@var{src_node} node (on an OpenCL device) to the @var{dst_interface} interface
on the @var{dst_node} node (in RAM), using the given @var{event}, a pointer to
a cl_event. Return 0 on success.
@item @code{int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);}
Define how to copy data from the @var{src_interface} interface on the
@var{src_node} node (on an OpenCL device) to the @var{dst_interface} interface
on the @var{dst_node} node (on another OpenCL device), using the given
@var{event}, a pointer to a cl_event. Return 0 on success.
@end table
@end deftp

@deftp {Data Type} {struct starpu_data_interface_ops}
@anchor{struct starpu_data_interface_ops}
Defines the per-interface methods.
@table @asis
@item @code{void (*register_data_handle)(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);}
Register an existing interface into a data handle.
@item @code{starpu_ssize_t (*allocate_data_on_node)(void *data_interface, uint32_t node);}
Allocate data for the interface on a given node.
@item @code{ void (*free_data_on_node)(void *data_interface, uint32_t node);}
Free data of the interface on a given node.
@item @code{ const struct starpu_data_copy_methods *copy_methods;}
ram/cuda/spu/opencl synchronous and asynchronous transfer methods.
@item @code{ void * (*handle_to_pointer)(starpu_data_handle_t handle, uint32_t node);}
Return the current pointer (if any) for the handle on the given node.
@item @code{ size_t (*get_size)(starpu_data_handle_t handle);}
Return an estimation of the size of data, for performance models.
@item @code{ uint32_t (*footprint)(starpu_data_handle_t handle);}
Return a 32-bit footprint which characterizes the data size.
@item @code{ int (*compare)(void *data_interface_a, void *data_interface_b);}
Compare the data size of two interfaces.
@item @code{ void (*display)(starpu_data_handle_t handle, FILE *f);}
Dump the sizes of a handle to a file.
@item @code{ int (*convert_to_gordon)(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); }
Convert the data size to the SPU size format. If no SPUs are used, this field can be set to @code{NULL}.
@item @code{enum starpu_data_interface_id interfaceid;}
An identifier that is unique to each interface.
@item @code{size_t interface_size;}
The size of the interface data descriptor.
@end table
@end deftp

@node An example of data interface
@subsection An example of data interface

TODO
See @code{src/datawizard/interfaces/vector_interface.c} for now.

@node Multiformat Data Interface
@section Multiformat Data Interface

@deftp {Data Type} {struct starpu_multiformat_data_interface_ops}
todo. The different fields are:
@table @asis
@item @code{cpu_elemsize}
the size of each element on CPUs,
@item @code{opencl_elemsize}
the size of each element on OpenCL devices,
@item @code{cuda_elemsize}
the size of each element on CUDA devices,
@item @code{cpu_to_opencl_cl}
pointer to a codelet which converts from CPU to OpenCL
@item @code{opencl_to_cpu_cl}
pointer to a codelet which converts from OpenCL to CPU
@item @code{cpu_to_cuda_cl}
pointer to a codelet which converts from CPU to CUDA
@item @code{cuda_to_cpu_cl}
pointer to a codelet which converts from CUDA to CPU
@end table
@end deftp

@deftypefun void starpu_multiformat_data_register (starpu_data_handle_t *@var{handle}, uint32_t @var{home_node}, void *@var{ptr}, uint32_t @var{nobjects}, struct starpu_multiformat_data_interface_ops *@var{format_ops})
Register a piece of data that can be represented in different ways, depending upon
the processing unit that manipulates it. It allows the programmer, for instance, to
use an array of structures when working on a CPU, and a structure of arrays when
working on a GPU.

@var{nobjects} is the number of elements in the data. @var{format_ops} describes
the format.
@end deftypefun


@node Task Bundles
@section Task Bundles

@deftp {Data Type} {struct starpu_task_bundle}
The task bundle structure describes a list of tasks that should be
scheduled together whenever possible. The different fields are:
@table @asis
@item @code{mutex}
Mutex protecting the bundle
@item @code{int previous_workerid}
last worker previously assigned a task from the bundle (-1 if none)
@item @code{struct starpu_task_bundle_entry *list}
list of tasks
@item @code{int destroy}
If this flag is set, the bundle structure is automatically freed when the bundle is deinitialized.
@item @code{int closed}
Whether the bundle is closed.
@end table
@end deftp

@deftypefun void starpu_task_bundle_init ({struct starpu_task_bundle *}@var{bundle})
Initialize a task bundle
@end deftypefun

@deftypefun void starpu_task_bundle_deinit ({struct starpu_task_bundle *}@var{bundle})
Deinitialize a bundle. In case the destroy flag is set, the bundle
structure is freed too.
@end deftypefun

@deftypefun int starpu_task_bundle_insert ({struct starpu_task_bundle *}@var{bundle}, {struct starpu_task *}@var{task})
Insert a task into a bundle.
@end deftypefun

@deftypefun int starpu_task_bundle_remove ({struct starpu_task_bundle *}@var{bundle}, {struct starpu_task *}@var{task})
Remove a task from a bundle. This function must be called with
@code{bundle->mutex} held. It returns 0 if the task was found,
@code{-ENOENT} if the task was not found, and 1 if the task was found and
the bundle was deinitialized because it became empty.
@end deftypefun

@deftypefun void starpu_task_bundle_close ({struct starpu_task_bundle *}@var{bundle})
Close a bundle. No task can be added to a closed bundle. A closed
bundle automatically gets deinitialized when it becomes empty.
@end deftypefun

@deftypefun double starpu_task_bundle_expected_length ({struct starpu_task_bundle *}@var{bundle}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
Return the expected duration of the entire task bundle in µs.
@end deftypefun

@deftypefun double starpu_task_bundle_expected_data_transfer_time ({struct starpu_task_bundle *}@var{bundle}, unsigned @var{memory_node})
Return the time (in µs) expected to transfer all data used within the bundle
@end deftypefun

@deftypefun double starpu_task_bundle_expected_power ({struct starpu_task_bundle *}@var{bundle},  {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
Return the expected power consumption of the entire task bundle in J.
@end deftypefun

@node Task Lists
@section Task Lists

@deftp {Data Type} {struct starpu_task_list}
Stores a doubly-linked list of tasks.
@end deftp

@deftypefun void starpu_task_list_init ({struct starpu_task_list *}@var{list})
Initialize a list structure
@end deftypefun

@deftypefun void starpu_task_list_push_front ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
Push a task at the front of a list
@end deftypefun

@deftypefun void starpu_task_list_push_back ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
Push a task at the back of a list
@end deftypefun

@deftypefun {struct starpu_task *} starpu_task_list_front ({struct starpu_task_list *}@var{list})
Get the front of the list (without removing it)
@end deftypefun

@deftypefun {struct starpu_task *} starpu_task_list_back ({struct starpu_task_list *}@var{list})
Get the back of the list (without removing it)
@end deftypefun

@deftypefun int starpu_task_list_empty ({struct starpu_task_list *}@var{list})
Test if a list is empty
@end deftypefun

@deftypefun void starpu_task_list_erase ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
Remove an element from the list
@end deftypefun

@deftypefun {struct starpu_task *} starpu_task_list_pop_front ({struct starpu_task_list *}@var{list})
Remove the element at the front of the list
@end deftypefun

@deftypefun {struct starpu_task *} starpu_task_list_pop_back ({struct starpu_task_list *}@var{list})
Remove the element at the back of the list
@end deftypefun

@deftypefun {struct starpu_task *} starpu_task_list_begin ({struct starpu_task_list *}@var{list})
Get the first task of the list.
@end deftypefun

@deftypefun {struct starpu_task *} starpu_task_list_end ({struct starpu_task_list *}@var{list})
Get the end of the list.
@end deftypefun

@deftypefun {struct starpu_task *} starpu_task_list_next ({struct starpu_task *}@var{task})
Get the next task of the list. This is not erase-safe.
@end deftypefun

@node Defining a new scheduling policy
@section Defining a new scheduling policy

TODO

A full example showing how to define a new scheduling policy is available in
the StarPU sources in the directory @code{examples/scheduler/}.

@menu
* Scheduling Policy API:: Scheduling Policy API
* Source code::
@end menu

@node Scheduling Policy API
@subsection Scheduling Policy API

@deftp {Data Type} {struct starpu_sched_policy}
This structure contains all the methods that implement a scheduling policy.  An
application may specify which scheduling strategy to use in the
@code{sched_policy} field of the @code{starpu_conf} structure passed to the
@code{starpu_init} function. The different fields are:
@table @asis
@item @code{init_sched}
Initialize the scheduling policy.
@item @code{deinit_sched}
Cleanup the scheduling policy.
@item @code{push_task}
Insert a task into the scheduler.
@item @code{push_task_notify}
Notify the scheduler that a task was pushed on a given worker. This method is
called when a task that was explicitly assigned to a worker becomes ready and
is about to be executed by the worker. This method therefore makes it possible
to keep the state of the scheduler coherent even when StarPU bypasses the
scheduling strategy.
@item @code{pop_task} (optional)
Get a task from the scheduler. The mutex associated to the worker is already
taken when this method is called. If this method is defined as @code{NULL}, the
worker will only execute tasks from its local queue. In this case, the
@code{push_task} method should use the @code{starpu_push_local_task} method to
assign tasks to the different workers.
@item @code{pop_every_task}
Remove all available tasks from the scheduler (tasks are chained by means of
the @code{prev} and @code{next} fields of the @code{starpu_task} structure).
The mutex associated to the worker is already taken when this method is
called. This is currently only used by the Gordon driver.
@item @code{post_exec_hook} (optional)
This method is called every time a task has been executed.
@item @code{policy_name}
Name of the policy (optional).
@item @code{policy_description}
Description of the policy (optional).
@end table
@end deftp

@deftypefun void starpu_worker_set_sched_condition (int @var{workerid}, pthread_cond_t *@var{sched_cond}, pthread_mutex_t *@var{sched_mutex})
This function specifies the condition variable associated to a worker.
When there is no available task for a worker, StarPU blocks this worker on a
condition variable. This function specifies which condition variable (and the
associated mutex) should be used to block (and to wake up) a worker. Note that
multiple workers may use the same condition variable. For instance, in the case
of a scheduling strategy with a single task queue, the same condition variable
would be used to block and wake up all workers.
The initialization method of a scheduling strategy (@code{init_sched}) must
call this function once per worker.
@end deftypefun

@deftypefun void starpu_sched_set_min_priority (int @var{min_prio})
Defines the minimum priority level supported by the scheduling policy. The
default minimum priority level is the same as the default priority level which
is 0 by convention.  The application may access that value by calling the
@code{starpu_sched_get_min_priority} function. This function should only be
called from the initialization method of the scheduling policy, and should not
be used directly from the application.
@end deftypefun

@deftypefun void starpu_sched_set_max_priority (int @var{max_prio})
Defines the maximum priority level supported by the scheduling policy. The
default maximum priority level is 1.  The application may access that value by
calling the @code{starpu_sched_get_max_priority} function. This function should
only be called from the initialization method of the scheduling policy, and
should not be used directly from the application.
@end deftypefun

@deftypefun int starpu_sched_get_min_priority (void)
Returns the current minimum priority level supported by the
scheduling policy
@end deftypefun

@deftypefun int starpu_sched_get_max_priority (void)
Returns the current maximum priority level supported by the
scheduling policy
@end deftypefun

@deftypefun int starpu_push_local_task (int @var{workerid}, {struct starpu_task} *@var{task}, int @var{back})
The scheduling policy may put tasks directly into a worker's local queue so
that it is not always necessary to create its own queue when the local queue
is sufficient. If @var{back} is non-zero, @var{task} is put at the back of the queue
where the worker will pop tasks first. Setting @var{back} to 0 therefore ensures
a FIFO ordering.
@end deftypefun

@deftypefun int starpu_worker_may_run_task (unsigned @var{workerid}, {struct starpu_task *}@var{task}, unsigned @var{nimpl})
Check if the worker specified by @var{workerid} can execute the codelet. Schedulers need to call it before assigning a task to a worker, otherwise the task may fail to execute.
@end deftypefun

@deftypefun double starpu_timing_now (void)
Return the current date in µs
@end deftypefun

@deftypefun double starpu_task_expected_length ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
Returns expected task duration in µs
@end deftypefun

@deftypefun double starpu_worker_get_relative_speedup ({enum starpu_perf_archtype} @var{perf_archtype})
Returns an estimated speedup factor relative to CPU speed
@end deftypefun

@deftypefun double starpu_task_expected_data_transfer_time (uint32_t @var{memory_node}, {struct starpu_task *}@var{task})
Returns expected data transfer time in µs
@end deftypefun

@deftypefun double starpu_data_expected_transfer_time (starpu_data_handle_t @var{handle}, unsigned @var{memory_node}, {enum starpu_access_mode} @var{mode})
Predict the transfer time (in µs) to move a handle to a memory node
@end deftypefun

@deftypefun double starpu_task_expected_power ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
Returns expected power consumption in J
@end deftypefun

@deftypefun double starpu_task_expected_conversion_time ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
Returns expected conversion time in ms (multiformat interface only)
@end deftypefun

@node Source code
@subsection Source code

@cartouche
@smallexample
static struct starpu_sched_policy dummy_sched_policy = @{
    .init_sched = init_dummy_sched,
    .deinit_sched = deinit_dummy_sched,
    .push_task = push_task_dummy,
    .push_prio_task = NULL,
    .pop_task = pop_task_dummy,
    .post_exec_hook = NULL,
    .pop_every_task = NULL,
    .policy_name = "dummy",
    .policy_description = "dummy scheduling strategy"
@};
@end smallexample
@end cartouche

@node Expert mode
@section Expert mode

@deftypefun void starpu_wake_all_blocked_workers (void)
todo
@end deftypefun

@deftypefun int starpu_progression_hook_register (unsigned (*@var{func})(void *arg), void *@var{arg})
todo
@end deftypefun

@deftypefun void starpu_progression_hook_deregister (int @var{hook_id})
todo
@end deftypefun