Browse Source

doc/doxygen: backport updates from branch mic-scc-merge

Nathalie Furmento 12 years ago
parent
commit
f0de9e2164

+ 2 - 0
doc/doxygen/Makefile.am

@@ -72,6 +72,8 @@ chapters =	\
 	chapters/api/mpi.doxy \
 	chapters/api/mpi.doxy \
 	chapters/api/multiformat_data_interface.doxy \
 	chapters/api/multiformat_data_interface.doxy \
 	chapters/api/opencl_extensions.doxy \
 	chapters/api/opencl_extensions.doxy \
+	chapters/api/mic_extensions.doxy \
+	chapters/api/scc_extensions.doxy \
 	chapters/api/parallel_tasks.doxy \
 	chapters/api/parallel_tasks.doxy \
 	chapters/api/performance_model.doxy \
 	chapters/api/performance_model.doxy \
 	chapters/api/profiling.doxy \
 	chapters/api/profiling.doxy \

+ 12 - 3
doc/doxygen/chapters/advanced_examples.doxy

@@ -39,6 +39,7 @@ void scal_sse_func(void *buffers[], void *cl_arg)
 struct starpu_codelet cl = {
 struct starpu_codelet cl = {
     .where = STARPU_CPU,
     .where = STARPU_CPU,
     .cpu_funcs = { scal_cpu_func, scal_sse_func, NULL },
     .cpu_funcs = { scal_cpu_func, scal_sse_func, NULL },
+    .cpu_funcs_name = { "scal_cpu_func", "scal_sse_func", NULL },
     .nbuffers = 1,
     .nbuffers = 1,
     .modes = { STARPU_RW }
     .modes = { STARPU_RW }
 };
 };
@@ -75,6 +76,7 @@ struct starpu_codelet cl = {
     .where = STARPU_CPU|STARPU_CUDA,
     .where = STARPU_CPU|STARPU_CUDA,
     .can_execute = can_execute,
     .can_execute = can_execute,
     .cpu_funcs = { cpu_func, NULL },
     .cpu_funcs = { cpu_func, NULL },
+    .cpu_funcs_name = { "cpu_func", NULL },
     .cuda_funcs = { gpu_func, NULL }
     .cuda_funcs = { gpu_func, NULL }
     .nbuffers = 1,
     .nbuffers = 1,
     .modes = { STARPU_RW }
     .modes = { STARPU_RW }
@@ -120,6 +122,7 @@ struct starpu_codelet cl = {
     .where = STARPU_CPU|STARPU_CUDA,
     .where = STARPU_CPU|STARPU_CUDA,
     .can_execute = can_execute,
     .can_execute = can_execute,
     .cpu_funcs = { cpu_func, NULL },
     .cpu_funcs = { cpu_func, NULL },
+    .cpu_funcs_name = { "cpu_func", NULL },
     .cuda_funcs = { scal_gpu_13, scal_gpu_20, NULL },
     .cuda_funcs = { scal_gpu_13, scal_gpu_20, NULL },
     .nbuffers = 1,
     .nbuffers = 1,
     .modes = { STARPU_RW }
     .modes = { STARPU_RW }
@@ -317,6 +320,7 @@ static struct starpu_perfmodel mult_perf_model = {
 struct starpu_codelet cl = {
 struct starpu_codelet cl = {
     .where = STARPU_CPU,
     .where = STARPU_CPU,
     .cpu_funcs = { cpu_mult, NULL },
     .cpu_funcs = { cpu_mult, NULL },
+    .cpu_funcs_name = { "cpu_mult", NULL },
     .nbuffers = 3,
     .nbuffers = 3,
     .modes = { STARPU_R, STARPU_R, STARPU_W },
     .modes = { STARPU_R, STARPU_R, STARPU_W },
     /* for the scheduling policy to be able to use performance models */
     /* for the scheduling policy to be able to use performance models */
@@ -483,6 +487,7 @@ void func_cpu(void *descr[], void *_args)
 struct starpu_codelet mycodelet = {
 struct starpu_codelet mycodelet = {
         .where = STARPU_CPU,
         .where = STARPU_CPU,
         .cpu_funcs = { func_cpu, NULL },
         .cpu_funcs = { func_cpu, NULL },
+        .cpu_funcs_name = { "func_cpu", NULL },
         .nbuffers = 2,
         .nbuffers = 2,
         .modes = { STARPU_RW, STARPU_RW }
         .modes = { STARPU_RW, STARPU_RW }
 };
 };
@@ -575,6 +580,7 @@ the codelets for initialization and reduction:
 struct starpu_codelet bzero_variable_cl =
 struct starpu_codelet bzero_variable_cl =
 {
 {
         .cpu_funcs = { bzero_variable_cpu, NULL },
         .cpu_funcs = { bzero_variable_cpu, NULL },
+        .cpu_funcs_name = { "bzero_variable_cpu", NULL },
         .cuda_funcs = { bzero_variable_cuda, NULL },
         .cuda_funcs = { bzero_variable_cuda, NULL },
         .nbuffers = 1,
         .nbuffers = 1,
 }
 }
@@ -597,6 +603,7 @@ static void accumulate_variable_cuda(void *descr[], void *cl_arg)
 struct starpu_codelet accumulate_variable_cl =
 struct starpu_codelet accumulate_variable_cl =
 {
 {
         .cpu_funcs = { accumulate_variable_cpu, NULL },
         .cpu_funcs = { accumulate_variable_cpu, NULL },
+        .cpu_funcs_name = { "accumulate_variable_cpu", NULL },
         .cuda_funcs = { accumulate_variable_cuda, NULL },
         .cuda_funcs = { accumulate_variable_cuda, NULL },
         .nbuffers = 1,
         .nbuffers = 1,
 }
 }
@@ -786,6 +793,7 @@ static struct starpu_codelet cl =
     .type = STARPU_SPMD,
     .type = STARPU_SPMD,
     .max_parallelism = INT_MAX,
     .max_parallelism = INT_MAX,
     .cpu_funcs = { func, NULL },
     .cpu_funcs = { func, NULL },
+    .cpu_funcs_name = { "func", NULL },
     .nbuffers = 1,
     .nbuffers = 1,
 }
 }
 \endcode
 \endcode
@@ -1140,9 +1148,10 @@ enum starpu_data_access_mode modes[STARPU_NMAXBUFS+1] = {
 
 
 struct starpu_codelet dummy_big_cl =
 struct starpu_codelet dummy_big_cl =
 {
 {
-	.cuda_funcs = {dummy_big_kernel, NULL},
+	.cuda_funcs = { dummy_big_kernel, NULL },
-	.opencl_funcs = {dummy_big_kernel, NULL},
+	.opencl_funcs = { dummy_big_kernel, NULL },
-	.cpu_funcs = {dummy_big_kernel, NULL},
+	.cpu_funcs = { dummy_big_kernel, NULL },
+	.cpu_funcs_name = { "dummy_big_kernel", NULL },
 	.nbuffers = STARPU_NMAXBUFS+1,
 	.nbuffers = STARPU_NMAXBUFS+1,
 	.dyn_modes = modes
 	.dyn_modes = modes
 };
 };

+ 50 - 0
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -72,6 +72,16 @@ to specify the codelet may be executed on a CUDA processing unit.
 This macro is used when setting the field starpu_codelet::where to
 This macro is used when setting the field starpu_codelet::where to
 specify the codelet may be executed on a OpenCL processing unit.
 specify the codelet may be executed on a OpenCL processing unit.
 
 
+\def STARPU_MIC
+\ingroup API_Codelet_And_Tasks
+This macro is used when setting the field starpu_codelet::where to
+specify the codelet may be executed on a MIC processing unit.
+
+\def STARPU_SCC
+\ingroup API_Codelet_And_Tasks
+This macro is used when setting the field starpu_codelet::where to
+specify the codelet may be executed on an SCC processing unit.
+
 \def STARPU_MULTIPLE_CPU_IMPLEMENTATIONS
 \def STARPU_MULTIPLE_CPU_IMPLEMENTATIONS
 \deprecated
 \deprecated
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
@@ -178,6 +188,12 @@ starpu_codelet::where is set, then the field starpu_codelet::cpu_funcs
 is ignored if ::STARPU_CPU does not appear in the field
 is ignored if ::STARPU_CPU does not appear in the field
 starpu_codelet::where, it must be non-null otherwise.
 starpu_codelet::where, it must be non-null otherwise.
 
 
+\var starpu_codelet::cpu_funcs_name
+Optional array of strings which provide the name of the CPU functions
+referenced in the array starpu_codelet::cpu_funcs. This can be used
+when running on MIC devices or the SCC platform, for StarPU to simply
+look up the MIC or SCC function implementation through its name.
+
 \var starpu_codelet::cuda_funcs
 \var starpu_codelet::cuda_funcs
 Optional array of function pointers to the CUDA implementations of the
 Optional array of function pointers to the CUDA implementations of the
 codelet. It must be terminated by a NULL value. The functions must be
 codelet. It must be terminated by a NULL value. The functions must be
@@ -202,6 +218,32 @@ starpu_codelet::opencl_funcs is ignored if ::STARPU_OPENCL does not
 appear in the field starpu_codelet::where, it must be non-null
 appear in the field starpu_codelet::where, it must be non-null
 otherwise.
 otherwise.
 
 
+\var starpu_codelet::mic_funcs
+Optional array of function pointers to a function which returns the
+MIC implementation of the codelet. It must be terminated by a NULL
+value. The prototype of these functions must be:
+\code{.c}
+starpu_mic_kernel_t mic_func(struct starpu_codelet *cl, unsigned nimpl)
+\endcode
+If the field starpu_codelet::where is set, then the field
+starpu_codelet::mic_funcs is ignored if ::STARPU_MIC does not appear
+in the field starpu_codelet::where. It can be null if
+starpu_codelet::cpu_funcs_name is non-NULL, in which case StarPU will
+simply make a symbol lookup to get the implementation.
+
+\var starpu_codelet::scc_funcs
+Optional array of function pointers to a function which returns the
+SCC implementation of the codelet. It must be terminated by a NULL value.
+The prototype of these functions must be:
+\code{.c}
+starpu_scc_kernel_t scc_func(struct starpu_codelet *cl, unsigned nimpl)
+\endcode
+If the field starpu_codelet::where is set, then the field
+starpu_codelet::scc_funcs is ignored if ::STARPU_SCC does not appear
+in the field starpu_codelet::where. It can be null if
+starpu_codelet::cpu_funcs_name is non-NULL, in which case StarPU will
+simply make a symbol lookup to get the implementation.
+
 \var starpu_codelet::nbuffers
 \var starpu_codelet::nbuffers
 Specify the number of arguments taken by the codelet. These arguments
 Specify the number of arguments taken by the codelet. These arguments
 are managed by the DSM and are accessed from the <c>void *buffers[]</c>
 are managed by the DSM and are accessed from the <c>void *buffers[]</c>
@@ -325,6 +367,14 @@ the buffer in local store (LS) instead. This field is ignored for CPU,
 CUDA and OpenCL codelets, where the starpu_task::cl_arg pointer is
 CUDA and OpenCL codelets, where the starpu_task::cl_arg pointer is
 given as such.
 given as such.
 
 
+\var starpu_task::cl_arg_free
+Optional field. In case starpu_task::cl_arg was allocated by the
+application through <c>malloc()</c>, setting starpu_task::cl_arg_free
+to 1 makes StarPU automatically call <c>free(cl_arg)</c> when
+destroying the task. This saves the user from defining a callback just
+for that. This is mostly useful when targeting MIC or SCC, where the
+codelet does not execute in the same memory space as the main thread.
+
 \var starpu_task::callback_func
 \var starpu_task::callback_func
 Optional field, the default value is <c>NULL</c>. This is a function
 Optional field, the default value is <c>NULL</c>. This is a function
 pointer of prototype <c>void (*f)(void *)</c> which specifies a
 pointer of prototype <c>void (*f)(void *)</c> which specifies a

+ 76 - 33
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -54,89 +54,132 @@ provided. It can still be useful to provide more specific method in
 case of e.g. available particular CUDA or OpenCL support.
 case of e.g. available particular CUDA or OpenCL support.
 \ingroup API_Data_Interfaces
 \ingroup API_Data_Interfaces
 \var starpu_data_copy_methods::ram_to_ram
 \var starpu_data_copy_methods::ram_to_ram
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the \p
-src_node CPU node to the dst_interface interface on the dst_node CPU
+src_node CPU node to the \p dst_interface interface on the \p dst_node
-node. Return 0 on success.
+CPU node. Return 0 on success.
 \var starpu_data_copy_methods::ram_to_cuda
 \var starpu_data_copy_methods::ram_to_cuda
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node CUDA
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA
 node. Return 0 on success.
 node. Return 0 on success.
 \var starpu_data_copy_methods::ram_to_opencl
 \var starpu_data_copy_methods::ram_to_opencl
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node
 OpenCL node. Return 0 on success.
 OpenCL node. Return 0 on success.
+
+\var starpu_data_copy_methods::ram_to_mic
+Define how to copy data from the \p src_interface interface on the
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node MIC
+node. Return 0 on success.
+
 \var starpu_data_copy_methods::cuda_to_ram
 \var starpu_data_copy_methods::cuda_to_ram
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node
 CPU node. Return 0 on success.
 CPU node. Return 0 on success.
 \var starpu_data_copy_methods::cuda_to_cuda
 \var starpu_data_copy_methods::cuda_to_cuda
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node CUDA
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA
 node. Return 0 on success.
 node. Return 0 on success.
 \var starpu_data_copy_methods::cuda_to_opencl
 \var starpu_data_copy_methods::cuda_to_opencl
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node
 OpenCL node. Return 0 on success.
 OpenCL node. Return 0 on success.
 \var starpu_data_copy_methods::opencl_to_ram
 \var starpu_data_copy_methods::opencl_to_ram
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 CPU node. Return 0 on success.
 CPU node. Return 0 on success.
 \var starpu_data_copy_methods::opencl_to_cuda
 \var starpu_data_copy_methods::opencl_to_cuda
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 CUDA node. Return 0 on success.
 CUDA node. Return 0 on success.
 \var starpu_data_copy_methods::opencl_to_opencl
 \var starpu_data_copy_methods::opencl_to_opencl
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 OpenCL node. Return 0 on success.
 OpenCL node. Return 0 on success.
 
 
+\var starpu_data_copy_methods::mic_to_ram
+Define how to copy data from the \p src_interface interface on the
+\p src_node MIC node to the \p dst_interface interface on the \p dst_node CPU
+node. Return 0 on success.
+
+\var starpu_data_copy_methods::scc_src_to_sink
+Define how to copy data from the \p src_interface interface on the
+\p src_node node to the \p dst_interface interface on the \p dst_node node.
+Must return 0 if the transfer was actually completed completely
+synchronously, or -EAGAIN if at least some transfers are still ongoing
+and should be awaited for by the core.
+\var starpu_data_copy_methods::scc_sink_to_src
+Define how to copy data from the \p src_interface interface on the
+\p src_node node to the \p dst_interface interface on the \p dst_node node.
+Must return 0 if the transfer was actually completed completely
+synchronously, or -EAGAIN if at least some transfers are still ongoing
+and should be awaited for by the core.
+\var starpu_data_copy_methods::scc_sink_to_sink
+Define how to copy data from the \p src_interface interface on the
+\p src_node node to the \p dst_interface interface on the \p dst_node node.
+Must return 0 if the transfer was actually completed completely
+synchronously, or -EAGAIN if at least some transfers are still ongoing
+and should be awaited for by the core.
+
 \var starpu_data_copy_methods::ram_to_cuda_async
 \var starpu_data_copy_methods::ram_to_cuda_async
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node CUDA
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA
 node, using the given stream. Must return 0 if the transfer was
 node, using the given stream. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the core.
 some transfers are still ongoing and should be awaited for by the core.
 \var starpu_data_copy_methods::cuda_to_ram_async
 \var starpu_data_copy_methods::cuda_to_ram_async
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node CPU
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CPU
 node, using the given stream. Must return 0 if the transfer was
 node, using the given stream. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the core.
 some transfers are still ongoing and should be awaited for by the core.
 \var starpu_data_copy_methods::cuda_to_cuda_async
 \var starpu_data_copy_methods::cuda_to_cuda_async
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node CUDA
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA
 node, using the given stream. Must return 0 if the transfer was
 node, using the given stream. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the core.
 some transfers are still ongoing and should be awaited for by the core.
 
 
 \var starpu_data_copy_methods::ram_to_opencl_async
 \var starpu_data_copy_methods::ram_to_opencl_async
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node
 OpenCL node, by recording in event, a pointer to a cl_event, the event
 OpenCL node, by recording in event, a pointer to a cl_event, the event
 of the last submitted transfer. Must return 0 if the transfer was
 of the last submitted transfer. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the
 some transfers are still ongoing and should be awaited for by the
 core.
 core.
 \var starpu_data_copy_methods::opencl_to_ram_async
 \var starpu_data_copy_methods::opencl_to_ram_async
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 CPU node, by recording in event, a pointer to a cl_event, the event of
 CPU node, by recording in event, a pointer to a cl_event, the event of
 the last submitted transfer. Must return 0 if the transfer was
 the last submitted transfer. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the
 some transfers are still ongoing and should be awaited for by the
 core.
 core.
 \var starpu_data_copy_methods::opencl_to_opencl_async
 \var starpu_data_copy_methods::opencl_to_opencl_async
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 OpenCL node, by recording in event, a pointer to a cl_event, the event
 OpenCL node, by recording in event, a pointer to a cl_event, the event
 of the last submitted transfer. Must return 0 if the transfer was
 of the last submitted transfer. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the
 some transfers are still ongoing and should be awaited for by the
 core.
 core.
 
 
+\var starpu_data_copy_methods::ram_to_mic_async
+Define how to copy data from the \p src_interface interface on the
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node
+MIC node. Must return 0 if the transfer was actually completed
+completely synchronously, or -EAGAIN if at least some transfers are
+still ongoing and should be awaited for by the core.
+\var starpu_data_copy_methods::mic_to_ram_async
+Define how to copy data from the \p src_interface interface on the
+\p src_node MIC node to the \p dst_interface interface on the \p dst_node
+CPU node. Must return 0 if the transfer was actually completed
+completely synchronously, or -EAGAIN if at least some transfers are
+still ongoing and should be awaited for by the core.
+
 \var starpu_data_copy_methods::any_to_any
 \var starpu_data_copy_methods::any_to_any
-Define how to copy data from the src_interface interface on the
+Define how to copy data from the \p src_interface interface on the
-src_node node to the dst_interface interface on the dst_node node.
+\p src_node node to the \p dst_interface interface on the \p dst_node node.
 This is meant to be implemented through the starpu_interface_copy()
 This is meant to be implemented through the starpu_interface_copy()
 helper, to which async_data should be passed as such, and will be used
 helper, to which async_data should be passed as such, and will be used
 to manage asynchronicity. This must return -EAGAIN if any of the
 to manage asynchronicity. This must return -EAGAIN if any of the

+ 27 - 7
doc/doxygen/chapters/api/initialization.doxy

@@ -49,9 +49,13 @@ This is the number of OpenCL devices that StarPU can use. This can
 also be specified with the environment variable \ref STARPU_NOPENCL.
 also be specified with the environment variable \ref STARPU_NOPENCL.
 (default = -1)
 (default = -1)
 \var starpu_conf::nmic
 \var starpu_conf::nmic
-This is the number of MIC devices that StarPU can use. (default = -1)
+This is the number of MIC devices that StarPU can use. This can also
+be specified with the environment variable \ref STARPU_NMIC.
+(default = -1)
 \var starpu_conf::nscc
 \var starpu_conf::nscc
-This is the number of SCC devices that StarPU can use. (default = -1)
+This is the number of SCC devices that StarPU can use. This can also
+be specified with the environment variable \ref STARPU_NSCC.
+(default = -1)
 
 
 \var starpu_conf::use_explicit_workers_bindid
 \var starpu_conf::use_explicit_workers_bindid
 If this flag is set, the starpu_conf::workers_bindid array indicates
 If this flag is set, the starpu_conf::workers_bindid array indicates
@@ -89,10 +93,14 @@ be used.
 \var starpu_conf::use_explicit_workers_mic_deviceid
 \var starpu_conf::use_explicit_workers_mic_deviceid
 If this flag is set, the MIC workers will be attached to the MIC
 If this flag is set, the MIC workers will be attached to the MIC
 devices specified in the array starpu_conf::workers_mic_deviceid.
 devices specified in the array starpu_conf::workers_mic_deviceid.
+Otherwise, StarPU assigns the MIC devices in a round-robin fashion.
+This can also be specified with the environment variable \ref
+STARPU_WORKERS_MICID.
 (default = 0)
 (default = 0)
 \var starpu_conf::workers_mic_deviceid
 \var starpu_conf::workers_mic_deviceid
 If the flag starpu_conf::use_explicit_workers_mic_deviceid is set, the
 If the flag starpu_conf::use_explicit_workers_mic_deviceid is set, the
 array contains the logical identifiers of the MIC devices to be used.
 array contains the logical identifiers of the MIC devices to be used.
+
 \var starpu_conf::use_explicit_workers_scc_deviceid
 \var starpu_conf::use_explicit_workers_scc_deviceid
 If this flag is set, the SCC workers will be attached to the SCC
 If this flag is set, the SCC workers will be attached to the SCC
 devices specified in the array starpu_conf::workers_scc_deviceid.
 devices specified in the array starpu_conf::workers_scc_deviceid.
@@ -100,6 +108,9 @@ devices specified in the array starpu_conf::workers_scc_deviceid.
 \var starpu_conf::workers_scc_deviceid
 \var starpu_conf::workers_scc_deviceid
 If the flag starpu_conf::use_explicit_workers_scc_deviceid is set, the
 If the flag starpu_conf::use_explicit_workers_scc_deviceid is set, the
 array contains the logical identifiers of the SCC devices to be used.
 array contains the logical identifiers of the SCC devices to be used.
+Otherwise, StarPU assigns the SCC devices in a round-robin fashion.
+This can also be specified with the environment variable \ref
+STARPU_WORKERS_SCCID.
 
 
 \var starpu_conf::bus_calibrate
 \var starpu_conf::bus_calibrate
 If this flag is set, StarPU will recalibrate the bus.  If this value
 If this flag is set, StarPU will recalibrate the bus.  If this value
@@ -129,7 +140,9 @@ variable \ref STARPU_SINGLE_COMBINED_WORKER.
 
 
 \var starpu_conf::mic_sink_program_path
 \var starpu_conf::mic_sink_program_path
 Path to the kernel to execute on the MIC device, compiled for MIC
 Path to the kernel to execute on the MIC device, compiled for MIC
-architecture.
+architecture. When set to NULL, StarPU automatically looks next to the
+host program location.
+(default = NULL)
 
 
 \var starpu_conf::disable_asynchronous_copy
 \var starpu_conf::disable_asynchronous_copy
 This flag should be set to 1 to disable
 This flag should be set to 1 to disable
@@ -164,7 +177,12 @@ option \ref disable-asynchronous-opencl-copy "--disable-asynchronous-opencl-copy
 (default = 0)
 (default = 0)
 
 
 \var starpu_conf::disable_asynchronous_mic_copy
 \var starpu_conf::disable_asynchronous_mic_copy
-indicate if asynchronous copies to MIC devices should be disabled
+This flag should be set to 1 to disable asynchronous copies between
+CPUs and MIC accelerators. This can also be specified with the
+environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY.
+This can also be specified at compilation time by giving to the
+configure script the option \ref disable-asynchronous-mic-copy "--disable-asynchronous-mic-copy".
+(default = 0)
 
 
 \var starpu_conf::cuda_opengl_interoperability
 \var starpu_conf::cuda_opengl_interoperability
 Enable CUDA/OpenGL interoperation on these CUDA
 Enable CUDA/OpenGL interoperation on these CUDA
@@ -200,9 +218,11 @@ returns 0. Otherwise, -ENODEV indicates that no worker was available
 
 
 \fn int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 \fn int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 \ingroup API_Initialization_and_Termination
 \ingroup API_Initialization_and_Termination
-Alternative initialization method with argc and argv. This is used by
+This is the same as starpu_init(), but also takes the \p argc and \p
-MIC, MPI, and SCC implementation. Do not call starpu_init() and
+argv as defined by the application. This is needed for SCC execution
-starpu_initialize() in the same program.
+to initialize the communication library.
+Do not call starpu_init() and starpu_initialize() in the
+same program.
 
 
 \fn int starpu_conf_init(struct starpu_conf *conf)
 \fn int starpu_conf_init(struct starpu_conf *conf)
 \ingroup API_Initialization_and_Termination
 \ingroup API_Initialization_and_Termination

+ 28 - 0
doc/doxygen/chapters/api/mic_extensions.doxy

@@ -0,0 +1,28 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup API_MIC_Extensions MIC Extensions
+
+\def STARPU_USE_MIC
+\ingroup API_MIC_Extensions
+This macro is defined when StarPU has been installed with MIC support.
+It should be used in your code to detect the availability of MIC.
+
+\fn int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name)
+\ingroup API_MIC_Extensions
+Initiate a lookup on each MIC device to find the address of the
+function named \p func_name, store them in the global array kernels
+and return the index in the array through \p symbol.
+
+\fn starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol)
+\ingroup API_MIC_Extensions
+On success, return the pointer to the function defined by \p symbol on
+the device linked to the called device. This can for instance be used
+in a starpu_mic_func_t implementation.
+
+*/

+ 12 - 0
doc/doxygen/chapters/api/multiformat_data_interface.doxy

@@ -25,6 +25,18 @@ The different fields are:
         pointer to a codelet which converts from CPU to CUDA
         pointer to a codelet which converts from CPU to CUDA
 \var starpu_multiformat_data_interface_ops::cuda_to_cpu_cl
 \var starpu_multiformat_data_interface_ops::cuda_to_cpu_cl
         pointer to a codelet which converts from CUDA to CPU
         pointer to a codelet which converts from CUDA to CPU
+\var starpu_multiformat_data_interface_ops::mic_elemsize
+        the size of each element on MIC devices
+\var starpu_multiformat_data_interface_ops::cpu_to_mic_cl
+        pointer to a codelet which converts from CPU to MIC
+\var starpu_multiformat_data_interface_ops::mic_to_cpu_cl
+        pointer to a codelet which converts from MIC to CPU
+\var starpu_multiformat_data_interface_ops::scc_elemsize
+        the size of each element on SCC devices
+\var starpu_multiformat_data_interface_ops::cpu_to_scc_cl
+        pointer to a codelet which converts from CPU to SCC
+\var starpu_multiformat_data_interface_ops::scc_to_cpu_cl
+        pointer to a codelet which converts from SCC to CPU
 
 
 \struct starpu_multiformat_interface
 \struct starpu_multiformat_interface
 todo
 todo

+ 28 - 0
doc/doxygen/chapters/api/scc_extensions.doxy

@@ -0,0 +1,28 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup API_SCC_Extensions SCC Extensions
+
+\def STARPU_USE_SCC
+\ingroup API_SCC_Extensions
+This macro is defined when StarPU has been installed with SCC support.
+It should be used in your code to detect the availability of SCC.
+
+\fn int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name)
+\ingroup API_SCC_Extensions
+Initiate a lookup on each SCC device to find the address of the
+function named \p func_name, store them in the global array kernels
+and return the index in the array through \p symbol.
+
+\fn starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol)
+\ingroup API_SCC_Extensions
+On success, return the pointer to the function defined by \p symbol on
+the device linked to the called device. This can for instance be used
+in a starpu_scc_func_t implementation.
+
+*/

+ 14 - 0
doc/doxygen/chapters/api/workers.doxy

@@ -80,6 +80,20 @@ returned value should be at most \ref STARPU_MAXCPUS.
 This function returns the number of CUDA devices controlled by
 This function returns the number of CUDA devices controlled by
 StarPU. The returned value should be at most \ref STARPU_MAXCUDADEVS.
 StarPU. The returned value should be at most \ref STARPU_MAXCUDADEVS.
 
 
+\fn unsigned starpu_mic_worker_get_count(void)
+\ingroup API_Workers_Properties
+This function returns the number of MIC workers controlled by StarPU.
+
+\fn unsigned starpu_mic_device_get_count(void)
+\ingroup API_Workers_Properties
+This function returns the number of MIC devices controlled by StarPU.
+The returned value should be at most \ref STARPU_MAXMICDEVS.
+
+\fn unsigned starpu_scc_worker_get_count(void)
+\ingroup API_Workers_Properties
+This function returns the number of SCC devices controlled by StarPU.
+The returned value should be at most \ref STARPU_MAXSCCDEVS.
+
 \fn unsigned starpu_opencl_worker_get_count(void)
 \fn unsigned starpu_opencl_worker_get_count(void)
 \ingroup API_Workers_Properties
 \ingroup API_Workers_Properties
 This function returns the number of OpenCL devices controlled by
 This function returns the number of OpenCL devices controlled by

+ 2 - 0
doc/doxygen/chapters/basic_examples.doxy

@@ -77,6 +77,7 @@ struct starpu_codelet cl =
 {
 {
     .where = STARPU_CPU,
     .where = STARPU_CPU,
     .cpu_funcs = { cpu_func, NULL },
     .cpu_funcs = { cpu_func, NULL },
+    .cpu_funcs_name = { "cpu_func", NULL },
     .nbuffers = 0
     .nbuffers = 0
 };
 };
 \endcode
 \endcode
@@ -526,6 +527,7 @@ void scal_cpu_func(void *buffers[], void *cl_arg)
 struct starpu_codelet cl =
 struct starpu_codelet cl =
 {
 {
     .cpu_funcs = { scal_cpu_func, NULL },
     .cpu_funcs = { scal_cpu_func, NULL },
+    .cpu_funcs_name = { "scal_cpu_func", NULL },
     .nbuffers = 1,
     .nbuffers = 1,
     .modes = { STARPU_RW }
     .modes = { STARPU_RW }
 };
 };

+ 1 - 0
doc/doxygen/chapters/code/forkmode.c

@@ -36,6 +36,7 @@ static struct starpu_codelet cl =
     .type = STARPU_FORKJOIN,
     .type = STARPU_FORKJOIN,
     .max_parallelism = INT_MAX,
     .max_parallelism = INT_MAX,
     .cpu_funcs = {scal_cpu_func, NULL},
     .cpu_funcs = {scal_cpu_func, NULL},
+    .cpu_funcs_name = {"scal_cpu_func", NULL},
     .nbuffers = 1,
     .nbuffers = 1,
 };
 };
 //! [To be included]
 //! [To be included]

+ 1 - 0
doc/doxygen/chapters/code/multiformat.c

@@ -41,6 +41,7 @@ void opencl_to_cpu_func(void *buffers[], void *args);
 struct starpu_codelet opencl_to_cpu_cl = {
 struct starpu_codelet opencl_to_cpu_cl = {
     .where = STARPU_CPU,
     .where = STARPU_CPU,
     .cpu_funcs = { opencl_to_cpu_func, NULL },
     .cpu_funcs = { opencl_to_cpu_func, NULL },
+    .cpu_funcs_name = { "opencl_to_cpu_func", NULL },
     .nbuffers = 1,
     .nbuffers = 1,
     .modes = { STARPU_RW }
     .modes = { STARPU_RW }
 };
 };

+ 1 - 0
doc/doxygen/chapters/code/simgrid.c

@@ -19,6 +19,7 @@
 static struct starpu_codelet cl11 =
 static struct starpu_codelet cl11 =
 {
 {
 	.cpu_funcs = {chol_cpu_codelet_update_u11, NULL},
 	.cpu_funcs = {chol_cpu_codelet_update_u11, NULL},
+	.cpu_funcs_name = {"chol_cpu_codelet_update_u11", NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {chol_cublas_codelet_update_u11, NULL},
 	.cuda_funcs = {chol_cublas_codelet_update_u11, NULL},
 #elif defined(STARPU_SIMGRID)
 #elif defined(STARPU_SIMGRID)

+ 1 - 0
doc/doxygen/doxygen.cfg

@@ -1617,6 +1617,7 @@ INCLUDE_FILE_PATTERNS  =
 PREDEFINED             = STARPU_USE_OPENCL=1 \
 PREDEFINED             = STARPU_USE_OPENCL=1 \
                          STARPU_USE_CUDA=1 \
                          STARPU_USE_CUDA=1 \
                          STARPU_USE_MIC=1 \
                          STARPU_USE_MIC=1 \
+                         STARPU_USE_SCC=1 \
 			 STARPU_USE_MPI=1 \
 			 STARPU_USE_MPI=1 \
 			 STARPU_HAVE_HWLOC=1 \
 			 STARPU_HAVE_HWLOC=1 \
 			 STARPU_USE_SC_HYPERVISOR=1 \
 			 STARPU_USE_SC_HYPERVISOR=1 \

+ 2 - 5
doc/doxygen/refman.tex

@@ -195,10 +195,8 @@ Documentation License”.
 \input{group__API__Theoretical__Lower__Bound__on__Execution__Time}
 \input{group__API__Theoretical__Lower__Bound__on__Execution__Time}
 \input{group__API__CUDA__Extensions}
 \input{group__API__CUDA__Extensions}
 \input{group__API__OpenCL__Extensions}
 \input{group__API__OpenCL__Extensions}
-
+\input{group__API__MIC__Extensions}
-%\input{group__MIC__Extensions}
+\input{group__API__SCC__Extensions}
-%\input{group__SCC__Extensions}
-
 \input{group__API__Miscellaneous__Helpers}
 \input{group__API__Miscellaneous__Helpers}
 \input{group__API__FxT__Support}
 \input{group__API__FxT__Support}
 \input{group__API__FFT__Support}
 \input{group__API__FFT__Support}
@@ -209,7 +207,6 @@ Documentation License”.
 \input{group__API__Running__Drivers}
 \input{group__API__Running__Drivers}
 \input{group__API__Expert__Mode}
 \input{group__API__Expert__Mode}
 \input{group__API__StarPUTop__Interface}
 \input{group__API__StarPUTop__Interface}
-
 \input{group__API__Scheduling__Contexts}
 \input{group__API__Scheduling__Contexts}
 \input{group__API__Scheduling__Policy}
 \input{group__API__Scheduling__Policy}
 \input{group__API__Scheduling__Context__Hypervisor}
 \input{group__API__Scheduling__Context__Hypervisor}