浏览代码

doc/doxygen: backport updates from branch mic-scc-merge

Nathalie Furmento 12 年之前
父节点
当前提交
f0de9e2164

+ 2 - 0
doc/doxygen/Makefile.am

@@ -72,6 +72,8 @@ chapters =	\
 	chapters/api/mpi.doxy \
 	chapters/api/multiformat_data_interface.doxy \
 	chapters/api/opencl_extensions.doxy \
+	chapters/api/mic_extensions.doxy \
+	chapters/api/scc_extensions.doxy \
 	chapters/api/parallel_tasks.doxy \
 	chapters/api/performance_model.doxy \
 	chapters/api/profiling.doxy \

+ 12 - 3
doc/doxygen/chapters/advanced_examples.doxy

@@ -39,6 +39,7 @@ void scal_sse_func(void *buffers[], void *cl_arg)
 struct starpu_codelet cl = {
     .where = STARPU_CPU,
     .cpu_funcs = { scal_cpu_func, scal_sse_func, NULL },
+    .cpu_funcs_name = { "scal_cpu_func", "scal_sse_func", NULL },
     .nbuffers = 1,
     .modes = { STARPU_RW }
 };
@@ -75,6 +76,7 @@ struct starpu_codelet cl = {
     .where = STARPU_CPU|STARPU_CUDA,
     .can_execute = can_execute,
     .cpu_funcs = { cpu_func, NULL },
+    .cpu_funcs_name = { "cpu_func", NULL },
     .cuda_funcs = { gpu_func, NULL }
     .nbuffers = 1,
     .modes = { STARPU_RW }
@@ -120,6 +122,7 @@ struct starpu_codelet cl = {
     .where = STARPU_CPU|STARPU_CUDA,
     .can_execute = can_execute,
     .cpu_funcs = { cpu_func, NULL },
+    .cpu_funcs_name = { "cpu_func", NULL },
     .cuda_funcs = { scal_gpu_13, scal_gpu_20, NULL },
     .nbuffers = 1,
     .modes = { STARPU_RW }
@@ -317,6 +320,7 @@ static struct starpu_perfmodel mult_perf_model = {
 struct starpu_codelet cl = {
     .where = STARPU_CPU,
     .cpu_funcs = { cpu_mult, NULL },
+    .cpu_funcs_name = { "cpu_mult", NULL },
     .nbuffers = 3,
     .modes = { STARPU_R, STARPU_R, STARPU_W },
     /* for the scheduling policy to be able to use performance models */
@@ -483,6 +487,7 @@ void func_cpu(void *descr[], void *_args)
 struct starpu_codelet mycodelet = {
         .where = STARPU_CPU,
         .cpu_funcs = { func_cpu, NULL },
+        .cpu_funcs_name = { "func_cpu", NULL },
         .nbuffers = 2,
         .modes = { STARPU_RW, STARPU_RW }
 };
@@ -575,6 +580,7 @@ the codelets for initialization and reduction:
 struct starpu_codelet bzero_variable_cl =
 {
         .cpu_funcs = { bzero_variable_cpu, NULL },
+        .cpu_funcs_name = { "bzero_variable_cpu", NULL },
         .cuda_funcs = { bzero_variable_cuda, NULL },
         .nbuffers = 1,
 }
@@ -597,6 +603,7 @@ static void accumulate_variable_cuda(void *descr[], void *cl_arg)
 struct starpu_codelet accumulate_variable_cl =
 {
         .cpu_funcs = { accumulate_variable_cpu, NULL },
+        .cpu_funcs_name = { "accumulate_variable_cpu", NULL },
         .cuda_funcs = { accumulate_variable_cuda, NULL },
         .nbuffers = 1,
 }
@@ -786,6 +793,7 @@ static struct starpu_codelet cl =
     .type = STARPU_SPMD,
     .max_parallelism = INT_MAX,
     .cpu_funcs = { func, NULL },
+    .cpu_funcs_name = { "func", NULL },
     .nbuffers = 1,
 }
 \endcode
@@ -1140,9 +1148,10 @@ enum starpu_data_access_mode modes[STARPU_NMAXBUFS+1] = {
 
 struct starpu_codelet dummy_big_cl =
 {
-	.cuda_funcs = {dummy_big_kernel, NULL},
-	.opencl_funcs = {dummy_big_kernel, NULL},
-	.cpu_funcs = {dummy_big_kernel, NULL},
+	.cuda_funcs = { dummy_big_kernel, NULL },
+	.opencl_funcs = { dummy_big_kernel, NULL },
+	.cpu_funcs = { dummy_big_kernel, NULL },
+	.cpu_funcs_name = { "dummy_big_kernel", NULL },
 	.nbuffers = STARPU_NMAXBUFS+1,
 	.dyn_modes = modes
 };

+ 50 - 0
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -72,6 +72,16 @@ to specify the codelet may be executed on a CUDA processing unit.
 This macro is used when setting the field starpu_codelet::where to
 specify the codelet may be executed on an OpenCL processing unit.
 
+\def STARPU_MIC
+\ingroup API_Codelet_And_Tasks
+This macro is used when setting the field starpu_codelet::where to
+specify the codelet may be executed on a MIC processing unit.
+
+\def STARPU_SCC
+\ingroup API_Codelet_And_Tasks
+This macro is used when setting the field starpu_codelet::where to
+specify the codelet may be executed on an SCC processing unit.
+
 \def STARPU_MULTIPLE_CPU_IMPLEMENTATIONS
 \deprecated
 \ingroup API_Codelet_And_Tasks
@@ -178,6 +188,12 @@ starpu_codelet::where is set, then the field starpu_codelet::cpu_funcs
 is ignored if ::STARPU_CPU does not appear in the field
 starpu_codelet::where, it must be non-null otherwise.
 
+\var starpu_codelet::cpu_funcs_name
+Optional array of strings which provide the name of the CPU functions
+referenced in the array starpu_codelet::cpu_funcs. This can be used
+when running on MIC devices or the SCC platform, so that StarPU can
+simply look up the MIC or SCC function implementation through its name.
+
 \var starpu_codelet::cuda_funcs
 Optional array of function pointers to the CUDA implementations of the
 codelet. It must be terminated by a NULL value. The functions must be
@@ -202,6 +218,32 @@ starpu_codelet::opencl_funcs is ignored if ::STARPU_OPENCL does not
 appear in the field starpu_codelet::where, it must be non-null
 otherwise.
 
+\var starpu_codelet::mic_funcs
+Optional array of function pointers to a function which returns the
+MIC implementation of the codelet. It must be terminated by a NULL
+value. The function prototype must be:
+\code{.c}
+starpu_mic_kernel_t mic_func(struct starpu_codelet *cl, unsigned nimpl)
+\endcode
+If the field starpu_codelet::where is set, then the field
+starpu_codelet::mic_funcs is ignored if ::STARPU_MIC does not appear
+in the field starpu_codelet::where. It can be null if
+starpu_codelet::cpu_funcs_name is non-NULL, in which case StarPU will
+simply make a symbol lookup to get the implementation.
+
+\var starpu_codelet::scc_funcs
+Optional array of function pointers to a function which returns the
+SCC implementation of the codelet. It must be terminated by a NULL value.
+The function prototype must be:
+\code{.c}
+starpu_scc_kernel_t scc_func(struct starpu_codelet *cl, unsigned nimpl)
+\endcode
+If the field starpu_codelet::where is set, then the field
+starpu_codelet::scc_funcs is ignored if ::STARPU_SCC does not appear
+in the field starpu_codelet::where. It can be null if
+starpu_codelet::cpu_funcs_name is non-NULL, in which case StarPU will
+simply make a symbol lookup to get the implementation.
+
 \var starpu_codelet::nbuffers
 Specify the number of arguments taken by the codelet. These arguments
 are managed by the DSM and are accessed from the <c>void *buffers[]</c>
@@ -325,6 +367,14 @@ the buffer in local store (LS) instead. This field is ignored for CPU,
 CUDA and OpenCL codelets, where the starpu_task::cl_arg pointer is
 given as such.
 
+\var starpu_task::cl_arg_free
+Optional field. In case starpu_task::cl_arg was allocated by the
+application through <c>malloc()</c>, setting starpu_task::cl_arg_free
+to 1 makes StarPU automatically call <c>free(cl_arg)</c> when
+destroying the task. This saves the user from defining a callback just
+for that. This is mostly useful when targeting MIC or SCC, where the
+codelet does not execute in the same memory space as the main thread.
+
 \var starpu_task::callback_func
 Optional field, the default value is <c>NULL</c>. This is a function
 pointer of prototype <c>void (*f)(void *)</c> which specifies a

+ 76 - 33
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -54,89 +54,132 @@ provided. It can still be useful to provide more specific method in
 case of e.g. available particular CUDA or OpenCL support.
 \ingroup API_Data_Interfaces
 \var starpu_data_copy_methods::ram_to_ram
-Define how to copy data from the src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node CPU
-node. Return 0 on success.
+Define how to copy data from the \p src_interface interface on the \p
+src_node CPU node to the \p dst_interface interface on the \p dst_node
+CPU node. Return 0 on success.
 \var starpu_data_copy_methods::ram_to_cuda
-Define how to copy data from the src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node CUDA
+Define how to copy data from the \p src_interface interface on the
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA
 node. Return 0 on success.
 \var starpu_data_copy_methods::ram_to_opencl
-Define how to copy data from the src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node
 OpenCL node. Return 0 on success.
+
+\var starpu_data_copy_methods::ram_to_mic
+Define how to copy data from the \p src_interface interface on the
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node MIC
+node. Return 0 on success.
+
 \var starpu_data_copy_methods::cuda_to_ram
-Define how to copy data from the src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node
 CPU node. Return 0 on success.
 \var starpu_data_copy_methods::cuda_to_cuda
-Define how to copy data from the src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node CUDA
+Define how to copy data from the \p src_interface interface on the
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA
 node. Return 0 on success.
 \var starpu_data_copy_methods::cuda_to_opencl
-Define how to copy data from the src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node
 OpenCL node. Return 0 on success.
 \var starpu_data_copy_methods::opencl_to_ram
-Define how to copy data from the src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 CPU node. Return 0 on success.
 \var starpu_data_copy_methods::opencl_to_cuda
-Define how to copy data from the src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 CUDA node. Return 0 on success.
 \var starpu_data_copy_methods::opencl_to_opencl
-Define how to copy data from the src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 OpenCL node. Return 0 on success.
 
+\var starpu_data_copy_methods::mic_to_ram
+Define how to copy data from the \p src_interface interface on the
+\p src_node MIC node to the \p dst_interface interface on the \p dst_node CPU
+node. Return 0 on success.
+
+\var starpu_data_copy_methods::scc_src_to_sink
+Define how to copy data from the \p src_interface interface on the
+\p src_node node to the \p dst_interface interface on the \p dst_node node.
+Must return 0 if the transfer was actually completed completely
+synchronously, or -EAGAIN if at least some transfers are still ongoing
+and should be awaited for by the core.
+\var starpu_data_copy_methods::scc_sink_to_src
+Define how to copy data from the \p src_interface interface on the
+\p src_node node to the \p dst_interface interface on the \p dst_node node.
+Must return 0 if the transfer was actually completed completely
+synchronously, or -EAGAIN if at least some transfers are still ongoing
+and should be awaited for by the core.
+\var starpu_data_copy_methods::scc_sink_to_sink
+Define how to copy data from the \p src_interface interface on the
+\p src_node node to the \p dst_interface interface on the \p dst_node node.
+Must return 0 if the transfer was actually completed completely
+synchronously, or -EAGAIN if at least some transfers are still ongoing
+and should be awaited for by the core.
+
 \var starpu_data_copy_methods::ram_to_cuda_async
-Define how to copy data from the src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node CUDA
+Define how to copy data from the \p src_interface interface on the
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA
 node, using the given stream. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the core.
 \var starpu_data_copy_methods::cuda_to_ram_async
-Define how to copy data from the src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node CPU
+Define how to copy data from the \p src_interface interface on the
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CPU
 node, using the given stream. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the core.
 \var starpu_data_copy_methods::cuda_to_cuda_async
-Define how to copy data from the src_interface interface on the
-src_node CUDA node to the dst_interface interface on the dst_node CUDA
+Define how to copy data from the \p src_interface interface on the
+\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA
 node, using the given stream. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the core.
 
 \var starpu_data_copy_methods::ram_to_opencl_async
-Define how to copy data from the src_interface interface on the
-src_node CPU node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node
 OpenCL node, by recording in event, a pointer to a cl_event, the event
 of the last submitted transfer. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the
 core.
 \var starpu_data_copy_methods::opencl_to_ram_async
-Define how to copy data from the src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 CPU node, by recording in event, a pointer to a cl_event, the event of
 the last submitted transfer. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the
 core.
 \var starpu_data_copy_methods::opencl_to_opencl_async
-Define how to copy data from the src_interface interface on the
-src_node OpenCL node to the dst_interface interface on the dst_node
+Define how to copy data from the \p src_interface interface on the
+\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 OpenCL node, by recording in event, a pointer to a cl_event, the event
 of the last submitted transfer. Must return 0 if the transfer was
 actually completed completely synchronously, or -EAGAIN if at least
 some transfers are still ongoing and should be awaited for by the
 core.
 
+\var starpu_data_copy_methods::ram_to_mic_async
+Define how to copy data from the \p src_interface interface on the
+\p src_node CPU node to the \p dst_interface interface on the \p dst_node
+MIC node. Must return 0 if the transfer was actually completed
+completely synchronously, or -EAGAIN if at least some transfers are
+still ongoing and should be awaited for by the core.
+\var starpu_data_copy_methods::mic_to_ram_async
+Define how to copy data from the \p src_interface interface on the
+\p src_node MIC node to the \p dst_interface interface on the \p dst_node
+CPU node. Must return 0 if the transfer was actually completed
+completely synchronously, or -EAGAIN if at least some transfers are
+still ongoing and should be awaited for by the core.
+
 \var starpu_data_copy_methods::any_to_any
-Define how to copy data from the src_interface interface on the
-src_node node to the dst_interface interface on the dst_node node.
+Define how to copy data from the \p src_interface interface on the
+\p src_node node to the \p dst_interface interface on the \p dst_node node.
 This is meant to be implemented through the starpu_interface_copy()
 helper, to which async_data should be passed as such, and will be used
 to manage asynchronicity. This must return -EAGAIN if any of the

+ 27 - 7
doc/doxygen/chapters/api/initialization.doxy

@@ -49,9 +49,13 @@ This is the number of OpenCL devices that StarPU can use. This can
 also be specified with the environment variable \ref STARPU_NOPENCL.
 (default = -1)
 \var starpu_conf::nmic
-This is the number of MIC devices that StarPU can use. (default = -1)
+This is the number of MIC devices that StarPU can use. This can also
+be specified with the environment variable \ref STARPU_NMIC.
+(default = -1)
 \var starpu_conf::nscc
-This is the number of SCC devices that StarPU can use. (default = -1)
+This is the number of SCC devices that StarPU can use. This can also
+be specified with the environment variable \ref STARPU_NSCC.
+(default = -1)
 
 \var starpu_conf::use_explicit_workers_bindid
 If this flag is set, the starpu_conf::workers_bindid array indicates
@@ -89,10 +93,14 @@ be used.
 \var starpu_conf::use_explicit_workers_mic_deviceid
 If this flag is set, the MIC workers will be attached to the MIC
 devices specified in the array starpu_conf::workers_mic_deviceid.
+Otherwise, StarPU assigns the MIC devices in a round-robin fashion.
+This can also be specified with the environment variable \ref
+STARPU_WORKERS_MICID.
 (default = 0)
 \var starpu_conf::workers_mic_deviceid
 If the flag starpu_conf::use_explicit_workers_mic_deviceid is set, the
 array contains the logical identifiers of the MIC devices to be used.
+
 \var starpu_conf::use_explicit_workers_scc_deviceid
 If this flag is set, the SCC workers will be attached to the SCC
 devices specified in the array starpu_conf::workers_scc_deviceid.
@@ -100,6 +108,9 @@ devices specified in the array starpu_conf::workers_scc_deviceid.
 \var starpu_conf::workers_scc_deviceid
 If the flag starpu_conf::use_explicit_workers_scc_deviceid is set, the
 array contains the logical identifiers of the SCC devices to be used.
+Otherwise, StarPU assigns the SCC devices in a round-robin fashion.
+This can also be specified with the environment variable \ref
+STARPU_WORKERS_SCCID.
 
 \var starpu_conf::bus_calibrate
 If this flag is set, StarPU will recalibrate the bus.  If this value
@@ -129,7 +140,9 @@ variable \ref STARPU_SINGLE_COMBINED_WORKER.
 
 \var starpu_conf::mic_sink_program_path
 Path to the kernel to execute on the MIC device, compiled for MIC
-architecture.
+architecture. When set to NULL, StarPU automatically looks next to the
+host program location.
+(default = NULL)
 
 \var starpu_conf::disable_asynchronous_copy
 This flag should be set to 1 to disable
@@ -164,7 +177,12 @@ option \ref disable-asynchronous-opencl-copy "--disable-asynchronous-opencl-copy
 (default = 0)
 
 \var starpu_conf::disable_asynchronous_mic_copy
-indicate if asynchronous copies to MIC devices should be disabled
+This flag should be set to 1 to disable asynchronous copies between
+CPUs and MIC accelerators. This can also be specified with the
+environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY.
+This can also be specified at compilation time by giving to the
+configure script the option \ref disable-asynchronous-mic-copy "--disable-asynchronous-mic-copy".
+(default = 0)
 
 \var starpu_conf::cuda_opengl_interoperability
 Enable CUDA/OpenGL interoperation on these CUDA
@@ -200,9 +218,11 @@ returns 0. Otherwise, -ENODEV indicates that no worker was available
 
 \fn int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 \ingroup API_Initialization_and_Termination
-Alternative initialization method with argc and argv. This is used by
-MIC, MPI, and SCC implementation. Do not call starpu_init() and
-starpu_initialize() in the same program.
+This is the same as starpu_init(), but also takes the \p argc and \p
+argv as defined by the application. This is needed for SCC execution
+to initialize the communication library.
+Do not call starpu_init() and starpu_initialize() in the
+same program.
 
 \fn int starpu_conf_init(struct starpu_conf *conf)
 \ingroup API_Initialization_and_Termination

+ 28 - 0
doc/doxygen/chapters/api/mic_extensions.doxy

@@ -0,0 +1,28 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup API_MIC_Extensions MIC Extensions
+
+\def STARPU_USE_MIC
+\ingroup API_MIC_Extensions
+This macro is defined when StarPU has been installed with MIC support.
+It should be used in your code to detect the availability of MIC.
+
+\fn int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name)
+\ingroup API_MIC_Extensions
+Initiate a lookup on each MIC device to find the address of the
+function named \p func_name, store them in the global array kernels
+and return the index in the array through \p symbol.
+
+\fn starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol)
+\ingroup API_MIC_Extensions
+On success, return the pointer to the function defined by \p symbol on
+the device linked to the called device. This can for instance be used
+in a starpu_mic_func_t implementation.
+
+*/

+ 12 - 0
doc/doxygen/chapters/api/multiformat_data_interface.doxy

@@ -25,6 +25,18 @@ The different fields are:
         pointer to a codelet which converts from CPU to CUDA
 \var starpu_multiformat_data_interface_ops::cuda_to_cpu_cl
         pointer to a codelet which converts from CUDA to CPU
+\var starpu_multiformat_data_interface_ops::mic_elemsize
+        the size of each element on MIC devices
+\var starpu_multiformat_data_interface_ops::cpu_to_mic_cl
+        pointer to a codelet which converts from CPU to MIC
+\var starpu_multiformat_data_interface_ops::mic_to_cpu_cl
+        pointer to a codelet which converts from MIC to CPU
+\var starpu_multiformat_data_interface_ops::scc_elemsize
+        the size of each element on SCC devices
+\var starpu_multiformat_data_interface_ops::cpu_to_scc_cl
+        pointer to a codelet which converts from CPU to SCC
+\var starpu_multiformat_data_interface_ops::scc_to_cpu_cl
+        pointer to a codelet which converts from SCC to CPU
 
 \struct starpu_multiformat_interface
 todo

+ 28 - 0
doc/doxygen/chapters/api/scc_extensions.doxy

@@ -0,0 +1,28 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup API_SCC_Extensions SCC Extensions
+
+\def STARPU_USE_SCC
+\ingroup API_SCC_Extensions
+This macro is defined when StarPU has been installed with SCC support.
+It should be used in your code to detect the availability of SCC.
+
+\fn int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name)
+\ingroup API_SCC_Extensions
+Initiate a lookup on each SCC device to find the address of the
+function named \p func_name, store them in the global array kernels
+and return the index in the array through \p symbol.
+
+\fn starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol)
+\ingroup API_SCC_Extensions
+On success, return the pointer to the function defined by \p symbol on
+the device linked to the called device. This can for instance be used
+in a starpu_scc_func_t implementation.
+
+*/

+ 14 - 0
doc/doxygen/chapters/api/workers.doxy

@@ -80,6 +80,20 @@ returned value should be at most \ref STARPU_MAXCPUS.
 This function returns the number of CUDA devices controlled by
 StarPU. The returned value should be at most \ref STARPU_MAXCUDADEVS.
 
+\fn unsigned starpu_mic_worker_get_count(void)
+\ingroup API_Workers_Properties
+This function returns the number of MIC workers controlled by StarPU.
+
+\fn unsigned starpu_mic_device_get_count(void)
+\ingroup API_Workers_Properties
+This function returns the number of MIC devices controlled by StarPU.
+The returned value should be at most \ref STARPU_MAXMICDEVS.
+
+\fn unsigned starpu_scc_worker_get_count(void)
+\ingroup API_Workers_Properties
+This function returns the number of SCC devices controlled by StarPU.
+The returned value should be at most \ref STARPU_MAXSCCDEVS.
+
 \fn unsigned starpu_opencl_worker_get_count(void)
 \ingroup API_Workers_Properties
 This function returns the number of OpenCL devices controlled by

+ 2 - 0
doc/doxygen/chapters/basic_examples.doxy

@@ -77,6 +77,7 @@ struct starpu_codelet cl =
 {
     .where = STARPU_CPU,
     .cpu_funcs = { cpu_func, NULL },
+    .cpu_funcs_name = { "cpu_func", NULL },
     .nbuffers = 0
 };
 \endcode
@@ -526,6 +527,7 @@ void scal_cpu_func(void *buffers[], void *cl_arg)
 struct starpu_codelet cl =
 {
     .cpu_funcs = { scal_cpu_func, NULL },
+    .cpu_funcs_name = { "scal_cpu_func", NULL },
     .nbuffers = 1,
     .modes = { STARPU_RW }
 };

+ 1 - 0
doc/doxygen/chapters/code/forkmode.c

@@ -36,6 +36,7 @@ static struct starpu_codelet cl =
     .type = STARPU_FORKJOIN,
     .max_parallelism = INT_MAX,
     .cpu_funcs = {scal_cpu_func, NULL},
+    .cpu_funcs_name = {"scal_cpu_func", NULL},
     .nbuffers = 1,
 };
 //! [To be included]

+ 1 - 0
doc/doxygen/chapters/code/multiformat.c

@@ -41,6 +41,7 @@ void opencl_to_cpu_func(void *buffers[], void *args);
 struct starpu_codelet opencl_to_cpu_cl = {
     .where = STARPU_CPU,
     .cpu_funcs = { opencl_to_cpu_func, NULL },
+    .cpu_funcs_name = { "opencl_to_cpu_func", NULL },
     .nbuffers = 1,
     .modes = { STARPU_RW }
 };

+ 1 - 0
doc/doxygen/chapters/code/simgrid.c

@@ -19,6 +19,7 @@
 static struct starpu_codelet cl11 =
 {
 	.cpu_funcs = {chol_cpu_codelet_update_u11, NULL},
+	.cpu_funcs_name = {"chol_cpu_codelet_update_u11", NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {chol_cublas_codelet_update_u11, NULL},
 #elif defined(STARPU_SIMGRID)

+ 1 - 0
doc/doxygen/doxygen.cfg

@@ -1617,6 +1617,7 @@ INCLUDE_FILE_PATTERNS  =
 PREDEFINED             = STARPU_USE_OPENCL=1 \
                          STARPU_USE_CUDA=1 \
                          STARPU_USE_MIC=1 \
+                         STARPU_USE_SCC=1 \
 			 STARPU_USE_MPI=1 \
 			 STARPU_HAVE_HWLOC=1 \
 			 STARPU_USE_SC_HYPERVISOR=1 \

+ 2 - 5
doc/doxygen/refman.tex

@@ -195,10 +195,8 @@ Documentation License”.
 \input{group__API__Theoretical__Lower__Bound__on__Execution__Time}
 \input{group__API__CUDA__Extensions}
 \input{group__API__OpenCL__Extensions}
-
-%\input{group__MIC__Extensions}
-%\input{group__SCC__Extensions}
-
+\input{group__API__MIC__Extensions}
+\input{group__API__SCC__Extensions}
 \input{group__API__Miscellaneous__Helpers}
 \input{group__API__FxT__Support}
 \input{group__API__FFT__Support}
@@ -209,7 +207,6 @@ Documentation License”.
 \input{group__API__Running__Drivers}
 \input{group__API__Expert__Mode}
 \input{group__API__StarPUTop__Interface}
-
 \input{group__API__Scheduling__Contexts}
 \input{group__API__Scheduling__Policy}
 \input{group__API__Scheduling__Context__Hypervisor}