
update docs

Nathalie Furmento, 6 years ago
commit 4f5e6a76dc

+ 5 - 3
doc/doxygen/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2013-2018                                Inria
-# Copyright (C) 2010-2018                                CNRS
+# Copyright (C) 2010-2019                                CNRS
 # Copyright (C) 2009,2011,2013,2014,2017                 Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -235,6 +235,7 @@ dox_inputs = $(DOX_CONFIG) 				\
 	$(top_srcdir)/include/starpu_bound.h		\
 	$(top_srcdir)/include/starpu_clusters_util.h	\
 	$(top_srcdir)/include/starpu_cublas.h		\
+	$(top_srcdir)/include/starpu_cusparse.h		\
 	$(top_srcdir)/include/starpu_cuda.h		\
 	$(top_srcdir)/include/starpu_data_filters.h	\
 	$(top_srcdir)/include/starpu_data.h		\
@@ -272,6 +273,7 @@ dox_inputs = $(DOX_CONFIG) 				\
 	$(top_srcdir)/include/starpu_worker.h		\
 	$(top_srcdir)/include/fstarpu_mod.f90		\
 	$(top_srcdir)/mpi/include/starpu_mpi.h 		\
+	$(top_srcdir)/mpi/include/starpu_mpi_lb.h	\
 	$(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90		\
 	$(top_srcdir)/sc_hypervisor/include/sc_hypervisor.h 		\
 	$(top_srcdir)/sc_hypervisor/include/sc_hypervisor_config.h 	\
@@ -300,8 +302,8 @@ $(DOX_PDF): $(DOX_TAG) refman.tex
 	rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\
 	$(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ;\
 	$(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ;\
-	$(SED) -i s'/\\item Module\\-Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' index.tex ;\
-	$(SED) -i s'/\\item File\\-Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' index.tex ;\
+	$(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' index.tex ;\
+	$(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' index.tex ;\
 	$(PDFLATEX) refman.tex ;\
 	$(MAKEINDEX) refman.idx ;\
 	$(PDFLATEX) refman.tex ;\

+ 3 - 3
doc/doxygen/chapters/000_introduction.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2011-2013,2016                           Inria
  * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
  *
@@ -75,7 +75,7 @@ StarPU takes particular care of scheduling tasks efficiently, using
 well-known algorithms from the literature (\ref TaskSchedulingPolicy).
 In addition, it allows scheduling experts, such as compiler or
 computational library developers, to implement custom scheduling
-policies in a portable fashion (\ref DefiningANewSchedulingPolicy).
+policies in a portable fashion (\ref HowToDefineANewSchedulingPolicy).
 
 The remainder of this section describes the main concepts used in StarPU.
 
@@ -281,7 +281,7 @@ The documentation chapters include
 <li> \ref Scheduling
 <li> \ref SchedulingContexts
 <li> \ref SchedulingContextHypervisor
-<li> \ref ModularizedScheduler
+<li> \ref HowToDefineANewSchedulingPolicy
 <li> \ref DebuggingTools
 <li> \ref OnlinePerformanceTools
 <li> \ref OfflinePerformanceTools

+ 3 - 3
doc/doxygen/chapters/101_building.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2011,2012,2018                           Inria
  * Copyright (C) 2009-2011,2013-2016                      Université de Bordeaux
  *
@@ -471,8 +471,8 @@ Will show the performance of the cholesky factorization with the attila
 system. It will be interesting to try with different matrix sizes and
 schedulers.
 
-Performance models are available for cholesky_*, lu_*, *gemm, with block sizes
-320, 640, or 960 (plus 1440 for sirocco), and for stencil with block size 128x128x128, 192x192x192, and
+Performance models are available for <c>cholesky_*</c>, <c>lu_*</c>, <c>*gemm</c>, with block sizes
+320, 640, or 960 (plus 1440 for sirocco), and for <c>stencil</c> with block size 128x128x128, 192x192x192, and
 256x256x256.
 
 */

+ 7 - 7
doc/doxygen/chapters/210_check_list_performance.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2013,2015,2017                      Inria
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2011,2013-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -101,7 +101,7 @@ kernels. This will lower the potential for overlapping.
 
 Calling starpu_cublas_init() makes StarPU already do appropriate calls for the
 CUBLAS library. Some libraries like Magma may however change the current stream of CUBLAS v1,
-one then has to call <c>cublasSetKernelStream(starpu_cuda_get_local_stream())</c> at
+one then has to call <c>cublasSetKernelStream(</c>starpu_cuda_get_local_stream()<c>)</c> at
 the beginning of the codelet to make sure that CUBLAS is really using the proper
 stream. When using CUBLAS v2, starpu_cublas_get_local_handle() can be called to queue CUBLAS
 kernels with the proper configuration.
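
As an illustrative sketch (not part of this patch; kernel and buffer names are hypothetical), a CUDA codelet re-pinning CUBLAS v1 onto StarPU's local stream could look like:

\code{.c}
#include <starpu.h>
#include <cuda_runtime.h>
#include <cublas.h>

/* Hypothetical CUDA codelet scaling a vector with the CUBLAS v1 API. */
void scal_cuda_func(void *descr[], void *cl_arg)
{
	(void) cl_arg;
	const float factor = 2.0f;
	float *v = (float *) STARPU_VECTOR_GET_PTR(descr[0]);
	int n = (int) STARPU_VECTOR_GET_NX(descr[0]);

	/* Make sure CUBLAS v1 queues its kernels on StarPU's per-worker
	 * stream, in case another library changed the current stream. */
	cublasSetKernelStream(starpu_cuda_get_local_stream());
	cublasSscal(n, factor, v, 1);

	/* Without the STARPU_CUDA_ASYNC codelet flag, wait for completion. */
	cudaStreamSynchronize(starpu_cuda_get_local_stream());

	/* With CUBLAS v2, one would instead pass
	 * starpu_cublas_get_local_handle() to the cublas*_v2 functions. */
}
\endcode
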
@@ -330,11 +330,11 @@ overrides the hostname of the system.
 By default, StarPU stores separate performance models for each GPU. To avoid
 having to calibrate performance models for each GPU of a homogeneous set of GPU
 devices for instance, the model can be shared by setting
-<c>export STARPU_PERF_MODEL_HOMOGENEOUS_CUDA=1</c> ,
-<c>export STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL=1</c> ,
-<c>export STARPU_PERF_MODEL_HOMOGENEOUS_MIC=1</c> ,
-<c>export STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS=1</c> , or
-<c>export STARPU_PERF_MODEL_HOMOGENEOUS_SCC=1</c> (depending on your GPU device type).
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_CUDA=1</c> (\ref STARPU_PERF_MODEL_HOMOGENEOUS_CUDA),
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL=1</c> (\ref STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL),
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_MIC=1</c> (\ref STARPU_PERF_MODEL_HOMOGENEOUS_MIC),
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS=1</c> (\ref STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS), or
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_SCC=1</c> (\ref STARPU_PERF_MODEL_HOMOGENEOUS_SCC), depending on your GPU device type.
 
 To force continuing calibration,
 use <c>export STARPU_CALIBRATE=1</c> (\ref STARPU_CALIBRATE). This may be necessary if your application

+ 1 - 1
doc/doxygen/chapters/301_tasks.doxy

@@ -428,7 +428,7 @@ be executed, and is allowed to read from <c>i</c> to use it e.g. as an
 index. Note that this macro is only available when compiling StarPU with
 the compiler <c>gcc</c>.
 
-There is several ways of calling the function starpu_codelet_unpack_args().
+StarPU also provides a utility function starpu_codelet_unpack_args() to retrieve the ::STARPU_VALUE arguments passed to the task. There are several ways of calling this function.
 
 \code{.c}
 void func_cpu(void *descr[], void *_args)

+ 9 - 9
doc/doxygen/chapters/310_data_management.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2011,2014-2018                      Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  *
@@ -366,8 +366,8 @@ starpu_data_partition_plan() returns the handles for the partition in <c>vert_ha
 
 One can then submit tasks working on the main handle, and tasks working on
 <c>vert_handle</c> handles. Between using the main handle and <c>vert_handle</c>
-handles, StarPU will automatically call starpu_data_partition_submit and
-starpu_data_unpartition_submit.
+handles, StarPU will automatically call starpu_data_partition_submit() and
+starpu_data_unpartition_submit().
 
 All this code is asynchronous, just submitting which tasks, partitioning and
 unpartitioning will be done at runtime.
@@ -445,9 +445,9 @@ And now we can start using vertical slices, etc.
 
 \section DefiningANewDataFilter Defining A New Data Filter
 
-StarPU provides a series of predefined filters in API_Data_Partition, but
+StarPU provides a series of predefined filters in \ref API_Data_Partition, but
 additional filters can be defined by the application. The principle is that the
-filter function just fills the memory location of the i-th subpart of a data.
+filter function just fills the memory location of the <c>i-th</c> subpart of a data.
 Examples are provided in <c>src/datawizard/interfaces/*_filters.c</c>,
 and see \ref starpu_data_filter::filter_func for the details.
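
For illustration (a sketch loosely modelled on the vector block filter, not code from this patch), a hand-written filter function could look like:

\code{.c}
/* Fill the id-th child's vector interface from the father's one,
 * cutting the vector into nparts roughly equal blocks. */
void my_vector_filter_block(void *father_interface, void *child_interface,
			    struct starpu_data_filter *f,
			    unsigned id, unsigned nparts)
{
	struct starpu_vector_interface *father = father_interface;
	struct starpu_vector_interface *child = child_interface;
	(void) f;

	uint32_t nx = father->nx / nparts;
	size_t offset = id * nx * father->elemsize;
	if (id == nparts - 1)
		nx += father->nx % nparts; /* last block takes the remainder */

	child->id = father->id;
	child->nx = nx;
	child->elemsize = father->elemsize;

	if (father->dev_handle)
	{
		if (father->ptr)
			child->ptr = father->ptr + offset;
		child->dev_handle = father->dev_handle;
		child->offset = father->offset + offset;
	}
}
\endcode
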
 
@@ -765,7 +765,7 @@ the type starpu_data_interface_ops. We only define here the basic
 operations needed to run simple applications. The source code for the
 different functions can be found in the file
 <c>examples/interface/complex_interface.c</c>, the details of the hooks to be
-provided are documented \ref starpu_data_interface_ops .
+provided are documented in \ref starpu_data_interface_ops .
 
 \code{.c}
 static struct starpu_data_interface_ops interface_complex_ops =
@@ -840,12 +840,12 @@ main memory instead of copied in the GPU, a pivoting vector for instance.
 This can be achieved by setting the starpu_codelet::specific_nodes flag to
 <c>1</c>, and then fill the starpu_codelet::nodes array (or starpu_codelet::dyn_nodes when
 starpu_codelet::nbuffers is greater than \ref STARPU_NMAXBUFS) with the node numbers
-where data should be copied to, or <c>STARPU_SPECIFIC_NODE_LOCAL</c> to let
+where data should be copied to, or ::STARPU_SPECIFIC_NODE_LOCAL to let
 StarPU copy it to the memory node where the task will be executed.
 
-<c>STARPU_SPECIFIC_NODE_CPU</c> can also be used to request data to be
+::STARPU_SPECIFIC_NODE_CPU can also be used to request data to be
 put in CPU-accessible memory (and let StarPU choose the NUMA node).
-<c>STARPU_SPECIFIC_NODE_FAST</c> and <c>STARPU_SPECIFIC_NODE_SLOW</c> can als be
+::STARPU_SPECIFIC_NODE_FAST and ::STARPU_SPECIFIC_NODE_SLOW can also be
 used.
 
 For instance,
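
a codelet whose second buffer should stay in CPU-accessible memory could be declared along these lines (a sketch with hypothetical function names, not the manual's own example):

\code{.c}
struct starpu_codelet pivot_cl =
{
	.cuda_funcs = { pivot_cuda_func },
	.cpu_funcs = { pivot_cpu_func },
	.nbuffers = 2,
	.modes = { STARPU_RW, STARPU_R },
	/* The first buffer goes to the memory node where the task runs,
	 * the second one (e.g. a pivoting vector) stays in CPU memory. */
	.specific_nodes = 1,
	.nodes = { STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU },
};
\endcode
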

+ 3 - 3
doc/doxygen/chapters/320_scheduling.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2011,2012,2016                           Inria
  * Copyright (C) 2009-2011,2014-2018                      Université de Bordeaux
  *
@@ -79,7 +79,7 @@ specified for a codelet, every task built from this codelet will be scheduled
 using an <b>eager</b> fallback policy.
 
 <b>Troubleshooting:</b> Configuring and recompiling StarPU using the
-<c>--enable-verbose</c> configure flag displays some statistics at the end of
+\ref enable-verbose "--enable-verbose" configure option displays some statistics at the end of
 execution about the percentage of tasks which have been scheduled by a DM*
 family policy using performance model hints. A low or zero percentage may be
 the sign that performance models are not converging or that codelets do not
@@ -238,7 +238,7 @@ to a bag of tasks. When the application is finished with submitting tasks,
 it calls starpu_do_schedule() (or starpu_task_wait_for_all(), which calls
 starpu_do_schedule()), and the starpu_sched_policy::do_schedule method of the
 scheduler is called. This method calls _starpu_graph_compute_depths to compute
-the bottom-up ranks, and then uses these rank to set priorities over tasks.
+the bottom-up ranks, and then uses these ranks to set priorities over tasks.
 
 It then has two priority queues, one for CPUs, and one for GPUs, and uses a dumb
 heuristic based on the duration of the task over CPUs and GPUs to decide between

+ 3 - 1
doc/doxygen/chapters/520_files.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2017, 2019                          CNRS
  * Copyright (C) 2011-2013,2018                           Inria
  * Copyright (C) 2009-2011,2014                           Université de Bordeaux
  *
@@ -25,6 +25,7 @@
 \file starpu_bound.h
 \file starpu_clusters_util.h
 \file starpu_cublas.h
+\file starpu_cusparse.h
 \file starpu_cuda.h
 \file starpu_data_filters.h
 \file starpu_data.h
@@ -62,6 +63,7 @@
 \file starpu_worker.h
 
 \file starpu_mpi.h
+\file starpu_mpi_lb.h
 
 \file sc_hypervisor_config.h
 \file sc_hypervisor.h

+ 21 - 1
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2013,2015,2017                      Inria
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2011,2013-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -1057,4 +1057,24 @@ submission loop.
 \ingroup API_Codelet_And_Tasks
 Create (and submit) an empty task that unlocks a tag once all its dependencies are fulfilled.
 
+\def STARPU_SPECIFIC_NODE_LOCAL
+\ingroup API_Codelet_And_Tasks
+Value to be set in the field starpu_codelet::nodes to let StarPU copy the data to the memory node where the task will be executed.
+
+\def STARPU_SPECIFIC_NODE_CPU
+\ingroup API_Codelet_And_Tasks
+Value to be set in the field starpu_codelet::nodes to request the data to be put in CPU-accessible memory (and let StarPU choose the NUMA node).
+
+\def STARPU_SPECIFIC_NODE_SLOW
+\ingroup API_Codelet_And_Tasks
+Value to be set in the field starpu_codelet::nodes to request the data to be put in a slow memory node.
+
+\def STARPU_SPECIFIC_NODE_FAST
+\ingroup API_Codelet_And_Tasks
+Value to be set in the field starpu_codelet::nodes to request the data to be put in a fast memory node.
+
+\fn void starpu_do_schedule(void)
+\ingroup API_Codelet_And_Tasks
+Trigger the scheduling of the tasks submitted so far: the starpu_sched_policy::do_schedule method of the current scheduler is called. starpu_task_wait_for_all() calls this function implicitly.
+
 */
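
As a usage sketch for starpu_do_schedule() (not from this patch; <c>my_cl</c>, <c>handles</c> and <c>ntasks</c> are placeholders), an application running under a scheduler that implements starpu_sched_policy::do_schedule typically submits its whole bag of tasks first, then triggers scheduling:

\code{.c}
unsigned i;
for (i = 0; i < ntasks; i++)
	starpu_task_insert(&my_cl, STARPU_RW, handles[i], 0);

/* Let the policy's do_schedule method run (starpu_task_wait_for_all()
 * would call it implicitly as well), then wait for completion. */
starpu_do_schedule();
starpu_task_wait_for_all();
\endcode
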

+ 2 - 2
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2013                                Inria
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2011,2014-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -102,7 +102,7 @@ For each task not going through the scheduler (because starpu_task::execute_on_a
 \var void (*starpu_sched_policy::do_schedule)(unsigned sched_ctx_id)
         Optional field. This method is called when it is a good time to start
         scheduling tasks. This is notably called when the application calls
-        starpu_task_wait_for_all or starpu_do_schedule explicitly.
+        starpu_task_wait_for_all() or starpu_do_schedule() explicitly.
 \var void (*starpu_sched_policy::add_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
         Initialize scheduling structures corresponding to each worker used by the policy.
 \var void (*starpu_sched_policy::remove_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers)

+ 4 - 1
doc/doxygen/doxygen-config.cfg.in

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2011-2014,2016,2018                      Inria
-# Copyright (C) 2010-2017                                CNRS
+# Copyright (C) 2010-2017, 2019                          CNRS
 # Copyright (C) 2009-2014                                Université de Bordeaux
 # Copyright (C) 2013                                     Simon Archipoff
 # Copyright (C) 2011                                     Télécom-SudParis
@@ -24,6 +24,7 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 			 @top_srcdir@/include/starpu_bitmap.h \
 	 		 @top_srcdir@/include/starpu_bound.h \
 	 		 @top_srcdir@/include/starpu_clusters_util.h \
+			 @top_srcdir@/include/starpu_cusparse.h \
 			 @top_srcdir@/include/starpu_cublas.h \
 			 @top_srcdir@/include/starpu_cuda.h \
 			 @top_srcdir@/include/starpu_data_filters.h \
@@ -63,6 +64,8 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 			 @top_srcdir@/include/starpu_worker.h \
 			 @top_srcdir@/include/fstarpu_mod.f90 \
 			 @top_srcdir@/mpi/include/ \
+			 @top_srcdir@/mpi/include/starpu_mpi.h \
+			 @top_srcdir@/mpi/include/starpu_mpi_lb.h \
 			 @top_srcdir@/mpi/include/fstarpu_mpi_mod.f90 \
 			 @top_srcdir@/starpufft/include/starpufft.h \
 			 @top_srcdir@/sc_hypervisor/include \

+ 3 - 1
doc/doxygen/refman.tex

@@ -1,7 +1,7 @@
 % StarPU --- Runtime system for heterogeneous multicore architectures.
 %
 % Copyright (C) 2013-2016,2018                           Inria
-% Copyright (C) 2013-2018                                CNRS
+% Copyright (C) 2013-2019                                CNRS
 % Copyright (C) 2014,2018                                Université de Bordeaux
 % Copyright (C) 2013                                     Simon Archipoff
 %
@@ -277,6 +277,7 @@ Documentation License”.
 \input{starpu__clusters__util_8h}
 \input{starpu__config_8h}
 \input{starpu__cublas_8h}
+\input{starpu__cusparse_8h}
 \input{starpu__cuda_8h}
 \input{starpu__data_8h}
 \input{starpu__data__filters_8h}
@@ -290,6 +291,7 @@ Documentation License”.
 \input{starpu__mic_8h}
 \input{starpu__mod_8f90}
 \input{starpu__mpi_8h}
+\input{starpu__mpi__lb_8h}
 \input{starpu__opencl_8h}
 \input{starpu__openmp_8h}
 \input{starpu__perfmodel_8h}

+ 3 - 1
include/starpu_task.h

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2017                                Inria
  * Copyright (C) 2009-2018                                Université de Bordeaux
- * Copyright (C) 2010-2015,2017,2018                      CNRS
+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2016                                     Uppsala University
  *
@@ -94,6 +94,8 @@ typedef starpu_scc_kernel_t (*starpu_scc_func_t)(void);
 #define STARPU_SPECIFIC_NODE_LOCAL (-1)
 #define STARPU_SPECIFIC_NODE_CPU (-2)
 #define STARPU_SPECIFIC_NODE_SLOW (-3)
+#define STARPU_SPECIFIC_NODE_FAST (-4)
+
 struct starpu_task;
 struct starpu_codelet
 {