Parcourir la source

doc: always use the same environment (cartouche+smallexample) for source code

Nathalie Furmento il y a 13 ans
Parent
commit
d5b24e39d3

+ 15 - 8
doc/chapters/advanced-examples.texi

@@ -57,7 +57,8 @@ void scal_sse_func(void *buffers[], void *cl_arg)
 struct starpu_codelet cl = @{
     .where = STARPU_CPU,
     .cpu_funcs = @{ scal_cpu_func, scal_sse_func, NULL @},
-    .nbuffers = 1
+    .nbuffers = 1,
+    .modes = @{ STARPU_RW @}
 @};
 @end smallexample
 @end cartouche
@@ -142,7 +143,8 @@ struct starpu_codelet cl = @{
     .can_execute = can_execute,
     .cpu_funcs = @{ cpu_func, NULL @},
     .cuda_funcs = @{ scal_gpu_13, scal_gpu_20, NULL @},
-    .nbuffers = 1
+    .nbuffers = 1,
+    .modes = @{ STARPU_RW @}
 @};
 @end smallexample
 @end cartouche
@@ -339,6 +341,7 @@ struct starpu_codelet cl = @{
     .where = STARPU_CPU,
     .cpu_funcs = @{ cpu_mult, NULL @},
     .nbuffers = 3,
+    .modes = @{ STARPU_R, STARPU_R, STARPU_W @},
     /* for the scheduling policy to be able to use performance models */
     .model = &mult_perf_model
 @};
@@ -576,7 +579,8 @@ will be able to convert data from one data structure to the other when needed.
 Note that the heft scheduler is the only one optimized for this interface. The
 user must provide StarPU with conversion codelets:
 
-@example
+@cartouche
+@smallexample
 #define NX 1024
 struct point array_of_structs[NX];
 starpu_data_handle_t handle;
@@ -615,14 +619,16 @@ struct starpu_multiformat_data_interface_ops format_ops = @{
     ...
 @};
 starpu_multiformat_data_register(handle, 0, &array_of_structs, NX, &format_ops);
-@end example
+@end smallexample
+@end cartouche
 
 Kernels can be written almost as for any other interface. Note that
 STARPU_MULTIFORMAT_GET_PTR shall only be used for CPU kernels. CUDA kernels
 must use STARPU_MULTIFORMAT_GET_CUDA_PTR, and OpenCL kernels must use
 STARPU_MULTIFORMAT_GET_OPENCL_PTR. STARPU_MULTIFORMAT_GET_NX may be used in any
 kind of kernel.
-@example
+@cartouche
+@smallexample
 static void
 multiformat_scal_cpu_func(void *buffers[], void *args)
 @{
@@ -644,7 +650,8 @@ extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args)
 
 	...
 @}
-@end example
+@end smallexample
+@end cartouche
 
 A full example may be found in @code{examples/basic_examples/multiformat.c}.
 
@@ -659,7 +666,7 @@ renderbuffer objects into CUDA. To achieve this with StarPU, it simply needs to
 be given the CUDA pointer at registration, for instance:
 
 @cartouche
-@example
+@smallexample
 	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
 		if (starpu_worker_get_type(workerid) == STARPU_CUDA_WORKER)
 			break;
@@ -677,7 +684,7 @@ be given the CUDA pointer at registration, for instance:
 	cudaGraphicsUnmapResources(1, &resource, 0);
 
 	/* Now display it */
-@end example
+@end smallexample
 @end cartouche
 
 @node More examples

+ 18 - 10
doc/chapters/basic-api.texi

@@ -454,23 +454,27 @@ Register the @var{size}-byte element pointed to by @var{ptr}, which is
 typically a scalar, and initialize @var{handle} to represent this data
 item.
 
+@cartouche
 @smallexample
 float var;
 starpu_data_handle_t var_handle;
 starpu_variable_data_register(&var_handle, 0, (uintptr_t)&var, sizeof(var));
 @end smallexample
+@end cartouche
 @end deftypefun
 
 @deftypefun void starpu_vector_data_register ({starpu_data_handle_t *}@var{handle}, uint32_t @var{home_node}, uintptr_t @var{ptr}, uint32_t @var{count}, size_t @var{size})
 Register the @var{count} @var{size}-byte elements pointed to by
 @var{ptr} and initialize @var{handle} to represent it.
 
-@example
+@cartouche
+@smallexample
 float vector[NX];
 starpu_data_handle_t vector_handle;
 starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
                             sizeof(vector[0]));
-@end example
+@end smallexample
+@end cartouche
 @end deftypefun
 
 @deftypefun void starpu_matrix_data_register ({starpu_data_handle_t *}@var{handle}, uint32_t @var{home_node}, uintptr_t @var{ptr}, uint32_t @var{ld}, uint32_t @var{nx}, uint32_t @var{ny}, size_t @var{size})
@@ -480,13 +484,15 @@ pointed by @var{ptr} and initialize @var{handle} to represent it.
 each row; a non-zero @var{ld} adds padding, which can be useful for
 alignment purposes.
 
-@example
+@cartouche
+@smallexample
 float *matrix;
 starpu_data_handle_t matrix_handle;
 matrix = (float*)malloc(width * height * sizeof(float));
 starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix,
                             width, width, height, sizeof(float));
-@end example
+@end smallexample
+@end cartouche
 @end deftypefun
 
 @deftypefun void starpu_block_data_register ({starpu_data_handle_t *}@var{handle}, uint32_t @var{home_node}, uintptr_t @var{ptr}, uint32_t @var{ldy}, uint32_t @var{ldz}, uint32_t @var{nx}, uint32_t @var{ny}, uint32_t @var{nz}, size_t @var{size})
@@ -495,13 +501,15 @@ elements pointed by @var{ptr} and initialize @var{handle} to represent
 it.  Again, @var{ldy} and @var{ldz} specify the number of extra elements
 present at the end of each row or column.
 
-@example
+@cartouche
+@smallexample
 float *block;
 starpu_data_handle_t block_handle;
 block = (float*)malloc(nx*ny*nz*sizeof(float));
 starpu_block_data_register(&block_handle, 0, (uintptr_t)block,
                            nx, nx*ny, nx, ny, nz, sizeof(float));
-@end example
+@end smallexample
+@end cartouche
 @end deftypefun
 
 @deftypefun void starpu_bcsr_data_register (starpu_data_handle_t *@var{handle}, uint32_t @var{home_node}, uint32_t @var{nnz}, uint32_t @var{nrow}, uintptr_t @var{nzval}, uint32_t *@var{colind}, uint32_t *@var{rowptr}, uint32_t @var{firstentry}, uint32_t @var{r}, uint32_t @var{c}, size_t @var{elemsize})
@@ -1351,11 +1359,11 @@ typically need to be explicitly casted. Using the
 @code{starpu_tag_declare_deps_array} function avoids this hazard.
 
 @cartouche
-@example
+@smallexample
 /*  Tag 0x1 depends on tags 0x32 and 0x52 */
 starpu_tag_declare_deps((starpu_tag_t)0x1,
         2, (starpu_tag_t)0x32, (starpu_tag_t)0x52);
-@end example
+@end smallexample
 @end cartouche
 @end deftypefun
 
 This function is similar to @code{starpu_tag_declare_deps}, except
 that it does not take a variable number of arguments but an array of
 tags of size @var{ndeps}.
 @cartouche
-@example
+@smallexample
 /*  Tag 0x1 depends on tags 0x32 and 0x52 */
 starpu_tag_t tag_array[2] = @{0x32, 0x52@};
 starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array);
-@end example
+@end smallexample
 @end cartouche
 @end deftypefun
 

+ 20 - 10
doc/chapters/c-extensions.texi

@@ -98,7 +98,8 @@ Declare the given function as an implementation of @var{task} to run on
 
 Here is an example:
 
-@example
+@cartouche
+@smallexample
 #define __output  __attribute__ ((output))
 
 static void matmul (const float *A, const float *B,
@@ -125,7 +126,8 @@ matmul_cpu (const float *A, const float *B, __output float *C,
           C[j * nx + i] += A[j * nz + k] * B[k * nx + i];
       @}
 @}
-@end example
+@end smallexample
+@end cartouche
 
 @noindent
 A @code{matmult} task is defined; it has only one implementation,
@@ -135,7 +137,8 @@ buffer.
 
 CUDA and OpenCL implementations can be declared in a similar way:
 
-@example
+@cartouche
+@smallexample
 static void matmul_cuda (const float *A, const float *B, float *C,
                          size_t nx, size_t ny, size_t nz)
   __attribute__ ((task_implementation ("cuda", matmul)));
@@ -143,7 +146,8 @@ static void matmul_cuda (const float *A, const float *B, float *C,
 static void matmul_opencl (const float *A, const float *B, float *C,
                            size_t nx, size_t ny, size_t nz)
   __attribute__ ((task_implementation ("opencl", matmul)));
-@end example
+@end smallexample
+@end cartouche
 
 @noindent
 The CUDA and OpenCL implementations typically either invoke a kernel
@@ -151,7 +155,8 @@ written in CUDA or OpenCL (for similar code, @pxref{CUDA Kernel}, and
 @pxref{OpenCL Kernel}), or call a library function that uses CUDA or
 OpenCL under the hood, such as CUBLAS functions:
 
-@example
+@cartouche
+@smallexample
 static void
 matmul_cuda (const float *A, const float *B, float *C,
              size_t nx, size_t ny, size_t nz)
@@ -161,16 +166,19 @@ matmul_cuda (const float *A, const float *B, float *C,
                0.0f, C, 0);
   cudaStreamSynchronize (starpu_cuda_get_local_stream ());
 @}
-@end example
+@end smallexample
+@end cartouche
 
 A task can be invoked like a regular C function:
 
-@example
+@cartouche
+@smallexample
 matmul (&A[i * zdim * bydim + k * bzdim * bydim],
         &B[k * xdim * bzdim + j * bxdim * bzdim],
         &C[i * xdim * bydim + j * bxdim * bydim],
         bxdim, bydim, bzdim);
-@end example
+@end smallexample
+@end cartouche
 
 @noindent
 This leads to an @dfn{asynchronous invocation}, whereby @code{matmult}'s
@@ -225,7 +233,8 @@ supported C extensions.
 The code below illustrates how to define a task and its implementations
 in a way that allows it to be compiled without the GCC plug-in:
 
-@example
+@cartouche
+@smallexample
 /* The macros below abstract over the attributes specific to
    StarPU-GCC and the name of the CPU implementation.  */
 #ifdef STARPU_GCC_PLUGIN
@@ -279,7 +288,8 @@ main (int argc, char *argv[])
 
   return EXIT_SUCCESS;
 @}
-@end example
+@end smallexample
+@end cartouche
 
 Note that attributes such as @code{task} are simply ignored by GCC when
 the StarPU plug-in is not loaded, so the @code{__task} macro could be

+ 25 - 13
doc/chapters/perf-feedback.texi

@@ -2,7 +2,7 @@
 
 @c This file is part of the StarPU Handbook.
 @c Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
-@c Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
 @c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
 @c See the file starpu.texi for copying conditions.
 
@@ -135,47 +135,59 @@ Variables to be monitored can be registered by calling the
 @code{starpu_top_add_data_boolean}, @code{starpu_top_add_data_integer},
 @code{starpu_top_add_data_float} functions, e.g.:
 
-@example
+@cartouche
+@smallexample
 starpu_top_data *data = starpu_top_add_data_integer("mynum", 0, 100, 1);
-@end example
+@end smallexample
+@end cartouche
 
 The application should then call @code{starpu_top_init_and_wait} to give its name
 and wait for StarPU-Top to get a start request from the user. The name is used
 by StarPU-Top to quickly reload a previously-saved layout of parameter display.
 
-@example
+@cartouche
+@smallexample
 starpu_top_init_and_wait("the application");
-@end example
+@end smallexample
+@end cartouche
 
 The new values can then be provided thanks to
 @code{starpu_top_update_data_boolean}, @code{starpu_top_update_data_integer},
 @code{starpu_top_update_data_float}, e.g.:
 
-@example
+@cartouche
+@smallexample
 starpu_top_update_data_integer(data, mynum);
-@end example
+@end smallexample
+@end cartouche
 
 Updateable parameters can be registered thanks to @code{starpu_top_register_parameter_boolean}, @code{starpu_top_register_parameter_integer}, @code{starpu_top_register_parameter_float}, e.g.:
 
-@example
+@cartouche
+@smallexample
 float alpha;
 starpu_top_register_parameter_float("alpha", &alpha, 0, 10, modif_hook);
-@end example
+@end smallexample
+@end cartouche
 
 @code{modif_hook} is a function which will be called when the parameter is being modified, it can for instance print the new value:
 
-@example
+@cartouche
+@smallexample
 void modif_hook(struct starpu_top_param *d) @{
     fprintf(stderr,"%s has been modified: %f\n", d->name, alpha);
 @}
-@end example
+@end smallexample
+@end cartouche
 
 Task schedulers should notify StarPU-Top when it has decided when a task will be
 scheduled, so that it can show it in its Gantt chart, for instance:
 
-@example
+@cartouche
+@smallexample
 starpu_top_task_prevision(task, workerid, begin, end);
-@end example
+@end smallexample
+@end cartouche
 
 Starting StarPU-Top and the application can be done two ways:
 

+ 13 - 7
doc/chapters/perf-optimization.texi

@@ -2,7 +2,7 @@
 
 @c This file is part of the StarPU Handbook.
 @c Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
-@c Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
 @c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
 @c See the file starpu.texi for copying conditions.
 
@@ -47,16 +47,20 @@ that this data will not be re-used by further tasks, it should advise StarPU to
 immediately replicate it to a desired list of memory nodes (given through a
 bitmask). This can be understood like the write-through mode of CPU caches.
 
-@example
+@cartouche
+@smallexample
 starpu_data_set_wt_mask(img_handle, 1<<0);
-@end example
+@end smallexample
+@end cartouche
 
 will for instance request to always automatically transfer a replicate into the
 main memory (node 0), as bit 0 of the write-through bitmask is being set.
 
-@example
+@cartouche
+@smallexample
 starpu_data_set_wt_mask(img_handle, ~0U);
-@end example
+@end smallexample
+@end cartouche
 
 will request to always automatically broadcast the updated data to all memory
 nodes.
@@ -265,10 +269,12 @@ dedicated CUDA stream for its computations. StarPU provides one by the use of
 @code{starpu_cuda_get_local_stream()} which should be used by all CUDA codelet
 operations. For instance:
 
-@example
+@cartouche
+@smallexample
 func <<<grid,block,0,starpu_cuda_get_local_stream()>>> (foo, bar);
 cudaStreamSynchronize(starpu_cuda_get_local_stream());
-@end example
+@end smallexample
+@end cartouche
 
 StarPU already does appropriate calls for the CUBLAS library.