14 年之前 · ef038c838d
--- a/doc/chapters/advanced-api.texi
+++ b/doc/chapters/advanced-api.texi
@@ -2,7 +2,7 @@
 
				 
			
 
				 @c This file is part of the StarPU Handbook.
			
 
				 @c Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
			
 
				-@c Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				+@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				 @c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
			
 
				 @c See the file starpu.texi for copying conditions.
			
 
				 
			
@@ -50,13 +50,20 @@ See @code{src/datawizard/interfaces/vector_interface.c} for now.
 
				 @deftp {Data Type} {struct starpu_multiformat_data_interface_ops}
			
 
				 todo. The different fields are:
			
 
				 @table @asis
			
 
				-@item @code{cpu_elemsize} the size of each element on CPUs,
			
 
				-@item @code{opencl_elemsize} the size of each element on OpenCL devices,
			
 
				-@item @code{cuda_elemsize} the size of each element on CUDA devices,
			
 
				-@item @code{cpu_to_opencl_cl} pointer to a codelet which converts from CPU to OpenCL
			
 
				-@item @code{opencl_to_cpu_cl} pointer to a codelet which converts from OpenCL to CPU
			
 
				-@item @code{cpu_to_cuda_cl} pointer to a codelet which converts from CPU to CUDA
			
 
				-@item @code{cuda_to_cpu_cl} pointer to a codelet which converts from CUDA to CPU
			
 
				+@item @code{cpu_elemsize}
			
 
				+the size of each element on CPUs,
			
 
				+@item @code{opencl_elemsize}
			
 
				+the size of each element on OpenCL devices,
			
 
				+@item @code{cuda_elemsize}
			
 
				+the size of each element on CUDA devices,
			
 
				+@item @code{cpu_to_opencl_cl}
			
 
				+pointer to a codelet which converts from CPU to OpenCL
			
 
				+@item @code{opencl_to_cpu_cl}
			
 
				+pointer to a codelet which converts from OpenCL to CPU
			
 
				+@item @code{cpu_to_cuda_cl}
			
 
				+pointer to a codelet which converts from CPU to CUDA
			
 
				+@item @code{cuda_to_cpu_cl}
			
 
				+pointer to a codelet which converts from CUDA to CPU
			
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
@@ -78,11 +85,16 @@ the format.
 
				 The task bundle structure describes a list of tasks that should be
			
 
				 scheduled together whenever possible. The different fields are:
			
 
				 @table @asis
			
 
				-@item @code{mutex} Mutex protecting the bundle
			
 
				-@item @code{int previous_workerid} last worker previously assigned a task from the bundle (-1 if none)
			
 
				-@item @code{struct starpu_task_bundle_entry *list} list of tasks
			
 
				-@item @code{int destroy} If this flag is set, the bundle structure is automatically free'd when the bundle is deinitialized.
			
 
				-@item @code{int closed} Is the bundle closed ?
			
 
				+@item @code{mutex}
			
 
				+Mutex protecting the bundle
			
 
				+@item @code{int previous_workerid}
			
 
				+last worker previously assigned a task from the bundle (-1 if none)
			
 
				+@item @code{struct starpu_task_bundle_entry *list}
			
 
				+list of tasks
			
 
				+@item @code{int destroy}
			
 
				+If this flag is set, the bundle structure is automatically freed when the bundle is deinitialized.
			
 
				+@item @code{int closed}
			
 
				+Is the bundle closed ?
			
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -598,14 +598,16 @@ void cpu_to_opencl_opencl_func(void *buffers[], void *args);
 
				 struct starpu_codelet cpu_to_opencl_cl = @{
			
 
				     .where = STARPU_OPENCL,
			
 
				     .opencl_funcs = @{ cpu_to_opencl_opencl_func, NULL @},
			
 
				-    .nbuffers = 1
			
 
				+    .nbuffers = 1,
			
 
				+    .modes = @{ STARPU_RW @}
			
 
				 @};
			
 
				 
			
 
				 void opencl_to_cpu_func(void *buffers[], void *args);
			
 
				 struct starpu_codelet opencl_to_cpu_cl = @{
			
 
				     .where = STARPU_CPU,
			
 
				     .cpu_funcs = @{ opencl_to_cpu_func, NULL @},
			
 
				-    .nbuffers = 1
			
 
				+    .nbuffers = 1,
			
 
				+    .modes = @{ STARPU_RW @}
			
 
				 @};
			
 
				 #endif
			
 
				 
			
@@ -667,23 +669,22 @@ be given the CUDA pointer at registration, for instance:
 
				 
			
 
				 @cartouche
			
 
				 @smallexample
			
 
				-	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
			
 
				-		if (starpu_worker_get_type(workerid) == STARPU_CUDA_WORKER)
			
 
				-			break;
			
 
				+for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
			
 
				+	if (starpu_worker_get_type(workerid) == STARPU_CUDA_WORKER)
			
 
				+		break;
			
 
				 
			
 
				-	cudaSetDevice(starpu_worker_get_devid(workerid));
			
 
				-	cudaGraphicsResourceGetMappedPointer((void**)&output,
			
 
				-                                     &num_bytes, resource);
			
 
				-	starpu_vector_data_register(&handle, starpu_worker_get_memory_node(workerid), output, num_bytes / sizeof(float4), sizeof(float4));
			
 
				+cudaSetDevice(starpu_worker_get_devid(workerid));
			
 
				+cudaGraphicsResourceGetMappedPointer((void**)&output, &num_bytes, resource);
			
 
				+starpu_vector_data_register(&handle, starpu_worker_get_memory_node(workerid), output, num_bytes / sizeof(float4), sizeof(float4));
			
 
				 
			
 
				-	starpu_insert_task(&cl, STARPU_RW, handle, 0);
			
 
				+starpu_insert_task(&cl, STARPU_RW, handle, 0);
			
 
				 
			
 
				-	starpu_data_unregister(handle);
			
 
				+starpu_data_unregister(handle);
			
 
				 
			
 
				-	cudaSetDevice(starpu_worker_get_devid(workerid));
			
 
				-	cudaGraphicsUnmapResources(1, &resource, 0);
			
 
				+cudaSetDevice(starpu_worker_get_devid(workerid));
			
 
				+cudaGraphicsUnmapResources(1, &resource, 0);
			
 
				 
			
 
				-	/* Now display it */
			
 
				+/* Now display it */
			
 
				 @end smallexample
			
 
				 @end cartouche
			
 
				 
			
--- a/doc/chapters/basic-api.texi
+++ b/doc/chapters/basic-api.texi
@@ -412,13 +412,13 @@ be called from task callbacks. Upon successful completion, this function
 
				 returns 0.
			
 
				 @end deftypefun
			
 
				 
			
 
				-@deftypefun void STARPU_DATA_ACQUIRE_CB (starpu_data_handle_t @var{handle}, {enum starpu_access_mode} @var{mode}, code)
			
 
				+@defmac STARPU_DATA_ACQUIRE_CB (starpu_data_handle_t @var{handle}, {enum starpu_access_mode} @var{mode}, code)
			
 
				 @code{STARPU_DATA_ACQUIRE_CB} is the same as @code{starpu_data_acquire_cb},
			
 
				 except that the code to be executed in a callback is directly provided as a
			
 
				 macro parameter, and the data handle is automatically released after it. This
			
 
				 permits to easily execute code which depends on the value of some registered
			
 
				 data. This is non-blocking too and may be called from task callbacks.
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				 @deftypefun void starpu_data_release (starpu_data_handle_t @var{handle})
			
 
				 This function releases the piece of data acquired by the application either by
			
@@ -589,13 +589,13 @@ todo
 
				 todo
			
 
				 @end deftypefun
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_VARIABLE_GET_PTR ({void *}@var{interface})
			
 
				+@defmac STARPU_VARIABLE_GET_PTR ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun size_t STARPU_VARIABLE_GET_ELEMSIZE ({void *}@var{interface})
			
 
				+@defmac STARPU_VARIABLE_GET_ELEMSIZE ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				 @node Accessing Vector Data Interfaces
			
 
				 @subsubsection Vector Data Interfaces
			
@@ -612,25 +612,25 @@ todo
 
				 todo
			
 
				 @end deftypefun
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_VECTOR_GET_PTR ({void *}@var{interface})
			
 
				+@defmac STARPU_VECTOR_GET_PTR ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_VECTOR_GET_DEV_HANDLE ({void *}@var{interface})
			
 
				+@defmac STARPU_VECTOR_GET_DEV_HANDLE ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_VECTOR_GET_OFFSET ({void *}@var{interface})
			
 
				+@defmac STARPU_VECTOR_GET_OFFSET ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_VECTOR_GET_NX ({void *}@var{interface})
			
 
				+@defmac STARPU_VECTOR_GET_NX ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun size_t STARPU_VECTOR_GET_ELEMSIZE ({void *}@var{interface})
			
 
				+@defmac STARPU_VECTOR_GET_ELEMSIZE ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				 @node Accessing Matrix Data Interfaces
			
 
				 @subsubsection Matrix Data Interfaces
			
@@ -655,33 +655,33 @@ todo
 
				 todo
			
 
				 @end deftypefun
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_MATRIX_GET_PTR ({void *}@var{interface})
			
 
				+@defmac STARPU_MATRIX_GET_PTR ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_MATRIX_GET_DEV_HANDLE ({void *}@var{interface})
			
 
				+@defmac STARPU_MATRIX_GET_DEV_HANDLE ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_MATRIX_GET_OFFSET ({void *}@var{interface})
			
 
				+@defmac STARPU_MATRIX_GET_OFFSET ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_MATRIX_GET_NX ({void *}@var{interface})
			
 
				+@defmac STARPU_MATRIX_GET_NX ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_MATRIX_GET_NY ({void *}@var{interface})
			
 
				+@defmac STARPU_MATRIX_GET_NY ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_MATRIX_GET_LD ({void *}@var{interface})
			
 
				+@defmac STARPU_MATRIX_GET_LD ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun size_t STARPU_MATRIX_GET_ELEMSIZE ({void *}@var{interface})
			
 
				+@defmac STARPU_MATRIX_GET_ELEMSIZE ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				 @node Accessing Block Data Interfaces
			
 
				 @subsubsection Block Data Interfaces
			
@@ -714,41 +714,41 @@ todo
 
				 todo
			
 
				 @end deftypefun
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_BLOCK_GET_PTR ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_PTR ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_BLOCK_GET_DEV_HANDLE ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_DEV_HANDLE ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_BLOCK_GET_OFFSET ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_OFFSET ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_BLOCK_GET_NX ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_NX ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_BLOCK_GET_NY ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_NY ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_BLOCK_GET_NZ ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_NZ ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_BLOCK_GET_LDY ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_LDY ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_BLOCK_GET_LDZ ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_LDZ ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun size_t STARPU_BLOCK_GET_ELEMSIZE ({void *}@var{interface})
			
 
				+@defmac STARPU_BLOCK_GET_ELEMSIZE ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				 @node Accessing BCSR Data Interfaces
			
 
				 @subsubsection BCSR Data Interfaces
			
@@ -821,33 +821,33 @@ todo
 
				 todo
			
 
				 @end deftypefun
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_CSR_GET_NNZ ({void *}@var{interface})
			
 
				+@defmac STARPU_CSR_GET_NNZ ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_CSR_GET_NROW ({void *}@var{interface})
			
 
				+@defmac STARPU_CSR_GET_NROW ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uintptr_t STARPU_CSR_GET_NZVAL ({void *}@var{interface})
			
 
				+@defmac STARPU_CSR_GET_NZVAL ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun {uint32_t *} STARPU_CSR_GET_COLIND ({void *}@var{interface})
			
 
				+@defmac STARPU_CSR_GET_COLIND ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun {uint32_t *} STARPU_CSR_GET_ROWPTR ({void *}@var{interface})
			
 
				+@defmac STARPU_CSR_GET_ROWPTR ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun uint32_t STARPU_CSR_GET_FIRSTENTRY ({void *}@var{interface})
			
 
				+@defmac STARPU_CSR_GET_FIRSTENTRY ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				-@deftypefun size_t STARPU_CSR_GET_ELEMSIZE ({void *}@var{interface})
			
 
				+@defmac STARPU_CSR_GET_ELEMSIZE ({void *}@var{interface})
			
 
				 todo
			
 
				-@end deftypefun
			
 
				+@end defmac
			
 
				 
			
 
				 @node Data Partition
			
 
				 @section Data Partition
			
@@ -1045,7 +1045,7 @@ indicates that it is only available on Cell SPUs.
 
				 This field has been made deprecated. One should use instead the
			
 
				 @code{cpu_funcs} field.
			
 
				 
			
 
				-@item @code{cpu_funcs}
			
 
				+@item @code{cpu_funcs} (optional)
			
 
				 Is an array of function pointers to the CPU implementations of the codelet.
			
 
				 It must be terminated by a NULL value.
			
 
				 The functions prototype must be: @code{void cpu_func(void *buffers[], void *cl_arg)}. The first
			
@@ -1059,7 +1059,7 @@ the @code{where} field, it must be non-null otherwise.
 
				 This field has been made deprecated. One should use instead the
			
 
				 @code{cuda_funcs} field.
			
 
				 
			
 
				-@item @code{cuda_funcs}
			
 
				+@item @code{cuda_funcs} (optional)
			
 
				 Is an array of function pointers to the CUDA implementations of the codelet.
			
 
				 It must be terminated by a NULL value.
			
 
				 @emph{The functions must be host-functions written in the CUDA runtime
			
@@ -1072,7 +1072,7 @@ field, it must be non-null otherwise.
 
				 This field has been made deprecated. One should use instead the
			
 
				 @code{opencl_funcs} field.
			
 
				 
			
 
				-@item @code{opencl_funcs}
			
 
				+@item @code{opencl_funcs} (optional)
			
 
				 Is an array of function pointers to the OpenCL implementations of the codelet.
			
 
				 It must be terminated by a NULL value.
			
 
				 The functions prototype must be:
			
@@ -1084,7 +1084,7 @@ This pointer is ignored if @code{STARPU_OPENCL} does not appear in the
 
				 This field has been made deprecated. One should use instead the
			
 
				 @code{gordon_funcs} field.
			
 
				 
			
 
				-@item @code{gordon_funcs}
			
 
				+@item @code{gordon_funcs} (optional)
			
 
				 Is an array of index of the Cell SPU implementations of the codelet within the
			
 
				 Gordon library.
			
 
				 It must be terminated by a NULL value.
			
--- a/doc/chapters/configuration.texi
+++ b/doc/chapters/configuration.texi
@@ -2,7 +2,7 @@
 
				 
			
 
				 @c This file is part of the StarPU Handbook.
			
 
				 @c Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
			
 
				-@c Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				+@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				 @c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
			
 
				 @c See the file starpu.texi for copying conditions.
			
 
				 
			
@@ -38,22 +38,14 @@ The following arguments can be given to the @code{configure} script.
 
				 
			
 
				 @node --enable-debug
			
 
				 @subsubsection @code{--enable-debug}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Enable debugging messages.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-fast
			
 
				 @subsubsection @code{--enable-fast}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Do not enforce assertions, saves a lot of time spent to compute them otherwise.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-verbose
			
 
				 @subsubsection @code{--enable-verbose}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Augment the verbosity of the debugging messages. This can be disabled
			
 
				 at runtime by setting the environment variable @code{STARPU_SILENT} to
			
 
				 any value.
			
@@ -61,14 +53,10 @@ any value.
 
				 @smallexample
			
 
				 % STARPU_SILENT=1 ./vector_scal
			
 
				 @end smallexample
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-coverage
			
 
				 @subsubsection @code{--enable-coverage}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Enable flags for the @code{gcov} coverage tool.
			
 
				-@end table
			
 
				 
			
 
				 @node Configuring workers
			
 
				 @subsection Configuring workers
			
@@ -94,132 +82,82 @@ Enable flags for the @code{gcov} coverage tool.
 
				 
			
 
				 @node --enable-maxcpus
			
 
				 @subsubsection @code{--enable-maxcpus=<number>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				-Defines the maximum number of CPU cores that StarPU will support, then
			
 
				+Define the maximum number of CPU cores that StarPU will support, then
			
 
				 available as the @code{STARPU_MAXCPUS} macro.
			
 
				-@end table
			
 
				 
			
 
				 @node --disable-cpu
			
 
				 @subsubsection @code{--disable-cpu}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Disable the use of CPUs of the machine. Only GPUs etc. will be used.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-maxcudadev
			
 
				 @subsubsection @code{--enable-maxcudadev=<number>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				-Defines the maximum number of CUDA devices that StarPU will support, then
			
 
				+Define the maximum number of CUDA devices that StarPU will support, then
			
 
				 available as the @code{STARPU_MAXCUDADEVS} macro.
			
 
				-@end table
			
 
				 
			
 
				 @node --disable-cuda
			
 
				 @subsubsection @code{--disable-cuda}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Disable the use of CUDA, even if a valid CUDA installation was detected.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-cuda-dir
			
 
				 @subsubsection @code{--with-cuda-dir=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the directory where CUDA is installed. This directory should notably contain
			
 
				 @code{include/cuda.h}.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-cuda-include-dir
			
 
				 @subsubsection @code{--with-cuda-include-dir=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the directory where CUDA headers are installed. This directory should
			
 
				 notably contain @code{cuda.h}. This defaults to @code{/include} appended to the
			
 
				 value given to @code{--with-cuda-dir}.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-cuda-lib-dir
			
 
				 @subsubsection @code{--with-cuda-lib-dir=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the directory where the CUDA library is installed. This directory should
			
 
				 notably contain the CUDA shared libraries (e.g. libcuda.so). This defaults to
			
 
				 @code{/lib} appended to the value given to @code{--with-cuda-dir}.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node --disable-cuda-memcpy-peer
			
 
				 @subsubsection @code{--disable-cuda-memcpy-peer}
			
 
				-@table @asis
			
 
				-@item @emph{Description}
			
 
				-Explicitely disables peer transfers when using CUDA 4.0
			
 
				-@end table
			
 
				+Explicitly disable peer transfers when using CUDA 4.0
			
 
				 
			
 
				 @node --enable-maxopencldev
			
 
				 @subsubsection @code{--enable-maxopencldev=<number>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				-Defines the maximum number of OpenCL devices that StarPU will support, then
			
 
				+Define the maximum number of OpenCL devices that StarPU will support, then
			
 
				 available as the @code{STARPU_MAXOPENCLDEVS} macro.
			
 
				-@end table
			
 
				 
			
 
				 @node --disable-opencl
			
 
				 @subsubsection @code{--disable-opencl}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Disable the use of OpenCL, even if the SDK is detected.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-opencl-dir
			
 
				 @subsubsection @code{--with-opencl-dir=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the location of the OpenCL SDK. This directory should notably contain
			
 
				 @code{include/CL/cl.h} (or @code{include/OpenCL/cl.h} on Mac OS).
			
 
				-@end table
			
 
				 
			
 
				 @node --with-opencl-include-dir
			
 
				 @subsubsection @code{--with-opencl-include-dir=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the location of OpenCL headers. This directory should notably contain
			
 
				 @code{CL/cl.h} (or @code{OpenCL/cl.h} on Mac OS). This defaults to
			
 
				 @code{/include} appended to the value given to @code{--with-opencl-dir}.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node --with-opencl-lib-dir
			
 
				 @subsubsection @code{--with-opencl-lib-dir=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the location of the OpenCL library. This directory should notably
			
 
				 contain the OpenCL shared libraries (e.g. libOpenCL.so). This defaults to
			
 
				 @code{/lib} appended to the value given to @code{--with-opencl-dir}.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-gordon
			
 
				 @subsubsection @code{--enable-gordon}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Enable the use of the Gordon runtime for Cell SPUs.
			
 
				 @c TODO: rather default to enabled when detected
			
 
				-@end table
			
 
				 
			
 
				 @node --with-gordon-dir
			
 
				 @subsubsection @code{--with-gordon-dir=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the location of the Gordon SDK.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-maximplementations
			
 
				 @subsubsection @code{--enable-maximplementations=<number>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				-Defines the number of implementations that can be defined for a single kind of
			
 
				+Define the number of implementations that can be defined for a single kind of
			
 
				 device. It is then available as the @code{STARPU_MAXIMPLEMENTATIONS} macro.
			
 
				-@end table
			
 
				 
			
 
				 @node Advanced configuration
			
 
				 @subsection Advanced configuration
			
@@ -244,120 +182,75 @@ device. It is then available as the @code{STARPU_MAXIMPLEMENTATIONS} macro.
 
				 
			
 
				 @node --enable-perf-debug
			
 
				 @subsubsection @code{--enable-perf-debug}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Enable performance debugging through gprof.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-model-debug
			
 
				 @subsubsection @code{--enable-model-debug}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Enable performance model debugging.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-stats
			
 
				 @subsubsection @code{--enable-stats}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Enable statistics.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-maxbuffers
			
 
				 @subsubsection @code{--enable-maxbuffers=<nbuffers>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Define the maximum number of buffers that tasks will be able to take
			
 
				 as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-allocation-cache
			
 
				 @subsubsection @code{--enable-allocation-cache}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Enable the use of a data allocation cache to avoid the cost of it with
			
 
				 CUDA. Still experimental.
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-opengl-render
			
 
				 @subsubsection @code{--enable-opengl-render}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Enable the use of OpenGL for the rendering of some examples.
			
 
				 @c TODO: rather default to enabled when detected
			
 
				-@end table
			
 
				 
			
 
				 @node --enable-blas-lib
			
 
				 @subsubsection @code{--enable-blas-lib=<name>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the blas library to be used by some of the examples. The
			
 
				 library has to be 'atlas' or 'goto'.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-magma
			
 
				 @subsubsection @code{--with-magma=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify where magma is installed. This directory should notably contain
			
 
				 @code{include/magmablas.h}.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-fxt
			
 
				 @subsubsection @code{--with-fxt=<path>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the location of FxT (for generating traces and rendering them
			
 
				 using ViTE). This directory should notably contain
			
 
				 @code{include/fxt/fxt.h}.
			
 
				 @c TODO add ref to other section
			
 
				-@end table
			
 
				 
			
 
				 @node --with-perf-model-dir
			
 
				 @subsubsection @code{--with-perf-model-dir=<dir>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify where performance models should be stored (instead of defaulting to the
			
 
				 current user's home).
			
 
				-@end table
			
 
				 
			
 
				 @node --with-mpicc
			
 
				 @subsubsection @code{--with-mpicc=<path to mpicc>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the location of the @code{mpicc} compiler to be used for starpumpi.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-goto-dir
			
 
				 @subsubsection @code{--with-goto-dir=<dir>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the location of GotoBLAS.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-atlas-dir
			
 
				 @subsubsection @code{--with-atlas-dir=<dir>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the location of ATLAS. This directory should notably contain
			
 
				 @code{include/cblas.h}.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-mkl-cflags
			
 
				 @subsubsection @code{--with-mkl-cflags=<cflags>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the compilation flags for the MKL Library.
			
 
				-@end table
			
 
				 
			
 
				 @node --with-mkl-ldflags
			
 
				 @subsubsection @code{--with-mkl-ldflags=<ldflags>}
			
 
				-@table @asis
			
 
				-@item @emph{Description}:
			
 
				 Specify the linking flags for the MKL Library. Note that the
			
 
				 @url{http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/}
			
 
				 website provides a script to determine the linking flags.
			
 
				-@end table
			
 
				 
			
 
				 @node Execution configuration through environment variables
			
 
				 @section Execution configuration through environment variables
			
@@ -387,50 +280,33 @@ variables.
 
				 
			
 
				 @node STARPU_NCPUS
			
 
				 @subsubsection @code{STARPU_NCPUS} -- Number of CPU workers
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 Specify the number of CPU workers (thus not including workers dedicated to control accelerators). Note that by default, StarPU will not allocate
			
 
				 more CPU workers than there are physical CPUs, and that some CPUs are used to control
			
 
				 the accelerators.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node STARPU_NCUDA
			
 
				 @subsubsection @code{STARPU_NCUDA} -- Number of CUDA workers
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 Specify the number of CUDA devices that StarPU can use. If
			
 
				 @code{STARPU_NCUDA} is lower than the number of physical devices, it is
			
 
				 possible to select which CUDA devices should be used by the means of the
			
 
				 @code{STARPU_WORKERS_CUDAID} environment variable. By default, StarPU will
			
 
				 create as many CUDA workers as there are CUDA devices.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node STARPU_NOPENCL
			
 
				 @subsubsection @code{STARPU_NOPENCL} -- Number of OpenCL workers
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 OpenCL equivalent of the @code{STARPU_NCUDA} environment variable.
			
 
				-@end table
			
 
				 
			
 
				 @node STARPU_NGORDON
			
 
				 @subsubsection @code{STARPU_NGORDON} -- Number of SPU workers (Cell)
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 Specify the number of SPUs that StarPU can use.
			
 
				-@end table
			
 
				-
			
 
				 
			
 
				 @node STARPU_WORKERS_CPUID
			
 
				 @subsubsection @code{STARPU_WORKERS_CPUID} -- Bind workers to specific CPUs
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 Passing an array of integers (starting from 0) in @code{STARPU_WORKERS_CPUID}
			
 
				 specifies on which logical CPU the different workers should be
			
 
				 bound. For instance, if @code{STARPU_WORKERS_CPUID = "0 1 4 5"}, the first
			
@@ -454,13 +330,9 @@ third (resp. second and fourth) workers will be put on CPU #0 (resp. CPU #1).
 
				 This variable is ignored if the @code{use_explicit_workers_bindid} flag of the
			
 
				 @code{starpu_conf} structure passed to @code{starpu_init} is set.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node STARPU_WORKERS_CUDAID
			
 
				 @subsubsection @code{STARPU_WORKERS_CUDAID} -- Select specific CUDA devices
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 Similarly to the @code{STARPU_WORKERS_CPUID} environment variable, it is
			
 
				 possible to select which CUDA devices should be used by StarPU. On a machine
			
 
				 equipped with 4 GPUs, setting @code{STARPU_WORKERS_CUDAID = "1 3"} and
			
@@ -470,18 +342,14 @@ the one reported by CUDA).
 
				 
			
 
				 This variable is ignored if the @code{use_explicit_workers_cuda_gpuid} flag of
			
 
				 the @code{starpu_conf} structure passed to @code{starpu_init} is set.
			
 
				-@end table
			
 
				 
			
 
				 @node STARPU_WORKERS_OPENCLID
			
 
				 @subsubsection @code{STARPU_WORKERS_OPENCLID} -- Select specific OpenCL devices
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 OpenCL equivalent of the @code{STARPU_WORKERS_CUDAID} environment variable.
			
 
				 
			
 
				 This variable is ignored if the @code{use_explicit_workers_opencl_gpuid} flag of
			
 
				 the @code{starpu_conf} structure passed to @code{starpu_init} is set.
			
 
				-@end table
			
 
				 
			
 
				 @node Scheduling
			
 
				 @subsection Configuring the Scheduling engine
			
@@ -496,22 +364,15 @@ the @code{starpu_conf} structure passed to @code{starpu_init} is set.
 
				 
			
 
				 @node STARPU_SCHED
			
 
				 @subsubsection @code{STARPU_SCHED} -- Scheduling policy
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				-
			
 
				-This chooses between the different scheduling policies proposed by StarPU: work
			
 
				+Choose between the different scheduling policies proposed by StarPU: work
			
 
				 stealing, random, greedy, with performance models, etc.
			
 
				 
			
 
				 Use @code{STARPU_SCHED=help} to get the list of available schedulers.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node STARPU_CALIBRATE
			
 
				 @subsubsection @code{STARPU_CALIBRATE} -- Calibrate performance models
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 If this variable is set to 1, the performance models are calibrated during
			
 
				 the execution. If it is set to 2, the previous values are dropped to restart
			
 
				 calibration from scratch. Setting this variable to 0 disables calibration; this
			
@@ -519,13 +380,9 @@ is the default behaviour.
 
				 
			
 
				 Note: this currently only applies to @code{dm}, @code{dmda} and @code{heft} scheduling policies.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node STARPU_PREFETCH
			
 
				 @subsubsection @code{STARPU_PREFETCH} -- Use data prefetch
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 This variable indicates whether data prefetching should be enabled (0 means
			
 
				 that it is disabled). If prefetching is enabled, when a task is scheduled to be
			
 
				 executed e.g. on a GPU, StarPU will request an asynchronous transfer in
			
@@ -533,30 +390,20 @@ advance, so that data is already present on the GPU when the task starts. As a
 
				 result, computation and data transfers are overlapped.
			
 
				 Note that prefetching is enabled by default in StarPU.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node STARPU_SCHED_ALPHA
			
 
				 @subsubsection @code{STARPU_SCHED_ALPHA} -- Computation factor
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 To estimate the cost of a task StarPU takes into account the estimated
			
 
				 computation time (obtained thanks to performance models). The alpha factor is
			
 
				 the coefficient to be applied to it before adding it to the communication part.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node STARPU_SCHED_BETA
			
 
				 @subsubsection @code{STARPU_SCHED_BETA} -- Communication factor
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 To estimate the cost of a task StarPU takes into account the estimated
			
 
				 data transfer time (obtained thanks to performance models). The beta factor is
			
 
				 the coefficient to be applied to it before adding it to the computation part.
			
 
				 
			
 
				-@end table
			
 
				-
			
 
				 @node Misc
			
 
				 @subsection Miscellaneous and debug
			
 
				 
			
@@ -570,46 +417,31 @@ the coefficient to be applied to it before adding it to the computation part.
 
				 
			
 
				 @node STARPU_SILENT
			
 
				 @subsubsection @code{STARPU_SILENT} -- Disable verbose mode
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 This variable allows verbose mode to be disabled at runtime when StarPU
			
 
				 has been configured with the option @code{--enable-verbose}.
			
 
				-@end table
			
 
				 
			
 
				 @node STARPU_LOGFILENAME
			
 
				 @subsubsection @code{STARPU_LOGFILENAME} -- Select debug file name
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}:
			
 
				 This variable specifies the file in which the debugging output should be saved.
			
 
				-@end table
			
 
				 
			
 
				 @node STARPU_FXT_PREFIX
			
 
				 @subsubsection @code{STARPU_FXT_PREFIX} -- FxT trace location
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}
			
 
				 This variable specifies in which directory to save the trace generated if FxT is enabled. It needs to have a trailing '/' character.
			
 
				-@end table
			
 
				 
			
 
				 @node STARPU_LIMIT_GPU_MEM
			
 
				 @subsubsection @code{STARPU_LIMIT_GPU_MEM} -- Restrict memory size on the GPUs
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}
			
 
				 This variable specifies the maximum number of megabytes that should be
			
 
				 available to the application on each GPU. In case this value is smaller than
			
 
				 the size of the memory of a GPU, StarPU pre-allocates a buffer to waste memory
			
 
				 on the device. This variable is intended to be used for experimental purposes
			
 
				 as it emulates devices that have a limited amount of memory.
			
 
				-@end table
			
 
				 
			
 
				 @node STARPU_GENERATE_TRACE
			
 
				 @subsubsection @code{STARPU_GENERATE_TRACE} -- Generate a Paje trace when StarPU is shut down
			
 
				-@table @asis
			
 
				 
			
 
				-@item @emph{Description}
			
 
				 When set to 1, this variable indicates that StarPU should automatically
			
 
				 generate a Paje trace when @code{starpu_shutdown} is called.
			
 
				-@end table
			
--- a/doc/chapters/tips-tricks.texi
+++ b/doc/chapters/tips-tricks.texi
@@ -2,7 +2,7 @@
 
				 
			
 
				 @c This file is part of the StarPU Handbook.
			
 
				 @c Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
			
 
				-@c Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				+@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				 @c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
			
 
				 @c See the file starpu.texi for copying conditions.
			
 
				 
			
@@ -25,12 +25,15 @@ be able to use FFTW.
 
				 
			
 
				 Some global array stores the instantiated objects:
			
 
				 
			
 
				+@cartouche
			
 
				 @smallexample
			
 
				 fftw_plan plan_cpu[STARPU_NMAXWORKERS];
			
 
				 @end smallexample
			
 
				+@end cartouche
			
 
				 
			
 
				 At initialisation time of libstarpu, the objects are initialized:
			
 
				 
			
 
				+@cartouche
			
 
				 @smallexample
			
 
				 int workerid;
			
 
				 for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) @{
			
@@ -41,9 +44,11 @@ for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) @{
 
				     @}
			
 
				 @}
			
 
				 @end smallexample
			
 
				+@end cartouche
			
 
				 
			
 
				 And in the codelet body, they are used:
			
 
				 
			
 
				+@cartouche
			
 
				 @smallexample
			
 
				 static void fft(void *descr[], void *_args)
			
 
				 @{
			
@@ -54,6 +59,7 @@ static void fft(void *descr[], void *_args)
 
				     fftw_execute(plan, ...);
			
 
				 @}
			
 
				 @end smallexample
			
 
				+@end cartouche
			
 
				 
			
 
				 Another way to go which may be needed is to execute some code from the workers
			
 
				 themselves thanks to @code{starpu_execute_on_each_worker}. This may be required
			
@@ -61,6 +67,7 @@ by CUDA to behave properly due to threading issues. For instance, StarPU's
 
				 @code{starpu_helper_cublas_init} looks like the following to call
			
 
				 @code{cublasInit} from the workers themselves:
			
 
				 
			
 
				+@cartouche
			
 
				 @smallexample
			
 
				 static void init_cublas_func(void *args STARPU_ATTRIBUTE_UNUSED)
			
 
				 @{
			
@@ -72,3 +79,4 @@ void starpu_helper_cublas_init(void)
 
				     starpu_execute_on_each_worker(init_cublas_func, NULL, STARPU_CUDA);
			
 
				 @}
			
 
				 @end smallexample
			
 
				+@end cartouche