| 
					
				 | 
			
			
				@@ -60,64 +60,143 @@ The header starpu.h should be included in any code using StarPU. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \subsection DefiningACodelet Defining A Codelet 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+A codelet is a structure that represents a computational kernel. Such a codelet 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+may contain an implementation of the same kernel on different architectures 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+(e.g. CUDA, x86, ...). For compatibility, make sure that the whole 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+structure is properly initialized to zero, either by using the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+function starpu_codelet_init(), or by letting the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+compiler implicitly do it as exemplified below. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+The field starpu_codelet::nbuffers specifies the number of data buffers that are 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+manipulated by the codelet: here the codelet does not access or modify any data 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+that is controlled by our data management library. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+We create a codelet which may only be executed on the CPUs. When a CPU 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+core will execute a codelet, it will call the function 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<c>cpu_func</c>, which \em must have the following prototype: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\code{.c} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+void (*cpu_func)(void *buffers[], void *cl_arg); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+In this example, we can ignore the first argument of this function which gives a 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+description of the input and output buffers (e.g. the size and the location of 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+the matrices) since there is none. We also ignore the second argument 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+which is a pointer to optional arguments for the codelet. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \code{.c} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-struct params 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    int i; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    float f; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-}; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 void cpu_func(void *buffers[], void *cl_arg) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    struct params *params = cl_arg; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    printf("Hello world (params = {%i, %f} )\n", params->i, params->f); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    printf("Hello world\n"); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 struct starpu_codelet cl = 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    .where = STARPU_CPU, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     .cpu_funcs = { cpu_func, NULL }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    .cpu_funcs_name = { "cpu_func", NULL }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     .nbuffers = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 }; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-A codelet is a structure that represents a computational kernel. Such a codelet 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-may contain an implementation of the same kernel on different architectures 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-(e.g. CUDA, x86, ...). For compatibility, make sure that the whole 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-structure is properly initialized to zero, either by using the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-function starpu_codelet_init(), or by letting the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-compiler implicitly do it as examplified above. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\subsection SubmittingATask Submitting A Task 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-The field starpu_codelet::nbuffers specifies the number of data buffers that are 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-manipulated by the codelet: here the codelet does not access or modify any data 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-that is controlled by our data management library. Note that the argument 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-passed to the codelet (the parameter <c>cl_arg</c> of the function 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-<c>cpu_func</c>) does not count as a buffer since it is not managed by 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-our data management library, but just contain trivial parameters. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+Before submitting any tasks to StarPU, starpu_init() must be called. The 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<c>NULL</c> argument specifies that we use the default configuration. Tasks cannot 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+be submitted after the termination of StarPU by a call to 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+starpu_shutdown(). 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+In the example below, a task structure is allocated by a call to 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+starpu_task_create(). This function only allocates and fills the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+corresponding structure with the default settings, but it does not 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+submit the task to StarPU. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \internal 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-TODO need a crossref to the proper description of "where" see bla for more ... 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+not really clear ;) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \endinternal 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-We create a codelet which may only be executed on the CPUs. The field 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-starpu_codelet::where is a bitmask that defines where the codelet may 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-be executed. Here, the value ::STARPU_CPU means that only CPUs can 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-execute this codelet. Note that field starpu_codelet::where is 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-optional, when unset its value is automatically set based on the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-availability of the different fields <c>XXX_funcs</c>. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-When a CPU core executes a codelet, it calls the function 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-<c>cpu_func</c>, which \em must have the following prototype: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+The field starpu_task::cl is a pointer to the codelet which the task will 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+execute: in other words, the codelet structure describes which computational 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+kernel should be offloaded on the different architectures, and the task 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+structure is a wrapper containing a codelet and the piece of data on which the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+codelet should operate. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+If the field starpu_task::synchronous is non-zero, task submission 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+will be synchronous: the function starpu_task_submit() will not return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+until the task has been executed. Note that the function starpu_shutdown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+does not guarantee that asynchronous tasks have been executed before 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+it returns; starpu_task_wait_for_all() can be used to that effect, or 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+data can be unregistered (starpu_data_unregister()), which will 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+implicitly wait for all the tasks scheduled to work on it, unless 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+explicitly disabled thanks to 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+starpu_data_set_default_sequential_consistency_flag() or 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+starpu_data_set_sequential_consistency_flag(). 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \code{.c} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-void (*cpu_func)(void *buffers[], void *cl_arg); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+int main(int argc, char **argv) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    /* initialize StarPU */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    starpu_init(NULL); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    struct starpu_task *task = starpu_task_create(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    task->cl = &cl; /* Pointer to the codelet defined above */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    /* starpu_task_submit will be a blocking call. If unset, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    starpu_task_wait() needs to be called after submitting the task. */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    task->synchronous = 1; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    /* submit the task to StarPU */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    starpu_task_submit(task); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    /* terminate StarPU */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    starpu_shutdown(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-In this example, we can ignore the first argument of this function which gives a 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-description of the input and output buffers (e.g. the size and the location of 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-the matrices) since there is none. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-The second argument is a pointer to a buffer passed as an 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-argument to the codelet by the means of the field starpu_task::cl_arg. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\subsection ExecutionOfHelloWorld Execution Of Hello World 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\verbatim 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+$ make hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+$ ./hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+Hello world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\endverbatim 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\subsection PassingArgumentsToTheCodelet Passing Arguments To The Codelet 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+The optional field starpu_task::cl_arg is a pointer to a buffer 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+(of size starpu_task::cl_arg_size) with some parameters for the kernel 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+described by the codelet. For instance, if a codelet implements a 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+computational kernel that multiplies its input vector by a constant, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+the constant could be specified by the means of this buffer, instead 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+of registering it as a StarPU data. It must however be noted that 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+StarPU avoids making a copy whenever possible and rather passes the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+pointer as such, so the buffer which is pointed at must be kept allocated 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+until the task terminates, and if several tasks are submitted with 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+various parameters, each of them must be given a pointer to their 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+own buffer. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\code{.c} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+struct params 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    int i; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    float f; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+}; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+void cpu_func(void *buffers[], void *cl_arg) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    struct params *params = cl_arg; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    printf("Hello world (params = {%i, %f} )\n", params->i, params->f); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+As said before, the field starpu_codelet::nbuffers specifies the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+number of data buffers that are manipulated by the codelet. It does 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+not count the argument --- the parameter <c>cl_arg</c> of the function 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<c>cpu_func</c> --- since it is not managed by our data management 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+library, but just contains trivial parameters. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \internal 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 TODO rewrite so that it is a little clearer ? 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -130,14 +209,7 @@ buffer will be modified as well: this for instance implies that the buffer 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 cannot be used as a synchronization medium. If synchronization is needed, data 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 has to be registered to StarPU, see \ref VectorScalingUsingStarPUAPI. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-\subsection SubmittingATask Submitting A Task 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \code{.c} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-void callback_func(void *callback_arg) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    printf("Callback function (arg %x)\n", callback_arg); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 int main(int argc, char **argv) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     /* initialize StarPU */ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -151,9 +223,6 @@ int main(int argc, char **argv) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     task->cl_arg = &params; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     task->cl_arg_size = sizeof(params); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    task->callback_func = callback_func; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    task->callback_arg = 0x42; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     /* starpu_task_submit will be a blocking call */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     task->synchronous = 1; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -167,37 +236,14 @@ int main(int argc, char **argv) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-Before submitting any tasks to StarPU, starpu_init() must be called. The 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-<c>NULL</c> argument specifies that we use the default configuration. Tasks cannot 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-be submitted after the termination of StarPU by a call to 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-starpu_shutdown(). 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-In the example above, a task structure is allocated by a call to 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-starpu_task_create(). This function only allocates and fills the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-corresponding structure with the default settings, but it does not 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-submit the task to StarPU. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-\internal 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-not really clear ;) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-\endinternal 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\verbatim 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+$ make hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+$ ./hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+Hello world (params = {1, 2.000000} ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\endverbatim 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-The field starpu_task::cl is a pointer to the codelet which the task will 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-execute: in other words, the codelet structure describes which computational 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-kernel should be offloaded on the different architectures, and the task 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-structure is a wrapper containing a codelet and the piece of data on which the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-codelet should operate. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-The optional field starpu_task::cl_arg field is a pointer to a buffer 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-(of size starpu_task::cl_arg_size) with some parameters for the kernel 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-described by the codelet. For instance, if a codelet implements a 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-computational kernel that multiplies its input vector by a constant, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-the constant could be specified by the means of this buffer, instead 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-of registering it as a StarPU data. It must however be noted that 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-StarPU avoids making copy whenever possible and rather passes the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-pointer as such, so the buffer which is pointed at must be kept allocated 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-until the task terminates, and if several tasks are submitted with 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-various parameters, each of them must be given a pointer to their 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-own buffer. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\subsection DefiningACallback Defining A Callback 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 Once a task has been executed, an optional callback function 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 starpu_task::callback_func is called when defined. 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -210,27 +256,66 @@ function. The prototype of a callback function must be: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 void (*callback_function)(void *); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-If the field starpu_task::synchronous is non-zero, task submission 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-will be synchronous: the function starpu_task_submit() will not return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-until the task has been executed. Note that the function starpu_shutdown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-does not guarantee that asynchronous tasks have been executed before 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-it returns, starpu_task_wait_for_all() can be used to that effect, or 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-data can be unregistered (starpu_data_unregister()), which will 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-implicitly wait for all the tasks scheduled to work on it, unless 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-explicitly disabled thanks to 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-starpu_data_set_default_sequential_consistency_flag() or 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-starpu_data_set_sequential_consistency_flag(). 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\code{.c} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+void callback_func(void *callback_arg) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    printf("Callback function (arg %x)\n", callback_arg); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-\subsection ExecutionOfHelloWorld Execution Of Hello World 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+int main(int argc, char **argv) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    /* initialize StarPU */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    starpu_init(NULL); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    struct starpu_task *task = starpu_task_create(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    task->cl = &cl; /* Pointer to the codelet defined above */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    task->callback_func = callback_func; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    task->callback_arg = 0x42; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    /* starpu_task_submit will be a blocking call */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    task->synchronous = 1; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    /* submit the task to StarPU */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    starpu_task_submit(task); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    /* terminate StarPU */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    starpu_shutdown(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \verbatim 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 $ make hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 $ ./hello_world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-Hello world (params = {1, 2.000000} ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+Hello world 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 Callback function (arg 42) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \endverbatim 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\subsection WhereToExecuteACodelet Where To Execute A Codelet 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\code{.c} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+struct starpu_codelet cl = 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    .where = STARPU_CPU, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    .cpu_funcs = { cpu_func, NULL }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    .cpu_funcs_name = { "cpu_func", NULL }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     .nbuffers = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+}; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+\endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+We create a codelet which may only be executed on the CPUs. The 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+optional field starpu_codelet::where is a bitmask that defines where 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+the codelet may be executed. Here, the value ::STARPU_CPU means that 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+only CPUs can execute this codelet. When the optional field 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+starpu_codelet::where is unset, its value is automatically set based 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+on the availability of the different fields <c>XXX_funcs</c>. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+TODO: explain starpu_codelet::cpu_funcs_name 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \section VectorScalingUsingTheCExtension Vector Scaling Using the C Extension 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 The previous example has shown how to submit tasks. In this section, 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -444,14 +529,14 @@ The following lines show how to declare an array of <c>NX</c> elements of type 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 float vector[NX]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 starpu_data_handle_t vector_handle; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             sizeof(vector[0])); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 \endcode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 The first argument, called the <b>data handle</b>, is an opaque pointer which 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 designates the array in StarPU. This is also the structure which is used to 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 describe which data is used by a task. The second argument is the node number 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-where the data originally resides. Here it is 0 since the array <c>vector</c> is in 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+where the data originally resides. Here it is STARPU_MAIN_RAM since the array <c>vector</c> is in 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 the main memory. Then comes the pointer <c>vector</c> where the data can be found in main memory, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 the number of elements in the vector and the size of each element. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 The following shows how to construct a StarPU task that will manipulate the 
			 |