13 年之前 · f1cb2d818f
--- a/doc/chapters/advanced-api.texi
+++ b/doc/chapters/advanced-api.texi
@@ -85,27 +85,27 @@ starpu_data_handle handle;
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 void cpu_to_opencl_opencl_func(void *buffers[], void *args);
			
 
				 starpu_codelet cpu_to_opencl_cl = @{
			
 
				-	.where = STARPU_OPENCL,
			
 
				-	.opencl_func = cpu_to_opencl_opencl_func,
			
 
				-	.nbuffers = 1
			
 
				+    .where = STARPU_OPENCL,
			
 
				+    .opencl_func = cpu_to_opencl_opencl_func,
			
 
				+    .nbuffers = 1
			
 
				 @};
			
 
				 
			
 
				 void opencl_to_cpu_func(void *buffers[], void *args);
			
 
				 starpu_codelet opencl_to_cpu_cl = @{
			
 
				-	.where = STARPU_CPU,
			
 
				-	.cpu_func = opencl_to_cpu_func,
			
 
				-	.nbuffers = 1
			
 
				+    .where = STARPU_CPU,
			
 
				+    .cpu_func = opencl_to_cpu_func,
			
 
				+    .nbuffers = 1
			
 
				 @};
			
 
				 #endif
			
 
				 
			
 
				 struct starpu_multiformat_data_interface_ops format_ops = @{
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-	.opencl_elemsize = 2 * sizeof(float),
			
 
				-	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
			
 
				-	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
			
 
				+    .opencl_elemsize = 2 * sizeof(float),
			
 
				+    .cpu_to_opencl_cl = &cpu_to_opencl_cl,
			
 
				+    .opencl_to_cpu_cl = &opencl_to_cpu_cl,
			
 
				 #endif
			
 
				-	.cpu_elemsize = 2 * sizeof(float),
			
 
				-	...
			
 
				+    .cpu_elemsize = 2 * sizeof(float),
			
 
				+    ...
			
 
				 @};
			
 
				 starpu_multiformat_data_register(handle, 0, &array_of_structs, NX, &format_ops);
			
 
				 @end example
			
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -32,19 +32,19 @@ to use SSE to scale a vector. The codelet can be written as follows :
 
				 
			
 
				 void scal_sse_func(void *buffers[], void *cl_arg)
			
 
				 @{
			
 
				-	float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]);
			
 
				-	unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]);
			
 
				-	unsigned int n_iterations = n/4;
			
 
				-	if (n % 4 != 0)
			
 
				-		n_iterations++;
			
 
				-
			
 
				-	__m128 *VECTOR = (__m128*) vector;
			
 
				-	__m128 factor __attribute__((aligned(16)));
			
 
				-	factor = _mm_set1_ps(*(float *) cl_arg);
			
 
				-
			
 
				-	unsigned int i;	
			
 
				-	for (i = 0; i < n_iterations; i++)
			
 
				-		VECTOR[i] = _mm_mul_ps(factor, VECTOR[i]);
			
 
				+    float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]);
			
 
				+    unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]);
			
 
				+    unsigned int n_iterations = n/4;
			
 
				+    if (n % 4 != 0)
			
 
				+        n_iterations++;
			
 
				+
			
 
				+    __m128 *VECTOR = (__m128*) vector;
			
 
				+    __m128 factor __attribute__((aligned(16)));
			
 
				+    factor = _mm_set1_ps(*(float *) cl_arg);
			
 
				+
			
 
				+    unsigned int i;    
			
 
				+    for (i = 0; i < n_iterations; i++)
			
 
				+        VECTOR[i] = _mm_mul_ps(factor, VECTOR[i]);
			
 
				 @}
			
 
				 @end smallexample
			
 
				 @end cartouche
			
@@ -57,10 +57,10 @@ to the special value @code{STARPU_MULTIPLE_CPU_IMPLEMENTATIONS}. Note that
 
				 @cartouche
			
 
				 @smallexample
			
 
				 starpu_codelet cl = @{
			
 
				-	.where = STARPU_CPU,
			
 
				-	.cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS,
			
 
				-	.cpu_funcs = @{ scal_cpu_func, scal_sse_func @},
			
 
				-	.nbuffers = 1
			
 
				+    .where = STARPU_CPU,
			
 
				+    .cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS,
			
 
				+    .cpu_funcs = @{ scal_cpu_func, scal_sse_func @},
			
 
				+    .nbuffers = 1
			
 
				 @};
			
 
				 @end smallexample
			
 
				 @end cartouche
			
@@ -394,9 +394,9 @@ task->buffers[1].mode = STARPU_RW;
 
				 char *arg_buffer;
			
 
				 size_t arg_buffer_size;
			
 
				 starpu_pack_cl_args(&arg_buffer, &arg_buffer_size,
			
 
				-		    STARPU_VALUE, &ifactor, sizeof(ifactor),
			
 
				-		    STARPU_VALUE, &ffactor, sizeof(ffactor),
			
 
				-		    0);
			
 
				+                    STARPU_VALUE, &ifactor, sizeof(ifactor),
			
 
				+                    STARPU_VALUE, &ffactor, sizeof(ffactor),
			
 
				+                    0);
			
 
				 task->cl_arg = arg_buffer;
			
 
				 task->cl_arg_size = arg_buffer_size;
			
 
				 int ret = starpu_task_submit(task);
			
--- a/doc/chapters/basic-examples.texi
+++ b/doc/chapters/basic-examples.texi
@@ -591,9 +591,9 @@ be compiled at run-time when calling the function
 
				 
			
 
				 @cartouche
			
 
				 @smallexample
			
 
				-CFLAGS	+=	$(shell pkg-config --cflags libstarpu)
			
 
				-LDFLAGS	+=	$(shell pkg-config --libs libstarpu)
			
 
				-CC	=	gcc
			
 
				+CFLAGS  += $(shell pkg-config --cflags libstarpu)
			
 
				+LDFLAGS += $(shell pkg-config --libs libstarpu)
			
 
				+CC       = gcc
			
 
				 
			
 
				 vector_scal: vector_scal.o vector_scal_cpu.o vector_scal_cuda.o vector_scal_opencl.o
			
 
				 
			
--- a/doc/chapters/vector_scal_cpu.texi
+++ b/doc/chapters/vector_scal_cpu.texi
@@ -42,18 +42,18 @@ void scal_cpu_func(void *buffers[], void *cl_arg)
 
				 
			
 
				 void scal_sse_func(void *buffers[], void *cl_arg)
			
 
				 @{
			
 
				-	float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]);
			
 
				-	unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]);
			
 
				-	unsigned int n_iterations = n/4;
			
 
				-	if (n % 4 != 0)
			
 
				-		n_iterations++;
			
 
				-
			
 
				-	__m128 *VECTOR = (__m128*) vector;
			
 
				-	__m128 factor __attribute__((aligned(16)));
			
 
				-	factor = _mm_set1_ps(*(float *) cl_arg);
			
 
				-
			
 
				-	unsigned int i;
			
 
				-	for (i = 0; i < n_iterations; i++)
			
 
				-		VECTOR[i] = _mm_mul_ps(factor, VECTOR[i]);
			
 
				+    float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]);
			
 
				+    unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]);
			
 
				+    unsigned int n_iterations = n/4;
			
 
				+    if (n % 4 != 0)
			
 
				+        n_iterations++;
			
 
				+
			
 
				+    __m128 *VECTOR = (__m128*) vector;
			
 
				+    __m128 factor __attribute__((aligned(16)));
			
 
				+    factor = _mm_set1_ps(*(float *) cl_arg);
			
 
				+
			
 
				+    unsigned int i;
			
 
				+    for (i = 0; i < n_iterations; i++)
			
 
				+        VECTOR[i] = _mm_mul_ps(factor, VECTOR[i]);
			
 
				 @}
			
 
				 @end smallexample