Browse Source

Added an SSE codelet to the vector scaling example.

Cyril Roelandt 14 years ago
parent
commit
2787e1fa46
2 changed files with 20 additions and 1 deletion
  1. 3 1
      examples/basic_examples/vector_scal.c
  2. 17 0
      examples/basic_examples/vector_scal_cpu.c

+ 3 - 1
examples/basic_examples/vector_scal.c

@@ -31,6 +31,7 @@
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
 extern void scal_cpu_func(void *buffers[], void *_args);
+extern void scal_sse_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_opencl_func(void *buffers[], void *_args);
 
@@ -47,7 +48,8 @@ static struct starpu_perfmodel_t vector_scal_power_model = {
 static starpu_codelet cl = {
 	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
 	/* CPU implementation of the codelet */
-	.cpu_func = scal_cpu_func,
+	.cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS,
+	.cpu_funcs = { scal_cpu_func, scal_sse_func},
 #ifdef STARPU_USE_CUDA
 	/* CUDA implementation of the codelet */
 	.cuda_func = scal_cuda_func,

+ 17 - 0
examples/basic_examples/vector_scal_cpu.c

@@ -19,6 +19,7 @@
  */
 
 #include <starpu.h>
+#include <xmmintrin.h>
 
 /* This kernel takes a buffer and scales it by a constant factor */
 void scal_cpu_func(void *buffers[], void *cl_arg)
@@ -52,3 +53,19 @@ void scal_cpu_func(void *buffers[], void *cl_arg)
 		val[i] *= *factor;
 }
 
+/*
+ * SSE implementation of the scaling kernel: multiplies every element of the
+ * vector described by buffers[0] by the float that cl_arg points to.
+ *
+ * Whole groups of four floats are scaled with SSE instructions; the 0 to 3
+ * elements left over when the length is not a multiple of four are scaled
+ * one by one, so no memory past the last element is read or written.
+ */
+void scal_sse_func(void *buffers[], void *cl_arg)
+{
+	float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]);
+	unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]);
+	float factor = *(float *) cl_arg;
+
+	/* The same factor replicated in the four lanes of an SSE register */
+	__m128 FACTOR = _mm_set1_ps(factor);
+
+	/* Number of elements covered by complete groups of four (rounded down) */
+	unsigned int n_simd = n - (n % 4);
+
+	unsigned int i;
+	for (i = 0; i < n_simd; i += 4)
+	{
+		/* Unaligned load/store: the buffer is not known to be 16-byte
+		 * aligned here */
+		__m128 chunk = _mm_loadu_ps(&vector[i]);
+		_mm_storeu_ps(&vector[i], _mm_mul_ps(FACTOR, chunk));
+	}
+
+	/* Scalar tail for the remaining n % 4 elements */
+	for (i = n_simd; i < n; i++)
+		vector[i] *= factor;
+}