Browse Source

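Actually achieve parallelism: launch one OpenCL work-item per row (global size = nrow, row = get_global_id(0)) instead of launching 1024 work-items that each loop over every row, which performed the whole computation 1024 times.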

Samuel Thibault 14 years ago
parent
commit
9aa661f1ba
2 changed files with 4 additions and 2 deletions
  1. 1 1
      examples/spmv/spmv_kernels.c
  2. 3 1
      examples/spmv/spmv_opencl.cl

+ 1 - 1
examples/spmv/spmv_kernels.c

@@ -62,7 +62,7 @@ void spmv_kernel_opencl(void *descr[], void *args)
         if (err) STARPU_OPENCL_REPORT_ERROR(err);
 
 	{
-                size_t global=1024;
+                size_t global=nrow;
 		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, &event);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}

+ 3 - 1
examples/spmv/spmv_opencl.cl

@@ -21,7 +21,9 @@ __kernel void spmv(unsigned nnz, unsigned nrow,
                    __global float *vecout, unsigned nx_out)
 {
 	unsigned row;
-	for (row = 0; row < nrow; row++)
+	// for (row = 0; row < nrow; row++)
+	row = get_global_id(0);
+	if (row < nrow)
 	{
 		float tmp = 0.0f;
 		unsigned index;