12 years ago · 989a9c16de
--- a/examples/matvecmult/matvecmult_kernel.cl
+++ b/examples/matvecmult/matvecmult_kernel.cl
@@ -1,49 +1,31 @@
 
				 /*
			
 
				- * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
			
 
				+ * StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * NVIDIA Corporation and its licensors retain all intellectual property and
			
 
				- * proprietary rights in and to this software and related documentation.
			
 
				- * Any use, reproduction, disclosure, or distribution of this software
			
 
				- * and related documentation without an express license agreement from
			
 
				- * NVIDIA Corporation is strictly prohibited.
			
 
				+ * Copyright (C) 2014  Université de Bordeaux 1
			
 
				  *
			
 
				- * Please refer to the applicable NVIDIA end user license agreement (EULA)
			
 
				- * associated with this source code for terms and conditions that govern
			
 
				- * your use of this NVIDIA software.
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				  *
			
 
				- */
			
 
				-
			
 
				-/* Matrix-vector multiplication: W = M * V.
			
 
				- * Device code.
			
 
				- *
			
 
				- * This sample implements matrix-vector multiplication.
			
 
				- * It has been written for clarity of exposition to illustrate various OpenCL
			
 
				- * programming principles and optimizatoins, not with the goal of providing
			
 
				- * the most performant generic kernel for matrix-vector multiplication.
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				  *
			
 
				- * CUBLAS provides high-performance matrix-vector multiplication on GPU.
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-__kernel void matVecMult(
			
 
				-                         __global float* M,
			
 
				-                         __global float* V,
			
 
				-                         int width, int height,
			
 
				-                         __global float* W
			
 
				-                         )
			
 
				+__kernel void matVecMult(const __global float *A, const __global float *X, int n, int m, __global float *Y)
			
 
				 {
			
 
				-        // Row index
			
 
				-        uint y = get_global_id(0);
			
 
				-        if (y < height)
			
 
				+	const int i = get_global_id(0);
			
 
				+	if (i < m)
			
 
				 	{
			
 
				-                // Row pointer
			
 
				-                const __global float* row = M + y * width;
			
 
				+		float val = 0;
			
 
				+		int j;
			
 
				 
			
 
				-                // Compute dot product
			
 
				-                float dotProduct = 0;
			
 
				-                for (int x = 0; x < width; ++x)
			
 
				-                        dotProduct += row[x] * V[x];
			
 
				+		for (j = 0; j < n; j++)
			
 
				+		       val += A[i*n+j] * X[j];
			
 
				 
			
 
				-                // Write result to global memory
			
 
				-                W[y] = dotProduct;
			
 
				-        }
			
 
				+		Y[i] = val;
			
 
				+	}
			
 
				 }