Browse Source

rewrite matvecmult kernel

Samuel Thibault 11 years ago
parent
commit
989a9c16de
1 changed file with 19 additions and 37 deletions
  1. 19 37
      examples/matvecmult/matvecmult_kernel.cl

+ 19 - 37
examples/matvecmult/matvecmult_kernel.cl

@@ -1,49 +1,31 @@
 /*
- * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
+ * StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * NVIDIA Corporation and its licensors retain all intellectual property and
- * proprietary rights in and to this software and related documentation.
- * Any use, reproduction, disclosure, or distribution of this software
- * and related documentation without an express license agreement from
- * NVIDIA Corporation is strictly prohibited.
+ * Copyright (C) 2014  Université de Bordeaux 1
  *
- * Please refer to the applicable NVIDIA end user license agreement (EULA)
- * associated with this source code for terms and conditions that govern
- * your use of this NVIDIA software.
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
  *
- */
-
-/* Matrix-vector multiplication: W = M * V.
- * Device code.
- *
- * This sample implements matrix-vector multiplication.
- * It has been written for clarity of exposition to illustrate various OpenCL
- * programming principles and optimizatoins, not with the goal of providing
- * the most performant generic kernel for matrix-vector multiplication.
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  *
- * CUBLAS provides high-performance matrix-vector multiplication on GPU.
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-__kernel void matVecMult(
-                         __global float* M,
-                         __global float* V,
-                         int width, int height,
-                         __global float* W
-                         )
+__kernel void matVecMult(const __global float *A, const __global float *X, int n, int m, __global float *Y)
 {
-        // Row index
-        uint y = get_global_id(0);
-        if (y < height)
+	const int i = get_global_id(0);
+	if (i < m)
 	{
-                // Row pointer
-                const __global float* row = M + y * width;
+		float val = 0;
+		int j;
 
-                // Compute dot product
-                float dotProduct = 0;
-                for (int x = 0; x < width; ++x)
-                        dotProduct += row[x] * V[x];
+		for (j = 0; j < n; j++)
+		       val += A[i*n+j] * X[j];
 
-                // Write result to global memory
-                W[y] = dotProduct;
-        }
+		Y[i] = val;
+	}
 }