|
@@ -1,49 +1,31 @@
|
|
|
/*
|
|
|
- * Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
|
|
|
+ * StarPU --- Runtime system for heterogeneous multicore architectures.
|
|
|
*
|
|
|
- * NVIDIA Corporation and its licensors retain all intellectual property and
|
|
|
- * proprietary rights in and to this software and related documentation.
|
|
|
- * Any use, reproduction, disclosure, or distribution of this software
|
|
|
- * and related documentation without an express license agreement from
|
|
|
- * NVIDIA Corporation is strictly prohibited.
|
|
|
+ * Copyright (C) 2014 Université de Bordeaux 1
|
|
|
*
|
|
|
- * Please refer to the applicable NVIDIA end user license agreement (EULA)
|
|
|
- * associated with this source code for terms and conditions that govern
|
|
|
- * your use of this NVIDIA software.
|
|
|
+ * StarPU is free software; you can redistribute it and/or modify
|
|
|
+ * it under the terms of the GNU Lesser General Public License as published by
|
|
|
+ * the Free Software Foundation; either version 2.1 of the License, or (at
|
|
|
+ * your option) any later version.
|
|
|
*
|
|
|
- */
|
|
|
-
|
|
|
-/* Matrix-vector multiplication: W = M * V.
|
|
|
- * Device code.
|
|
|
- *
|
|
|
- * This sample implements matrix-vector multiplication.
|
|
|
- * It has been written for clarity of exposition to illustrate various OpenCL
|
|
|
- * programming principles and optimizatoins, not with the goal of providing
|
|
|
- * the most performant generic kernel for matrix-vector multiplication.
|
|
|
+ * StarPU is distributed in the hope that it will be useful, but
|
|
|
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
*
|
|
|
- * CUBLAS provides high-performance matrix-vector multiplication on GPU.
|
|
|
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
|
|
|
*/
|
|
|
|
|
|
-__kernel void matVecMult(
|
|
|
- __global float* M,
|
|
|
- __global float* V,
|
|
|
- int width, int height,
|
|
|
- __global float* W
|
|
|
- )
|
|
|
+__kernel void matVecMult(const __global float *A, const __global float *X, int n, int m, __global float *Y)
|
|
|
{
|
|
|
- // Row index
|
|
|
- uint y = get_global_id(0);
|
|
|
- if (y < height)
|
|
|
+ const int i = get_global_id(0);
|
|
|
+ if (i < m)
|
|
|
{
|
|
|
- // Row pointer
|
|
|
- const __global float* row = M + y * width;
|
|
|
+ float val = 0;
|
|
|
+ int j;
|
|
|
|
|
|
- // Compute dot product
|
|
|
- float dotProduct = 0;
|
|
|
- for (int x = 0; x < width; ++x)
|
|
|
- dotProduct += row[x] * V[x];
|
|
|
+ for (j = 0; j < n; j++)
|
|
|
+ val += A[i*n+j] * X[j];
|
|
|
|
|
|
- // Write result to global memory
|
|
|
- W[y] = dotProduct;
|
|
|
- }
|
|
|
+ Y[i] = val;
|
|
|
+ }
|
|
|
}
|