cpu_mult.c 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2018-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2018 Alexis Juven
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <stdint.h>
  18. #include <starpu.h>
  /*
   * CPU implementation of the matrix-product codelet: C = A * B in single
   * precision (C is overwritten, not accumulated into).
   *
   * The codelet is passed 3 matrices: descr[0] describes A, descr[1]
   * describes B and descr[2] describes C. Each entry gives the layout of the
   * matrix in the local memory (i.e. RAM in the case of a CPU). Since the data
   * was registered with the "matrix" data interface, the STARPU_MATRIX_GET_*
   * accessors are used to read that layout.
   *
   * arg is the codelet argument; it is unused here.
   *
   * FORTRAN (column-major) ordering is assumed: element (row, col) of a
   * matrix lives at offset row + col * ld from its base pointer.
   */
  void cpu_mult(void *descr[], void *arg)
  {
      (void)arg;

      /* Pointers to the first element of each local copy. */
      const float *subA = (const float *)STARPU_MATRIX_GET_PTR(descr[0]);
      const float *subB = (const float *)STARPU_MATRIX_GET_PTR(descr[1]);
      float *subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]);

      /* NX is the number of consecutive elements in one column, NY is the
       * number of columns, and LD (leading dimension) is the distance, in
       * elements, between the starts of two successive columns.
       * NB: in case some filters were used, the leading dimension is not
       * guaranteed to be the same in main memory (on the original matrix)
       * and on the accelerator!
       *
       * Everything is widened to size_t so that the offset computations
       * below (index * leading dimension) are not performed in 32-bit
       * arithmetic, where they would silently wrap for very large tiles. */
      const size_t nxC = STARPU_MATRIX_GET_NX(descr[2]);
      const size_t nyC = STARPU_MATRIX_GET_NY(descr[2]);
      const size_t nyA = STARPU_MATRIX_GET_NY(descr[0]);

      const size_t ldA = STARPU_MATRIX_GET_LD(descr[0]);
      const size_t ldB = STARPU_MATRIX_GET_LD(descr[1]);
      const size_t ldC = STARPU_MATRIX_GET_LD(descr[2]);

      /* Walk C column by column (i), then down each column (j). */
      for (size_t i = 0; i < nyC; i++)
      {
          /* Column i of B and of C. */
          const float *colB = subB + i * ldB;
          float *colC = subC + i * ldC;

          for (size_t j = 0; j < nxC; j++)
          {
              /* Dot product of row j of A with column i of B. */
              float sum = 0.0f;

              for (size_t k = 0; k < nyA; k++)
              {
                  sum += subA[j + k * ldA] * colB[k];
              }

              colC[j] = sum;
          }
      }
  }