func_sgemm_ibm.c 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009, 2010 Université de Bordeaux 1
  4. * Copyright (C) 2010 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include "func_gemm_ibm.h"
  18. #include <blas_s.h>
  19. void func_sgemm_ibm(__attribute__ ((unused)) void **alloc,
  20. __attribute__ ((unused)) void **in,
  21. __attribute__ ((unused)) void **inout,
  22. __attribute__ ((unused)) void **out)
  23. {
  24. /* we assume data will be in A:R,B:R,C:RW mode
  25. * -> in[0] : describe problem
  26. * -> in[1] : A
  27. * -> in[2] : B
  28. * -> inout[0] : C
  29. *
  30. * C = AB + C
  31. * but, being in fortran ordering, we compute
  32. * t(C) = t(B)t(A) + t(C) instead
  33. */
  34. struct ibm_gemm_block_conf *conf = in[0];
  35. float *A = in[1];
  36. float *B = in[2];
  37. float *C = inout[0];
  38. sgemm_spu(conf->m, conf->n, conf->k, B, A, C);
  39. }