func_sgemm_ibm.c 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. /*
  2. * StarPU
  3. * Copyright (C) Université Bordeaux 1, CNRS 2008-2009 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include "func_sgemm_ibm.h"
  17. #include <blas_s.h>
  18. void func_sgemm_ibm(__attribute__ ((unused)) void **alloc,
  19. __attribute__ ((unused)) void **in,
  20. __attribute__ ((unused)) void **inout,
  21. __attribute__ ((unused)) void **out)
  22. {
  23. /* we assume data will be in A:R,B:R,C:RW mode
  24. * -> in[0] : describe problem
  25. * -> in[1] : A
  26. * -> in[2] : B
  27. * -> inout[0] : C
  28. *
  29. * C = AB + C
  30. * but, being in fortran ordering, we compute
  31. * t(C) = t(B)t(A) + t(C) instead
  32. */
  33. struct ibm_sgemm_block_conf *conf = in[0];
  34. float *A = in[1];
  35. float *B = in[2];
  36. float *C = inout[0];
  37. sgemm_spu(conf->m, conf->n, conf->k, B, A, C);
  38. }