func_sgemm_ibm.c 643 B

123456789101112131415161718192021222324252627
  1. #include "func_sgemm_ibm.h"
  2. #include <blas_s.h>
  3. void func_sgemm_ibm(__attribute__ ((unused)) void **alloc,
  4. __attribute__ ((unused)) void **in,
  5. __attribute__ ((unused)) void **inout,
  6. __attribute__ ((unused)) void **out)
  7. {
  8. /* we assume data will be in A:R,B:R,C:RW mode
  9. * -> in[0] : describe problem
  10. * -> in[1] : A
  11. * -> in[2] : B
  12. * -> inout[0] : C
  13. *
  14. * C = AB + C
  15. * but, being in fortran ordering, we compute
  16. * t(C) = t(B)t(A) + t(C) instead
  17. */
  18. struct ibm_sgemm_block_conf *conf = in[0];
  19. float *A = in[1];
  20. float *B = in[2];
  21. float *C = inout[0];
  22. sgemm_spu(conf->m, conf->n, conf->k, B, A, C);
  23. }