dw_mult.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. /*
  2. * StarPU
  3. * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #ifndef __MULT_H__
  17. #define __MULT_H__
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pthread.h>
#include <sys/types.h>
#include <sys/time.h>

#include <common/blas.h>
#include <common/blas_model.h>
#include <starpu.h>
  27. #ifdef USE_CUDA
  28. #include <cuda.h>
  29. #include <cublas.h>
  30. #endif
  31. #ifdef USE_GORDON
  32. #include <cell/gordon/spu/functions.h>
  33. #endif
  34. #define MAXSLICESX 64
  35. #define MAXSLICESY 64
  36. #define MAXSLICESZ 64
  37. #define BLAS3_FLOP(n1,n2,n3) \
  38. (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
  39. #define BLAS3_LS(n1,n2,n3) \
  40. ((2*(n1)*(n3) + (n1)*(n2) + (n2)*(n3))*sizeof(float))
  41. struct block_conf {
  42. uint32_t m;
  43. uint32_t n;
  44. uint32_t k;
  45. uint32_t pad;
  46. };
  47. #define NITER 100
  48. unsigned niter = NITER;
  49. unsigned nslicesx = 4;
  50. unsigned nslicesy = 4;
  51. unsigned nslicesz = 4;
  52. unsigned xdim = 256;
  53. unsigned ydim = 256;
  54. unsigned zdim = 64;
  55. unsigned norandom = 0;
  56. unsigned pin = 0;
  57. unsigned use_common_model = 0;
  58. /* to compute MFlop/s */
  59. uint64_t flop_cublas = 0;
  60. uint64_t flop_atlas = 0;
  61. /* to compute MB/s (load/store) */
  62. uint64_t ls_cublas = 0;
  63. uint64_t ls_atlas = 0;
  64. struct timeval start;
  65. struct timeval end;
  66. static int taskcounter __attribute__ ((unused));
  67. static struct block_conf conf __attribute__ ((aligned (128)));
  68. #define BLOCKSIZEX (xdim / nslicesx)
  69. #define BLOCKSIZEY (ydim / nslicesy)
  70. #define BLOCKSIZEZ (zdim / nslicesz)
  71. static void parse_args(int argc, char **argv)
  72. {
  73. int i;
  74. for (i = 1; i < argc; i++) {
  75. if (strcmp(argv[i], "-nblocks") == 0) {
  76. char *argptr;
  77. nslicesx = strtol(argv[++i], &argptr, 10);
  78. nslicesy = nslicesx;
  79. nslicesz = nslicesx;
  80. }
  81. if (strcmp(argv[i], "-nblocksx") == 0) {
  82. char *argptr;
  83. nslicesx = strtol(argv[++i], &argptr, 10);
  84. }
  85. if (strcmp(argv[i], "-nblocksy") == 0) {
  86. char *argptr;
  87. nslicesy = strtol(argv[++i], &argptr, 10);
  88. }
  89. if (strcmp(argv[i], "-nblocksz") == 0) {
  90. char *argptr;
  91. nslicesz = strtol(argv[++i], &argptr, 10);
  92. }
  93. if (strcmp(argv[i], "-x") == 0) {
  94. char *argptr;
  95. xdim = strtol(argv[++i], &argptr, 10);
  96. }
  97. if (strcmp(argv[i], "-y") == 0) {
  98. char *argptr;
  99. ydim = strtol(argv[++i], &argptr, 10);
  100. }
  101. if (strcmp(argv[i], "-z") == 0) {
  102. char *argptr;
  103. zdim = strtol(argv[++i], &argptr, 10);
  104. }
  105. if (strcmp(argv[i], "-iter") == 0) {
  106. char *argptr;
  107. niter = strtol(argv[++i], &argptr, 10);
  108. }
  109. if (strcmp(argv[i], "-no-random") == 0) {
  110. norandom = 1;
  111. }
  112. if (strcmp(argv[i], "-pin") == 0) {
  113. pin = 1;
  114. }
  115. if (strcmp(argv[i], "-common-model") == 0) {
  116. use_common_model = 1;
  117. }
  118. }
  119. assert(nslicesx <= MAXSLICESX);
  120. assert(nslicesy <= MAXSLICESY);
  121. assert(nslicesz <= MAXSLICESZ);
  122. }
  123. static void display_memory_consumption(void)
  124. {
  125. fprintf(stderr, "Total memory : %ld MB\n",
  126. (MAXSLICESY*MAXSLICESZ*sizeof(float *)
  127. + MAXSLICESZ*MAXSLICESX*sizeof(float *)
  128. + MAXSLICESY*MAXSLICESX*sizeof(float *)
  129. + MAXSLICESY*MAXSLICESZ*sizeof(starpu_data_handle)
  130. + MAXSLICESZ*MAXSLICESX*sizeof(starpu_data_handle)
  131. + MAXSLICESY*MAXSLICESX*sizeof(starpu_data_handle)
  132. + ydim*zdim*sizeof(float)
  133. + zdim*xdim*sizeof(float)
  134. + ydim*xdim*sizeof(float))/(1024*1024) );
  135. }
  136. #endif // __MULT_H__