lu_kernels_model.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011,2014 Inria
  4. * Copyright (C) 2008-2011,2014 Université de Bordeaux
  5. * Copyright (C) 2010-2015,2017 CNRS
  6. * Copyright (C) 2013 Thibaut Lambert
  7. * Copyright (C) 2011 Télécom-SudParis
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include "lu_kernels_model.h"
  21. /*
  22. * As a convention, in that file, buffers[0] is represented by A,
  23. * buffers[1] is B ...
  24. */
  25. /*
  26. * Number of flops of Gemm
  27. */
  /*
   * Optional noise on the predicted costs.
   *
   * Define USE_PERTURBATION (e.g. by enabling the line below) to have every
   * prediction multiplied by (starpu_drand48()*2*AMPL + 1 - AMPL); otherwise
   * PERTURBATE() hands its argument back untouched.
   * NOTE(review): AMPL is not defined in this file -- presumably it comes
   * from lu_kernels_model.h; confirm before enabling the perturbation.
   */
  /* #define USE_PERTURBATION 1 */
  #if defined(USE_PERTURBATION)
  #  define PERTURBATE(a) ((starpu_drand48()*2.0f*(AMPL) + 1.0f - (AMPL))*(a))
  #else
  #  define PERTURBATE(a) (a)
  #endif
  34. /*
  35. *
  36. * Generic models
  37. *
  38. */
  39. double task_11_cost(struct starpu_task *task, unsigned nimpl)
  40. {
  41. (void)nimpl;
  42. uint32_t n;
  43. n = starpu_matrix_get_nx(task->handles[0]);
  44. double cost = ((n*n*n)/537.5);
  45. return PERTURBATE(cost);
  46. }
  47. double task_12_cost(struct starpu_task *task, unsigned nimpl)
  48. {
  49. (void)nimpl;
  50. uint32_t n;
  51. n = starpu_matrix_get_nx(task->handles[0]);
  52. /* double cost = ((n*n*n)/1744.695); */
  53. double cost = ((n*n*n)/3210.80);
  54. /* fprintf(stderr, "task 12 predicts %e\n", cost); */
  55. return PERTURBATE(cost);
  56. }
  57. double task_21_cost(struct starpu_task *task, unsigned nimpl)
  58. {
  59. (void)nimpl;
  60. uint32_t n;
  61. n = starpu_matrix_get_nx(task->handles[0]);
  62. /* double cost = ((n*n*n)/1744.695); */
  63. double cost = ((n*n*n)/3691.53);
  64. /* fprintf(stderr, "task 12 predicts %e\n", cost); */
  65. return PERTURBATE(cost);
  66. }
  67. double task_22_cost(struct starpu_task *task, unsigned nimpl)
  68. {
  69. (void)nimpl;
  70. uint32_t nx, ny, nz;
  71. nx = starpu_matrix_get_nx(task->handles[2]);
  72. ny = starpu_matrix_get_ny(task->handles[2]);
  73. nz = starpu_matrix_get_ny(task->handles[0]);
  74. double cost = ((nx*ny*nz)/4110.0);
  75. return PERTURBATE(cost);
  76. }
  77. /*
  78. *
  79. * Models for CUDA
  80. *
  81. */
  82. double task_11_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  83. {
  84. (void)arch;
  85. (void)nimpl;
  86. uint32_t n;
  87. n = starpu_matrix_get_nx(task->handles[0]);
  88. double cost = ((n*n*n)/1853.7806);
  89. /* printf("CUDA task 11 ; predict %e\n", cost); */
  90. return PERTURBATE(cost);
  91. }
  92. double task_12_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  93. {
  94. (void)arch;
  95. (void)nimpl;
  96. uint32_t n;
  97. n = starpu_matrix_get_nx(task->handles[0]);
  98. double cost = ((n*n*n)/42838.5718);
  99. /* printf("CUDA task 12 ; predict %e\n", cost); */
  100. return PERTURBATE(cost);
  101. }
  102. double task_21_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  103. {
  104. (void)arch;
  105. (void)nimpl;
  106. uint32_t n;
  107. n = starpu_matrix_get_nx(task->handles[0]);
  108. double cost = ((n*n*n)/49208.667);
  109. /* printf("CUDA task 21 ; predict %e\n", cost); */
  110. return PERTURBATE(cost);
  111. }
  112. double task_22_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  113. {
  114. (void)arch;
  115. (void)nimpl;
  116. uint32_t nx, ny, nz;
  117. nx = starpu_matrix_get_nx(task->handles[2]);
  118. ny = starpu_matrix_get_ny(task->handles[2]);
  119. nz = starpu_matrix_get_ny(task->handles[0]);
  120. double cost = ((nx*ny*nz)/57523.560);
  121. /* printf("CUDA task 22 ; predict %e\n", cost); */
  122. return PERTURBATE(cost);
  123. }
  124. /*
  125. *
  126. * Models for CPUs
  127. *
  128. */
  129. double task_11_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  130. {
  131. (void)arch;
  132. (void)nimpl;
  133. uint32_t n;
  134. n = starpu_matrix_get_nx(task->handles[0]);
  135. double cost = ((n*n*n)/537.5);
  136. /* printf("CPU task 11 ; predict %e\n", cost); */
  137. return PERTURBATE(cost);
  138. }
  139. double task_12_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  140. {
  141. (void)arch;
  142. (void)nimpl;
  143. uint32_t n;
  144. n = starpu_matrix_get_nx(task->handles[0]);
  145. double cost = ((n*n*n)/6668.224);
  146. /* printf("CPU task 12 ; predict %e\n", cost); */
  147. return PERTURBATE(cost);
  148. }
  149. double task_21_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  150. {
  151. (void)arch;
  152. (void)nimpl;
  153. uint32_t n;
  154. n = starpu_matrix_get_nx(task->handles[0]);
  155. double cost = ((n*n*n)/6793.8423);
  156. /* printf("CPU task 21 ; predict %e\n", cost); */
  157. return PERTURBATE(cost);
  158. }
  159. double task_22_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  160. {
  161. (void)arch;
  162. (void)nimpl;
  163. uint32_t nx, ny, nz;
  164. nx = starpu_matrix_get_nx(task->handles[2]);
  165. ny = starpu_matrix_get_ny(task->handles[2]);
  166. nz = starpu_matrix_get_ny(task->handles[0]);
  167. double cost = ((nx*ny*nz)/4203.0175);
  168. /* printf("CPU task 22 ; predict %e\n", cost); */
  169. return PERTURBATE(cost);
  170. }
  171. void initialize_lu_kernels_model(struct starpu_perfmodel* model, char * symbol,
  172. double (*cost_function)(struct starpu_task *, unsigned),
  173. double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned),
  174. double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned))
  175. {
  176. (void)cost_function;
  177. model->symbol = symbol;
  178. model->type = STARPU_HISTORY_BASED;
  179. starpu_perfmodel_init(model);
  180. starpu_perfmodel_set_per_devices_cost_function(model, 0, cpu_cost_function, STARPU_CPU_WORKER, 0, 1, -1);
  181. if(starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) != 0)
  182. {
  183. starpu_perfmodel_set_per_devices_cost_function(model, 0, cuda_cost_function, STARPU_CUDA_WORKER, 0, 1, -1);
  184. }
  185. }