lu_kernels_model.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2008-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2011 Télécom-SudParis
  5. * Copyright (C) 2013 Thibaut Lambert
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include "lu_kernels_model.h"
/*
 * As a convention, in this file, buffers[0] is represented by A,
 * buffers[1] is B ...
 */
  23. /*
  24. * Number of flops of Gemm
  25. */
  26. /* #define USE_PERTURBATION 1 */
  27. #ifdef USE_PERTURBATION
  28. #define PERTURBATE(a) ((starpu_drand48()*2.0f*(AMPL) + 1.0f - (AMPL))*(a))
  29. #else
  30. #define PERTURBATE(a) (a)
  31. #endif
  32. /*
  33. *
  34. * Generic models
  35. *
  36. */
  37. double task_11_cost(struct starpu_task *task, unsigned nimpl)
  38. {
  39. (void)nimpl;
  40. uint32_t n;
  41. n = starpu_matrix_get_nx(task->handles[0]);
  42. double cost = ((n*n*n)/537.5);
  43. return PERTURBATE(cost);
  44. }
  45. double task_12_cost(struct starpu_task *task, unsigned nimpl)
  46. {
  47. (void)nimpl;
  48. uint32_t n;
  49. n = starpu_matrix_get_nx(task->handles[0]);
  50. /* double cost = ((n*n*n)/1744.695); */
  51. double cost = ((n*n*n)/3210.80);
  52. /* fprintf(stderr, "task 12 predicts %e\n", cost); */
  53. return PERTURBATE(cost);
  54. }
  55. double task_21_cost(struct starpu_task *task, unsigned nimpl)
  56. {
  57. (void)nimpl;
  58. uint32_t n;
  59. n = starpu_matrix_get_nx(task->handles[0]);
  60. /* double cost = ((n*n*n)/1744.695); */
  61. double cost = ((n*n*n)/3691.53);
  62. /* fprintf(stderr, "task 12 predicts %e\n", cost); */
  63. return PERTURBATE(cost);
  64. }
  65. double task_22_cost(struct starpu_task *task, unsigned nimpl)
  66. {
  67. (void)nimpl;
  68. uint32_t nx, ny, nz;
  69. nx = starpu_matrix_get_nx(task->handles[2]);
  70. ny = starpu_matrix_get_ny(task->handles[2]);
  71. nz = starpu_matrix_get_ny(task->handles[0]);
  72. double cost = ((nx*ny*nz)/4110.0);
  73. return PERTURBATE(cost);
  74. }
  75. /*
  76. *
  77. * Models for CUDA
  78. *
  79. */
  80. double task_11_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  81. {
  82. (void)arch;
  83. (void)nimpl;
  84. uint32_t n;
  85. n = starpu_matrix_get_nx(task->handles[0]);
  86. double cost = ((n*n*n)/1853.7806);
  87. /* printf("CUDA task 11 ; predict %e\n", cost); */
  88. return PERTURBATE(cost);
  89. }
  90. double task_12_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  91. {
  92. (void)arch;
  93. (void)nimpl;
  94. uint32_t n;
  95. n = starpu_matrix_get_nx(task->handles[0]);
  96. double cost = ((n*n*n)/42838.5718);
  97. /* printf("CUDA task 12 ; predict %e\n", cost); */
  98. return PERTURBATE(cost);
  99. }
  100. double task_21_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  101. {
  102. (void)arch;
  103. (void)nimpl;
  104. uint32_t n;
  105. n = starpu_matrix_get_nx(task->handles[0]);
  106. double cost = ((n*n*n)/49208.667);
  107. /* printf("CUDA task 21 ; predict %e\n", cost); */
  108. return PERTURBATE(cost);
  109. }
  110. double task_22_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  111. {
  112. (void)arch;
  113. (void)nimpl;
  114. uint32_t nx, ny, nz;
  115. nx = starpu_matrix_get_nx(task->handles[2]);
  116. ny = starpu_matrix_get_ny(task->handles[2]);
  117. nz = starpu_matrix_get_ny(task->handles[0]);
  118. double cost = ((nx*ny*nz)/57523.560);
  119. /* printf("CUDA task 22 ; predict %e\n", cost); */
  120. return PERTURBATE(cost);
  121. }
  122. /*
  123. *
  124. * Models for CPUs
  125. *
  126. */
  127. double task_11_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  128. {
  129. (void)arch;
  130. (void)nimpl;
  131. uint32_t n;
  132. n = starpu_matrix_get_nx(task->handles[0]);
  133. double cost = ((n*n*n)/537.5);
  134. /* printf("CPU task 11 ; predict %e\n", cost); */
  135. return PERTURBATE(cost);
  136. }
  137. double task_12_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  138. {
  139. (void)arch;
  140. (void)nimpl;
  141. uint32_t n;
  142. n = starpu_matrix_get_nx(task->handles[0]);
  143. double cost = ((n*n*n)/6668.224);
  144. /* printf("CPU task 12 ; predict %e\n", cost); */
  145. return PERTURBATE(cost);
  146. }
  147. double task_21_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  148. {
  149. (void)arch;
  150. (void)nimpl;
  151. uint32_t n;
  152. n = starpu_matrix_get_nx(task->handles[0]);
  153. double cost = ((n*n*n)/6793.8423);
  154. /* printf("CPU task 21 ; predict %e\n", cost); */
  155. return PERTURBATE(cost);
  156. }
  157. double task_22_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  158. {
  159. (void)arch;
  160. (void)nimpl;
  161. uint32_t nx, ny, nz;
  162. nx = starpu_matrix_get_nx(task->handles[2]);
  163. ny = starpu_matrix_get_ny(task->handles[2]);
  164. nz = starpu_matrix_get_ny(task->handles[0]);
  165. double cost = ((nx*ny*nz)/4203.0175);
  166. /* printf("CPU task 22 ; predict %e\n", cost); */
  167. return PERTURBATE(cost);
  168. }
  169. void initialize_lu_kernels_model(struct starpu_perfmodel* model, char * symbol,
  170. double (*cost_function)(struct starpu_task *, unsigned),
  171. double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned),
  172. double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned))
  173. {
  174. (void)cost_function;
  175. model->symbol = symbol;
  176. model->type = STARPU_HISTORY_BASED;
  177. starpu_perfmodel_init(model);
  178. starpu_perfmodel_set_per_devices_cost_function(model, 0, cpu_cost_function, STARPU_CPU_WORKER, 0, 1, -1);
  179. if(starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) != 0)
  180. {
  181. starpu_perfmodel_set_per_devices_cost_function(model, 0, cuda_cost_function, STARPU_CUDA_WORKER, 0, 1, -1);
  182. }
  183. }