perf_counters_02.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2019-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <assert.h>
  18. #include <inttypes.h>
  /* fprintf() wrapper that stays quiet when the STARPU_SSILENT environment
   * variable is set. The environment is queried on every expansion. */
  #define FPRINTF(ofile, fmt, ...) \
      do \
      { \
          if (!getenv("STARPU_SSILENT")) \
          { \
              fprintf(ofile, fmt, ## __VA_ARGS__); \
          } \
      } while(0)
  /*
   * Performance counter ids, one variable per counter used by this example.
   * They are file-local and zero-initialized; main() fills them in with
   * starpu_perf_counter_name_to_id() before the listener callbacks read them.
   */

  /* Ids of the counters in the global scope */
  static int id_g_total_submitted;
  static int id_g_peak_submitted;
  static int id_g_peak_ready;

  /* Ids of the counters in the per-worker scope */
  static int id_w_total_executed;
  static int id_w_cumul_execution_time;

  /* Ids of the counters in the per-codelet scope */
  static int id_c_total_submitted;
  static int id_c_peak_submitted;
  static int id_c_peak_ready;
  static int id_c_total_executed;
  static int id_c_cumul_execution_time;
  33. void g_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context)
  34. {
  35. (void) listener;
  36. (void) context;
  37. int64_t g_total_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_g_total_submitted);
  38. int64_t g_peak_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_g_peak_submitted);
  39. int64_t g_peak_ready = starpu_perf_counter_sample_get_int64_value(sample, id_g_peak_ready);
  40. printf("global: g_total_submitted = %"PRId64", g_peak_submitted = %"PRId64", g_peak_ready = %"PRId64"\n", g_total_submitted, g_peak_submitted, g_peak_ready);
  41. }
  42. void w_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context)
  43. {
  44. (void) listener;
  45. (void) context;
  46. int workerid = starpu_worker_get_id();
  47. int64_t w_total_executed = starpu_perf_counter_sample_get_int64_value(sample, id_w_total_executed);
  48. double w_cumul_execution_time = starpu_perf_counter_sample_get_double_value(sample, id_w_cumul_execution_time);
  49. printf("worker[%d]: w_total_executed = %"PRId64", w_cumul_execution_time = %lf\n", workerid, w_total_executed, w_cumul_execution_time);
  50. }
  51. void c_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context)
  52. {
  53. (void) listener;
  54. struct starpu_codelet *cl = context;
  55. int64_t c_total_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_c_total_submitted);
  56. int64_t c_peak_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_c_peak_submitted);
  57. int64_t c_peak_ready = starpu_perf_counter_sample_get_int64_value(sample, id_c_peak_ready);
  58. int64_t c_total_executed = starpu_perf_counter_sample_get_int64_value(sample, id_c_total_executed);
  59. double c_cumul_execution_time = starpu_perf_counter_sample_get_double_value(sample, id_c_cumul_execution_time);
  60. if (cl->name != NULL)
  61. {
  62. printf("codelet[%s]: c_total_submitted = %"PRId64", c_peak_submitted = %"PRId64", c_peak_ready = %"PRId64", c_total_executed = %"PRId64", c_cumul_execution_time = %lf\n", cl->name, c_total_submitted, c_peak_submitted, c_peak_ready, c_total_executed, c_cumul_execution_time);
  63. }
  64. else
  65. {
  66. printf("codelet[%p]: c_total_submitted = %"PRId64", c_peak_submitted = %"PRId64", c_peak_ready = %"PRId64", c_total_executed = %"PRId64", c_cumul_execution_time = %lf\n", cl, c_total_submitted, c_peak_submitted, c_peak_ready, c_total_executed, c_cumul_execution_time);
  67. }
  68. }
  /*
   * CPU implementation of the example codelet.
   *
   * buffers[0] is accessed through the StarPU vector interface as an array of
   * NX ints. cl_args carries one packed int, the iteration count. For each
   * i in [0, niters) the kernel adds i to element (i % NX) of the vector.
   */
  void f(void *buffers[], void *cl_args)
  {
      int *int_vector = (int*)STARPU_VECTOR_GET_PTR(buffers[0]);
      int NX = (int)STARPU_VECTOR_GET_NX(buffers[0]);
      /* Must not be const: starpu_codelet_unpack_args() writes the unpacked
       * value through this pointer. Initialized so the loop bound is defined
       * even if nothing gets unpacked. */
      int niters = 0;
      starpu_codelet_unpack_args(cl_args, &niters);

      /* An empty vector would make `i % NX` a division by zero */
      if (NX <= 0)
      {
          return;
      }

      int i;
      for (i=0; i<niters; i++)
      {
          int_vector[i % NX] += i;
      }
  }
  81. struct starpu_codelet cl =
  82. {
  83. .cpu_funcs = {f},
  84. .cpu_funcs_name = {"f"},
  85. .nbuffers = 1,
  86. .name = "perf_counter_f"
  87. };
  88. const enum starpu_perf_counter_scope g_scope = starpu_perf_counter_scope_global;
  89. const enum starpu_perf_counter_scope w_scope = starpu_perf_counter_scope_per_worker;
  90. const enum starpu_perf_counter_scope c_scope = starpu_perf_counter_scope_per_codelet;
  /* Workload dimensions of the example */
  #define NVECTORS   5     /* number of registered vectors */
  #define NTASKS     1000  /* number of tasks submitted */
  #define NITER      1000  /* iteration count passed to each task */
  #define VECTOR_LEN 2     /* number of ints per vector */
  95. int main(int argc, char **argv)
  96. {
  97. struct starpu_conf conf;
  98. starpu_conf_init(&conf);
  99. /* Start collecting perfomance counter right after initialization */
  100. conf.start_perf_counter_collection = 1;
  101. int ret;
  102. ret = starpu_init(&conf);
  103. if (ret == -ENODEV)
  104. return 77;
  105. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  106. struct starpu_perf_counter_set *g_set = starpu_perf_counter_set_alloc(g_scope);
  107. STARPU_ASSERT(g_set != NULL);
  108. struct starpu_perf_counter_set *w_set = starpu_perf_counter_set_alloc(w_scope);
  109. STARPU_ASSERT(w_set != NULL);
  110. struct starpu_perf_counter_set *c_set = starpu_perf_counter_set_alloc(c_scope);
  111. STARPU_ASSERT(c_set != NULL);
  112. id_g_total_submitted = starpu_perf_counter_name_to_id(g_scope, "starpu.task.g_total_submitted");
  113. STARPU_ASSERT(id_g_total_submitted != -1);
  114. id_g_peak_submitted = starpu_perf_counter_name_to_id(g_scope, "starpu.task.g_peak_submitted");
  115. STARPU_ASSERT(id_g_peak_submitted != -1);
  116. id_g_peak_ready = starpu_perf_counter_name_to_id(g_scope, "starpu.task.g_peak_ready");
  117. STARPU_ASSERT(id_g_peak_ready != -1);
  118. id_w_total_executed = starpu_perf_counter_name_to_id(w_scope, "starpu.task.w_total_executed");
  119. STARPU_ASSERT(id_w_total_executed != -1);
  120. id_w_cumul_execution_time = starpu_perf_counter_name_to_id(w_scope, "starpu.task.w_cumul_execution_time");
  121. STARPU_ASSERT(id_w_cumul_execution_time != -1);
  122. id_c_total_submitted = starpu_perf_counter_name_to_id(c_scope, "starpu.task.c_total_submitted");
  123. STARPU_ASSERT(id_c_total_submitted != -1);
  124. id_c_peak_submitted = starpu_perf_counter_name_to_id(c_scope, "starpu.task.c_peak_submitted");
  125. STARPU_ASSERT(id_c_peak_submitted != -1);
  126. id_c_peak_ready = starpu_perf_counter_name_to_id(c_scope, "starpu.task.c_peak_ready");
  127. STARPU_ASSERT(id_c_peak_ready != -1);
  128. id_c_total_executed = starpu_perf_counter_name_to_id(c_scope, "starpu.task.c_total_executed");
  129. STARPU_ASSERT(id_c_total_executed != -1);
  130. id_c_cumul_execution_time = starpu_perf_counter_name_to_id(c_scope, "starpu.task.c_cumul_execution_time");
  131. STARPU_ASSERT(id_c_cumul_execution_time != -1);
  132. starpu_perf_counter_set_enable_id(g_set, id_g_total_submitted);
  133. starpu_perf_counter_set_enable_id(g_set, id_g_peak_submitted);
  134. starpu_perf_counter_set_enable_id(g_set, id_g_peak_ready);
  135. starpu_perf_counter_set_enable_id(w_set, id_w_total_executed);
  136. starpu_perf_counter_set_enable_id(w_set, id_w_cumul_execution_time);
  137. starpu_perf_counter_set_enable_id(c_set, id_c_total_submitted);
  138. starpu_perf_counter_set_enable_id(c_set, id_c_peak_submitted);
  139. starpu_perf_counter_set_enable_id(c_set, id_c_peak_ready);
  140. starpu_perf_counter_set_enable_id(c_set, id_c_total_executed);
  141. starpu_perf_counter_set_enable_id(c_set, id_c_cumul_execution_time);
  142. struct starpu_perf_counter_listener * g_listener = starpu_perf_counter_listener_init(g_set, g_listener_cb, (void *)(uintptr_t)42);
  143. struct starpu_perf_counter_listener * w_listener = starpu_perf_counter_listener_init(w_set, w_listener_cb, (void *)(uintptr_t)17);
  144. struct starpu_perf_counter_listener * c_listener = starpu_perf_counter_listener_init(c_set, c_listener_cb, (void *)(uintptr_t)76);
  145. starpu_perf_counter_set_global_listener(g_listener);
  146. starpu_perf_counter_set_all_per_worker_listeners(w_listener);
  147. starpu_perf_counter_set_per_codelet_listener(&cl, c_listener);
  148. int* vector[NVECTORS];
  149. starpu_data_handle_t vector_h[NVECTORS];
  150. int v;
  151. for (v=0; v<NVECTORS; v++)
  152. {
  153. vector[v] = calloc(VECTOR_LEN, sizeof(*(vector[v])));
  154. STARPU_ASSERT(vector[v] != NULL);
  155. {
  156. int i;
  157. for (i=0; i<VECTOR_LEN; i++)
  158. {
  159. vector[v][i] = i;
  160. }
  161. }
  162. starpu_vector_data_register(&vector_h[v], STARPU_MAIN_RAM, (uintptr_t)vector[v], VECTOR_LEN, sizeof(*vector[v]));
  163. }
  164. {
  165. int i;
  166. for (i=0; i<NTASKS; i++)
  167. {
  168. v = i % NVECTORS;
  169. const int niter = NITER;
  170. starpu_insert_task(&cl,
  171. STARPU_RW, vector_h[v],
  172. STARPU_VALUE, &niter, sizeof(int),
  173. 0);
  174. }
  175. }
  176. for (v=0; v<NVECTORS; v++)
  177. {
  178. starpu_data_unregister(vector_h[v]);
  179. free(vector[v]);
  180. }
  181. starpu_perf_counter_unset_per_codelet_listener(&cl);
  182. starpu_perf_counter_unset_all_per_worker_listeners();
  183. starpu_perf_counter_unset_global_listener();
  184. starpu_perf_counter_listener_exit(c_listener);
  185. starpu_perf_counter_listener_exit(w_listener);
  186. starpu_perf_counter_listener_exit(g_listener);
  187. starpu_perf_counter_set_disable_id(c_set, id_c_cumul_execution_time);
  188. starpu_perf_counter_set_disable_id(c_set, id_c_total_executed);
  189. starpu_perf_counter_set_disable_id(c_set, id_c_peak_ready);
  190. starpu_perf_counter_set_disable_id(c_set, id_c_peak_submitted);
  191. starpu_perf_counter_set_disable_id(c_set, id_c_total_submitted);
  192. starpu_perf_counter_set_disable_id(w_set, id_w_cumul_execution_time);
  193. starpu_perf_counter_set_disable_id(w_set, id_w_total_executed);
  194. starpu_perf_counter_set_disable_id(g_set, id_g_peak_ready);
  195. starpu_perf_counter_set_disable_id(g_set, id_g_peak_submitted);
  196. starpu_perf_counter_set_disable_id(g_set, id_g_total_submitted);
  197. starpu_perf_counter_set_free(c_set);
  198. c_set = NULL;
  199. starpu_perf_counter_set_free(w_set);
  200. w_set = NULL;
  201. starpu_perf_counter_set_free(g_set);
  202. g_set = NULL;
  203. starpu_shutdown();
  204. return 0;
  205. }