energy_model.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2008-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #ifdef STARPU_PAPI
  18. #include <papi.h>
  19. #endif
  20. #ifdef STARPU_HAVE_HWLOC
  21. #include <hwloc.h>
  22. #endif
  23. #include <starpu_perfmodel.h>
  24. #include <starpu_profiling.h>
  25. #include <common/config.h>
  26. #include <common/utils.h>
  27. #ifdef HAVE_UNISTD_H
  28. #include <unistd.h>
  29. #endif
  30. #include <sys/stat.h>
  31. #include <core/perfmodel/perfmodel.h>
  32. #include <core/jobs.h>
  33. #include <core/workers.h>
  34. #include <datawizard/datawizard.h>
  35. #include <core/task.h>
  36. #ifdef STARPU_USE_CUDA
  37. #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
  38. #include <nvml.h>
  39. #include <cuda.h>
  40. #include <cuda_runtime.h>
  41. #endif
  42. #endif
  43. #define ERROR_RETURN(retval, function) do { PAPI_perror(function); fprintf(stderr, "Error %d %s:line %d\n", retval,__FILE__,__LINE__); return(retval); } while (0)
  44. #if 0
  45. #define debug(fmt, ...) printf(fmt, ## __VA_ARGS__)
  46. #else
  47. #define debug(fmt, ...)
  48. #endif
  49. #ifdef STARPU_PAPI
  50. #ifdef STARPU_HAVE_HWLOC
  51. static const int N_EVTS = 2;
  52. static int nsockets;
  53. static const char* event_names[] =
  54. {
  55. "rapl::RAPL_ENERGY_PKG:cpu=%d",
  56. "rapl::RAPL_ENERGY_DRAM:cpu=%d"
  57. };
  58. static int add_event(int EventSet, int socket);
  59. /* PAPI variables*/
  60. /*must be initialized to PAPI_NULL before calling PAPI_create_event*/
  61. static int EventSet = PAPI_NULL;
  62. #endif
  63. #endif
  64. static double t1;
  65. #ifdef STARPU_USE_CUDA
  66. #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
  67. static unsigned long long energy_begin, energy_end;
  68. static nvmlDevice_t device;
  69. #endif
  70. #endif
  71. int starpu_energy_start(int workerid STARPU_ATTRIBUTE_UNUSED, enum starpu_worker_archtype archi)
  72. {
  73. t1 = starpu_timing_now();
  74. switch (archi)
  75. {
  76. #ifdef STARPU_PAPI
  77. #ifdef STARPU_HAVE_HWLOC
  78. case STARPU_CPU_WORKER:
  79. {
  80. STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
  81. int retval, number;
  82. struct _starpu_machine_config *config = _starpu_get_machine_config();
  83. hwloc_topology_t topology = config->topology.hwtopology;
  84. nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
  85. if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
  86. ERROR_RETURN(retval, "PAPI_library_init");
  87. /* Creating the eventset */
  88. if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK)
  89. ERROR_RETURN(retval, "PAPI_create_eventset");
  90. int i;
  91. for (i = 0 ; i < nsockets ; i ++ )
  92. {
  93. /* return the index of socket */
  94. hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, i);
  95. if ( (retval = add_event(EventSet, obj->os_index)) != PAPI_OK)
  96. {
  97. if (retval == PAPI_EPERM)
  98. _STARPU_DISP("PAPI could not access counters due to permissions errors. Perhaps your system requires to run measurements as root?\n");
  99. else if (retval == PAPI_ENOEVNT)
  100. _STARPU_DISP("PAPI could not access counters. Perhaps your system requires to run measurements as root?\n");
  101. ERROR_RETURN(retval, "PAPI_add_named_event");
  102. }
  103. }
  104. /* get the number of events in the event set */
  105. number = 0;
  106. if ( (retval = PAPI_list_events(EventSet, NULL, &number)) != PAPI_OK)
  107. ERROR_RETURN(retval, "PAPI_list_events");
  108. debug("There are %d events in the event set\n", number);
  109. /* Start counting */
  110. if ( (retval = PAPI_start(EventSet)) != PAPI_OK)
  111. ERROR_RETURN(retval, "PAPI_start");
  112. return retval;
  113. }
  114. #endif
  115. #endif
  116. #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
  117. case STARPU_CUDA_WORKER:
  118. {
  119. STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n");
  120. int devid = starpu_worker_get_devid(workerid);
  121. int ret = nvmlDeviceGetHandleByIndex_v2 (devid, &device);
  122. if (ret != NVML_SUCCESS)
  123. {
  124. _STARPU_DISP("Could not get CUDA device %d from nvml\n", devid);
  125. return -1;
  126. }
  127. ret = nvmlDeviceGetTotalEnergyConsumption ( device, &energy_begin );
  128. if (ret != NVML_SUCCESS)
  129. {
  130. _STARPU_DISP("Could not measure energy used by CUDA device %d\n", devid);
  131. return -1;
  132. }
  133. return 0;
  134. }
  135. break;
  136. #endif
  137. default:
  138. printf("Error: worker is not supported ! \n");
  139. return -1;
  140. }
  141. }
  142. int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi)
  143. {
  144. double energy = 0.;
  145. int retval = 0;
  146. unsigned cpuid = 0;
  147. double t2 = starpu_timing_now();
  148. double t STARPU_ATTRIBUTE_UNUSED = t2 - t1;
  149. switch (archi)
  150. {
  151. #ifdef STARPU_PAPI
  152. #ifdef STARPU_HAVE_HWLOC
  153. case STARPU_CPU_WORKER:
  154. {
  155. STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
  156. /*This is where we store the values we read from the eventset */
  157. long long values[nsockets*N_EVTS];
  158. /* Stop counting and store the values into the array */
  159. if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
  160. ERROR_RETURN(retval, "PAPI_stop");
  161. int k,s;
  162. for( s = 0 ; s < nsockets ; s ++)
  163. {
  164. for(k = 0 ; k < N_EVTS; k++)
  165. {
  166. double delta = values[s * N_EVTS + k]*0.23/1.0e9;
  167. energy += delta;
  168. debug("%-40s%12.6f J\t(for %f us, Average Power %.1fW)\n",
  169. event_names[k],
  170. delta, t, delta/(t*1.0E-6));
  171. }
  172. }
  173. /*removes all events from a PAPI event set */
  174. if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
  175. ERROR_RETURN(retval, "PAPI_cleanup_eventset");
  176. /*deallocates the memory associated with an empty PAPI EventSet*/
  177. if ( (retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK)
  178. ERROR_RETURN(retval, "PAPI_destroy_eventset");
  179. break;
  180. }
  181. #endif
  182. #endif
  183. #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
  184. case STARPU_CUDA_WORKER:
  185. {
  186. STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n");
  187. int ret = nvmlDeviceGetTotalEnergyConsumption(device, &energy_end);
  188. if (ret != NVML_SUCCESS)
  189. return -1;
  190. energy = (energy_end - energy_begin) / 1000.;
  191. debug("energy consumption on device %d is %f mJ (for %f us, Average power %0.1fW)\n", 0, energy * 1000., t, energy / (t*1.0E-6));
  192. break;
  193. }
  194. #endif
  195. default:
  196. {
  197. printf("Error: worker type %d is not supported! \n", archi);
  198. return -1;
  199. break;
  200. }
  201. }
  202. struct starpu_perfmodel_arch *arch;
  203. if (workerid == -1)
  204. /* Just take one of them */
  205. workerid = starpu_worker_get_by_type(archi, 0);
  206. arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
  207. starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks);
  208. return retval;
  209. }
  210. #ifdef STARPU_PAPI
  211. #ifdef STARPU_HAVE_HWLOC
  212. static int add_event(int eventSet, int socket)
  213. {
  214. int retval, i;
  215. for (i = 0; i < N_EVTS; i++)
  216. {
  217. char buf[255];
  218. snprintf(buf, sizeof(buf), event_names[i], socket);
  219. /* printf("Activating multiplex\n"); */
  220. /* retval = PAPI_set_multiplex(eventSet); */
  221. /* if(retval != PAPI_OK) { */
  222. /* _STARPU_DISP("cannot set multiplex\n"); */
  223. /* return retval; */
  224. /* } */
  225. retval = PAPI_add_named_event(eventSet, buf);
  226. if (retval != PAPI_OK)
  227. {
  228. if (!strcmp(event_names[i], "rapl::RAPL_ENERGY_DRAM:cpu=%d"))
  229. {
  230. /* Ok, too bad */
  231. _STARPU_DISP("Note: DRAM energy measurement not available\n");
  232. return PAPI_OK;
  233. }
  234. _STARPU_DISP("cannot add event '%s': %d\n", buf, retval);
  235. return retval;
  236. }
  237. }
  238. return ( PAPI_OK );
  239. }
  240. #endif
  241. #endif