energy_model.c 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2008-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #ifdef STARPU_PAPI
  18. #include <papi.h>
  19. #endif
  20. #ifdef STARPU_HAVE_HWLOC
  21. #include <hwloc.h>
  22. #endif
  23. #include <starpu_perfmodel.h>
  24. #include <starpu_profiling.h>
  25. #include <common/config.h>
  26. #include <common/utils.h>
  27. #ifdef HAVE_UNISTD_H
  28. #include <unistd.h>
  29. #endif
  30. #include <sys/stat.h>
  31. #include <core/perfmodel/perfmodel.h>
  32. #include <core/jobs.h>
  33. #include <core/workers.h>
  34. #include <datawizard/datawizard.h>
  35. #include <core/task.h>
  36. #ifdef STARPU_USE_CUDA
  37. #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
  38. #include <nvml.h>
  39. #include <cuda.h>
  40. #include <cuda_runtime.h>
  41. #endif
  42. #endif
  /* Print the failing status code together with the source file and line on
   * stderr, then return that code from the enclosing function.  Wrapped in
   * do { } while (0) so that it behaves as a single statement. */
  #define ERROR_RETURN(retval) \
      do \
      { \
          fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); \
          return(retval); \
      } while (0)

  /* Tracing helper: change the condition below to 1 to have debug() forward
   * its arguments to printf(); as written, debug() expands to nothing. */
  #if 0
  #define debug(fmt, ...) printf(fmt, ## __VA_ARGS__)
  #else
  #define debug(fmt, ...)
  #endif
  #ifdef STARPU_PAPI
  /* Number of entries in event_names[], i.e. events requested for each socket. */
  static const int N_EVTS = 2;

  /* Package count; written by starpu_energy_start() and read back by
   * starpu_energy_stop() to size the array of counter values. */
  static int nsockets;

  /* printf-style templates of the PAPI event names; add_event() substitutes
   * the socket index for %d. */
  static const char *event_names[] =
  {
      "rapl::RAPL_ENERGY_PKG:cpu=%d",
      "rapl::RAPL_ENERGY_DRAM:cpu=%d"
  };

  static int add_event(int eventSet, int socket);

  /* Handle of the PAPI event set shared by start and stop.  It has to hold
   * PAPI_NULL when it is passed to PAPI_create_eventset(). */
  static int EventSet = PAPI_NULL;
  #endif

  /* Timestamp taken by starpu_energy_start(); starpu_energy_stop() subtracts
   * it from its own timestamp to get the measured duration. */
  static double t1;

  #if defined(STARPU_USE_CUDA) && defined(HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION)
  /* NVML energy counter readings taken at start and at stop. */
  static unsigned long long energy_begin, energy_end;

  /* NVML handle of the measured device, looked up by starpu_energy_start(). */
  static nvmlDevice_t device;
  #endif
  69. int starpu_energy_start(int workerid, enum starpu_worker_archtype archi)
  70. {
  71. t1 = starpu_timing_now();
  72. switch (archi)
  73. {
  74. #ifdef STARPU_PAPI
  75. #ifdef STARPU_HAVE_HWLOC
  76. case STARPU_CPU_WORKER:
  77. {
  78. STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
  79. int retval, number;
  80. struct _starpu_machine_config *config = _starpu_get_machine_config();
  81. hwloc_topology_t topology = config->topology.hwtopology;
  82. nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
  83. if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
  84. ERROR_RETURN(retval);
  85. /* Creating the eventset */
  86. if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK)
  87. ERROR_RETURN(retval);
  88. int i;
  89. for (i = 0 ; i < nsockets ; i ++ )
  90. {
  91. /* return the index of socket */
  92. hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, i);
  93. if ( (retval = add_event(EventSet, obj->os_index)) != PAPI_OK)
  94. ERROR_RETURN(retval);
  95. }
  96. /* get the number of events in the event set */
  97. number = 0;
  98. if ( (retval = PAPI_list_events(EventSet, NULL, &number)) != PAPI_OK)
  99. ERROR_RETURN(retval);
  100. debug("There are %d events in the event set\n", number);
  101. /* Start counting */
  102. if ( (retval = PAPI_start(EventSet)) != PAPI_OK)
  103. ERROR_RETURN(retval);
  104. return retval;
  105. }
  106. #endif
  107. #endif
  108. #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
  109. case STARPU_CUDA_WORKER:
  110. {
  111. STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n");
  112. int devid = starpu_worker_get_devid(workerid);
  113. int ret = nvmlDeviceGetHandleByIndex_v2 (devid, &device);
  114. if (ret != NVML_SUCCESS)
  115. {
  116. _STARPU_DISP("Could not get CUDA device %d from nvml\n", devid);
  117. return -1;
  118. }
  119. ret = nvmlDeviceGetTotalEnergyConsumption ( device, &energy_begin );
  120. if (ret != NVML_SUCCESS)
  121. {
  122. _STARPU_DISP("Could not measure energy used by CUDA device %d\n", devid);
  123. return -1;
  124. }
  125. return 0;
  126. }
  127. break;
  128. #endif
  129. default:
  130. printf("Error: worker is not supported ! \n");
  131. return -1;
  132. }
  133. }
  134. int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi)
  135. {
  136. double energy = 0.;
  137. int retval;
  138. unsigned cpuid = 0;
  139. double t2 = starpu_timing_now();
  140. double t STARPU_ATTRIBUTE_UNUSED = t2 - t1;
  141. switch (archi)
  142. {
  143. #ifdef STARPU_PAPI
  144. #ifdef STARPU_HAVE_HWLOC
  145. case STARPU_CPU_WORKER:
  146. {
  147. STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
  148. /*This is where we store the values we read from the eventset */
  149. long long values[nsockets*N_EVTS];
  150. /* Stop counting and store the values into the array */
  151. if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
  152. ERROR_RETURN(retval);
  153. int k,s;
  154. for( s = 0 ; s < nsockets ; s ++)
  155. {
  156. for(k = 0 ; k < N_EVTS; k++)
  157. {
  158. double delta = values[s * N_EVTS + k]*0.23/1.0e9;
  159. energy += delta;
  160. debug("%-40s%12.6f J\t(for %f us, Average Power %.1fW)\n",
  161. event_names[k],
  162. delta, t, delta/(t*1.0E-6));
  163. }
  164. }
  165. /*removes all events from a PAPI event set */
  166. if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
  167. ERROR_RETURN(retval);
  168. /*deallocates the memory associated with an empty PAPI EventSet*/
  169. if ( (retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK)
  170. ERROR_RETURN(retval);
  171. break;
  172. }
  173. #endif
  174. #endif
  175. #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
  176. case STARPU_CUDA_WORKER:
  177. {
  178. STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n");
  179. int ret = nvmlDeviceGetTotalEnergyConsumption(device, &energy_end );
  180. if (ret != NVML_SUCCESS)
  181. return -1;
  182. energy = (energy_end - energy_begin) / 1000.;
  183. debug("energy consumption on device %d is %f mJ (for %f us, Average power %0.1fW)\n", 0, energy * 1000., t, energy / (t*1.0E-6));
  184. break;
  185. }
  186. #endif
  187. default:
  188. {
  189. printf("Error: worker type %d is not supported! \n", archi);
  190. return -1;
  191. break;
  192. }
  193. }
  194. struct starpu_perfmodel_arch *arch;
  195. if (workerid == -1)
  196. /* Just take one of them */
  197. workerid = starpu_worker_get_by_type(archi, 0);
  198. arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
  199. starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks);
  200. return retval;
  201. }
  202. #ifdef STARPU_PAPI
  203. #ifdef STARPU_HAVE_HWLOC
  204. static int add_event(int eventSet, int socket)
  205. {
  206. int retval, i;
  207. for (i = 0; i < N_EVTS; i++)
  208. {
  209. char buf[255];
  210. snprintf(buf, sizeof(buf), event_names[i], socket);
  211. /* printf("Activating multiplex\n"); */
  212. /* retval = PAPI_set_multiplex(eventSet); */
  213. /* if(retval != PAPI_OK) { */
  214. /* _STARPU_DISP("cannot set multiplex\n"); */
  215. /* return retval; */
  216. /* } */
  217. retval = PAPI_add_named_event(eventSet, buf);
  218. if (retval != PAPI_OK)
  219. {
  220. if (!strcmp(event_names[i], "rapl::RAPL_ENERGY_DRAM:cpu=%d"))
  221. {
  222. /* Ok, too bad */
  223. _STARPU_DISP("Note: DRAM energy measurement not available\n");
  224. return PAPI_OK;
  225. }
  226. _STARPU_DISP("cannot add event '%s': %d\n", buf, retval);
  227. return retval;
  228. }
  229. }
  230. return ( PAPI_OK );
  231. }
  232. #endif
  233. #endif