123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288 |
- /* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2008-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
- #include <starpu.h>
- #ifdef STARPU_PAPI
- #include <papi.h>
- #endif
- #ifdef STARPU_HAVE_HWLOC
- #include <hwloc.h>
- #endif
- #include <starpu_perfmodel.h>
- #include <starpu_profiling.h>
- #include <common/config.h>
- #include <common/utils.h>
- #ifdef HAVE_UNISTD_H
- #include <unistd.h>
- #endif
- #include <sys/stat.h>
- #include <core/perfmodel/perfmodel.h>
- #include <core/jobs.h>
- #include <core/workers.h>
- #include <datawizard/datawizard.h>
- #include <core/task.h>
- #ifdef STARPU_USE_CUDA
- #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
- #include <nvml.h>
- #include <cuda.h>
- #include <cuda_runtime.h>
- #endif
- #endif
/* Report a failed PAPI call on stderr (PAPI's own message, then the error
 * value with the location of the macro invocation) and make the calling
 * function return that error value. */
#define ERROR_RETURN(retval, function) \
	do \
	{ \
		PAPI_perror(function); \
		fprintf(stderr, "Error %d %s:line %d\n", retval,__FILE__,__LINE__); \
		return(retval); \
	} while (0)

/* Debugging traces: switch the condition below to 1 to have debug() behave
 * like printf(); otherwise it expands to nothing. */
#if 0
#define debug(fmt, ...) printf(fmt, ## __VA_ARGS__)
#else
#define debug(fmt, ...)
#endif
#if defined(STARPU_PAPI) && defined(STARPU_HAVE_HWLOC)
/* Number of events monitored for each CPU package */
static const int N_EVTS = 2;

/* printf-style templates of the monitored events; the %d conversion is
 * filled with the package index when the event is added */
static const char *event_names[] =
{
	"rapl::RAPL_ENERGY_PKG:cpu=%d",
	"rapl::RAPL_ENERGY_DRAM:cpu=%d"
};

/* Number of CPU packages reported by hwloc, set by starpu_energy_start() */
static int nsockets;

/* PAPI event set; it must hold PAPI_NULL before PAPI_create_eventset() is
 * called on it */
static int EventSet = PAPI_NULL;

static int add_event(int eventSet, int socket);
#endif

/* Timestamp taken by starpu_energy_start() */
static double t1;

#if defined(STARPU_USE_CUDA) && defined(HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION)
/* NVML handle of the GPU being measured */
static nvmlDevice_t device;
/* NVML energy counter readings taken at the start and at the end of the
 * measurement */
static unsigned long long energy_begin;
static unsigned long long energy_end;
#endif
- int starpu_energy_start(int workerid STARPU_ATTRIBUTE_UNUSED, enum starpu_worker_archtype archi)
- {
- t1 = starpu_timing_now();
- switch (archi)
- {
- #ifdef STARPU_PAPI
- #ifdef STARPU_HAVE_HWLOC
- case STARPU_CPU_WORKER:
- {
- STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
- int retval, number;
- struct _starpu_machine_config *config = _starpu_get_machine_config();
- hwloc_topology_t topology = config->topology.hwtopology;
- nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
- if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
- ERROR_RETURN(retval, "PAPI_library_init");
- /* Creating the eventset */
- if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK)
- ERROR_RETURN(retval, "PAPI_create_eventset");
- int i;
- for (i = 0 ; i < nsockets ; i ++ )
- {
- /* return the index of socket */
- hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, i);
- if ( (retval = add_event(EventSet, obj->os_index)) != PAPI_OK)
- {
- if (retval == PAPI_EPERM)
- _STARPU_DISP("PAPI could not access counters due to permissions errors. Perhaps your system requires to run measurements as root?\n");
- else if (retval == PAPI_ENOEVNT)
- _STARPU_DISP("PAPI could not access counters. Perhaps your system requires to run measurements as root?\n");
- ERROR_RETURN(retval, "PAPI_add_named_event");
- }
- }
- /* get the number of events in the event set */
- number = 0;
- if ( (retval = PAPI_list_events(EventSet, NULL, &number)) != PAPI_OK)
- ERROR_RETURN(retval, "PAPI_list_events");
- debug("There are %d events in the event set\n", number);
- /* Start counting */
- if ( (retval = PAPI_start(EventSet)) != PAPI_OK)
- ERROR_RETURN(retval, "PAPI_start");
- return retval;
- }
- #endif
- #endif
- #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
- case STARPU_CUDA_WORKER:
- {
- STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n");
- int devid = starpu_worker_get_devid(workerid);
- int ret = nvmlDeviceGetHandleByIndex_v2 (devid, &device);
- if (ret != NVML_SUCCESS)
- {
- _STARPU_DISP("Could not get CUDA device %d from nvml\n", devid);
- return -1;
- }
- ret = nvmlDeviceGetTotalEnergyConsumption ( device, &energy_begin );
- if (ret != NVML_SUCCESS)
- {
- _STARPU_DISP("Could not measure energy used by CUDA device %d\n", devid);
- return -1;
- }
- return 0;
- }
- break;
- #endif
- default:
- printf("Error: worker is not supported ! \n");
- return -1;
- }
- }
- int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi)
- {
- double energy = 0.;
- int retval = 0;
- unsigned cpuid = 0;
- double t2 = starpu_timing_now();
- double t STARPU_ATTRIBUTE_UNUSED = t2 - t1;
- switch (archi)
- {
- #ifdef STARPU_PAPI
- #ifdef STARPU_HAVE_HWLOC
- case STARPU_CPU_WORKER:
- {
- STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
- /*This is where we store the values we read from the eventset */
- long long values[nsockets*N_EVTS];
- /* Stop counting and store the values into the array */
- if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
- ERROR_RETURN(retval, "PAPI_stop");
- int k,s;
- for( s = 0 ; s < nsockets ; s ++)
- {
- for(k = 0 ; k < N_EVTS; k++)
- {
- double delta = values[s * N_EVTS + k]*0.23/1.0e9;
- energy += delta;
- debug("%-40s%12.6f J\t(for %f us, Average Power %.1fW)\n",
- event_names[k],
- delta, t, delta/(t*1.0E-6));
- }
- }
- /*removes all events from a PAPI event set */
- if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
- ERROR_RETURN(retval, "PAPI_cleanup_eventset");
- /*deallocates the memory associated with an empty PAPI EventSet*/
- if ( (retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK)
- ERROR_RETURN(retval, "PAPI_destroy_eventset");
- break;
- }
- #endif
- #endif
- #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
- case STARPU_CUDA_WORKER:
- {
- STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n");
- int ret = nvmlDeviceGetTotalEnergyConsumption(device, &energy_end);
- if (ret != NVML_SUCCESS)
- return -1;
- energy = (energy_end - energy_begin) / 1000.;
- debug("energy consumption on device %d is %f mJ (for %f us, Average power %0.1fW)\n", 0, energy * 1000., t, energy / (t*1.0E-6));
- break;
- }
- #endif
- default:
- {
- printf("Error: worker type %d is not supported! \n", archi);
- return -1;
- break;
- }
- }
- struct starpu_perfmodel_arch *arch;
- if (workerid == -1)
- /* Just take one of them */
- workerid = starpu_worker_get_by_type(archi, 0);
- arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
- starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks);
- return retval;
- }
#if defined(STARPU_PAPI) && defined(STARPU_HAVE_HWLOC)
/* Add the events listed in event_names to a PAPI event set for one package.
 *
 * eventSet: PAPI event set to extend.
 * socket:   index substituted into each event name template.
 *
 * Returns PAPI_OK on success, or the error returned by
 * PAPI_add_named_event(). Failing to add the DRAM event is not an error: a
 * note is displayed and PAPI_OK is returned. No multiplexing is requested
 * here. */
static int add_event(int eventSet, int socket)
{
	int evt;

	for (evt = 0; evt < N_EVTS; evt++)
	{
		const char *pattern = event_names[evt];
		char name[255];
		int rc;

		/* Instantiate the template for this package */
		snprintf(name, sizeof(name), pattern, socket);

		rc = PAPI_add_named_event(eventSet, name);
		if (rc == PAPI_OK)
			continue;

		if (strcmp(pattern, "rapl::RAPL_ENERGY_DRAM:cpu=%d") == 0)
		{
			/* Ok, too bad */
			_STARPU_DISP("Note: DRAM energy measurement not available\n");
			return PAPI_OK;
		}

		_STARPU_DISP("cannot add event '%s': %d\n", name, rc);
		return rc;
	}

	return PAPI_OK;
}
#endif
|