/* exa2pro / starpu-max */
							/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2008-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <starpu.h>
#ifdef STARPU_PAPI
#include <papi.h>
#endif
#ifdef STARPU_HAVE_HWLOC
#include <hwloc.h>
#endif
#include <starpu_perfmodel.h>
#include <starpu_profiling.h>
#include <common/config.h>
#include <common/utils.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <sys/stat.h>
#include <core/perfmodel/perfmodel.h>
#include <core/jobs.h>
#include <core/workers.h>
#include <datawizard/datawizard.h>
#include <core/task.h>

#ifdef STARPU_USE_CUDA
#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
#include <nvml.h>
#include <cuda.h>
#include <cuda_runtime.h>
#endif
#endif

/* Report a failed call on stderr (error code plus the file and line of the
 * macro invocation), then return that code from the enclosing function.
 * The argument is evaluated exactly once, so passing an expression with side
 * effects is safe. Only usable inside functions returning int. */
#define ERROR_RETURN(retval) do { const int error_return_code_ = (retval); fprintf(stderr, "Error %d %s:line %d: \n", error_return_code_,__FILE__,__LINE__);  return error_return_code_; } while (0)

/* Tracing helper: switch the condition below to 1 to have debug() print on
 * stdout. In the default configuration debug() expands to nothing, so its
 * arguments are not evaluated at all. */
#if 0
#define debug(fmt, ...) printf(fmt, ## __VA_ARGS__)
#else
#define debug(fmt, ...)
#endif

#ifdef STARPU_PAPI
/* Number of entries of event_names[] that add_event() tries to register for
 * each socket; starpu_energy_stop() sizes its result array as
 * nsockets * N_EVTS. */
static const int N_EVTS = 2;

/* Number of HWLOC_OBJ_PACKAGE objects in the hwloc topology, as counted by
 * starpu_energy_start(). */
static int nsockets;

/* printf-style patterns of the PAPI event names; add_event() fills the %d
 * with the socket index it is given. */
static const char* event_names[] =
{
	"rapl::RAPL_ENERGY_PKG:cpu=%d",
	"rapl::RAPL_ENERGY_DRAM:cpu=%d"
};

/* Registers the events of event_names[] for one socket into an event set
 * (defined at the end of this file). */
static int add_event(int EventSet, int socket);

/* PAPI state shared between starpu_energy_start() and starpu_energy_stop() */

/* Handle of the PAPI event set: created and started in starpu_energy_start(),
 * stopped and destroyed in starpu_energy_stop().
 * Must be PAPI_NULL before PAPI_create_eventset() is called on it. */
static int EventSet = PAPI_NULL;

#endif

/* Value of starpu_timing_now() taken when starpu_energy_start() was last
 * called; starpu_energy_stop() subtracts it from its own timestamp to get the
 * elapsed time shown in the debug() traces. */
static double t1;

#ifdef STARPU_USE_CUDA
#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
/* Readings of nvmlDeviceGetTotalEnergyConsumption() taken by
 * starpu_energy_start() and starpu_energy_stop() respectively; the consumed
 * energy is computed from their difference. */
static unsigned long long energy_begin, energy_end;
/* NVML handle of the measured GPU, looked up by starpu_energy_start() from the
 * device id of the given worker. */
static nvmlDevice_t device;
#endif
#endif

#ifdef STARPU_PAPI
#ifdef STARPU_HAVE_HWLOC
/* Best-effort teardown of the PAPI event set after a failed start: remove its
 * events and destroy it, so that EventSet does not stay allocated and a later
 * starpu_energy_start() can create it again. Errors are deliberately ignored,
 * the caller is already reporting the original failure. */
static void energy_release_eventset(void)
{
	PAPI_cleanup_eventset(EventSet);
	PAPI_destroy_eventset(&EventSet);
}
#endif
#endif

/* Begin an energy measurement, to be closed by starpu_energy_stop().
 *
 * - STARPU_CPU_WORKER (only when built with PAPI and hwloc): workerid must be
 *   -1; the events of every CPU package found in the hwloc topology are added
 *   to a PAPI event set, which is then started.
 * - STARPU_CUDA_WORKER (only when built with CUDA and an NVML providing
 *   nvmlDeviceGetTotalEnergyConsumption): workerid must designate a worker;
 *   the current energy counter of its device is recorded.
 *
 * Returns PAPI_OK (CPU) or 0 (CUDA) on success. On failure, returns the PAPI
 * error code of the failing call, or -1 for NVML errors, missing CPU packages
 * and unsupported worker types.
 */
int starpu_energy_start(int workerid, enum starpu_worker_archtype archi)
{
	t1 = starpu_timing_now();

	switch (archi)
	{
#ifdef STARPU_PAPI
#ifdef STARPU_HAVE_HWLOC
	case STARPU_CPU_WORKER:
	{
		STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");

		int retval, number;

		struct _starpu_machine_config *config = _starpu_get_machine_config();
		hwloc_topology_t topology = config->topology.hwtopology;

		/* hwloc may report no package at all (0) or be unable to give a
		 * single count (-1); without packages there is nothing to measure. */
		nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
		if (nsockets <= 0)
		{
			_STARPU_DISP("Could not find any CPU package in the topology, cannot measure CPU energy\n");
			nsockets = 0;
			return -1;
		}

		if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
			ERROR_RETURN(retval);

		/* Creating the eventset */
		if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK)
			ERROR_RETURN(retval);

		/* From here on, every failure must release the event set before
		 * returning, otherwise it would be leaked and the next start
		 * could not create it. */
		int i;
		for (i = 0 ; i < nsockets ; i ++ )
		{
			/* add_event() is given the OS index of the package */
			hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, i);
			if ( (retval = add_event(EventSet, obj->os_index)) != PAPI_OK)
			{
				energy_release_eventset();
				ERROR_RETURN(retval);
			}
		}

		/* get the number of events in the event set */
		number = 0;
		if ( (retval = PAPI_list_events(EventSet, NULL, &number)) != PAPI_OK)
		{
			energy_release_eventset();
			ERROR_RETURN(retval);
		}

		debug("There are %d events in the event set\n", number);

		/* Start counting */
		if ( (retval = PAPI_start(EventSet)) != PAPI_OK)
		{
			energy_release_eventset();
			ERROR_RETURN(retval);
		}

		return retval;
	}
#endif
#endif

	/* Same double guard as the declarations of device and energy_begin */
#ifdef STARPU_USE_CUDA
#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
	case STARPU_CUDA_WORKER:
	{
		STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n");
		int devid = starpu_worker_get_devid(workerid);
		int ret = nvmlDeviceGetHandleByIndex_v2 (devid,  &device);
		if (ret != NVML_SUCCESS)
		{
			_STARPU_DISP("Could not get CUDA device %d from nvml\n", devid);
			return -1;
		}
		ret = nvmlDeviceGetTotalEnergyConsumption ( device, &energy_begin );
		if (ret != NVML_SUCCESS)
		{
			_STARPU_DISP("Could not measure energy used by CUDA device %d\n", devid);
			return -1;
		}
		return 0;
	}
#endif
#endif

	default:
		printf("Error: worker is not supported ! \n");
		return -1;
	}
}

/* Close the measurement opened by starpu_energy_start() and record the result
 * in the history of a performance model.
 *
 * model, task, nimpl: forwarded to starpu_perfmodel_update_history_n().
 * ntasks: number of tasks the measurement covers; the measured energy is
 *         divided by it before being recorded.
 * workerid, archi: same values as given to starpu_energy_start(). When
 *         workerid is -1, the first worker of type archi is used to select
 *         the performance architecture.
 *
 * Returns 0 on success. On failure, returns the PAPI error code of the failing
 * call, or -1 for NVML errors, unsupported worker types, a CPU measurement
 * that was never started, or when no worker of the requested type exists.
 */
int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi)
{
	double energy = 0.;
	/* Initialized because the CUDA path never assigns it before the final return */
	int retval = 0;
	unsigned cpuid = 0;
	double t2 = starpu_timing_now();
	double t STARPU_ATTRIBUTE_UNUSED = t2 - t1;

	switch (archi)
	{
#ifdef STARPU_PAPI
#ifdef STARPU_HAVE_HWLOC
	case STARPU_CPU_WORKER:
	{
		STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");

		/* nsockets is only set by starpu_energy_start(); it also sizes
		 * the array below, which must not have a zero length. */
		if (nsockets <= 0)
		{
			_STARPU_DISP("No CPU energy measurement was started\n");
			return -1;
		}

		/*This is where we store the values we read from the eventset */
		long long values[nsockets*N_EVTS];

		int k,s;

		/* add_event() may have skipped the DRAM events, in which case
		 * PAPI_stop() fills fewer than nsockets*N_EVTS entries: zero the
		 * array so that the missing ones do not contribute to the sum. */
		for (k = 0 ; k < nsockets*N_EVTS ; k++)
			values[k] = 0;

		/* Stop counting and store the values into the array */
		if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
			ERROR_RETURN(retval);

		for( s = 0 ; s < nsockets ; s ++)
		{
			for(k = 0 ; k < N_EVTS; k++)
			{
				/* NOTE(review): 0.23e-9 looks like the size of one
				 * RAPL count in Joules -- confirm */
				double delta = values[s * N_EVTS + k]*0.23/1.0e9;
				energy += delta;

				debug("%-40s%12.6f J\t(for %f us, Average Power %.1fW)\n",
				      event_names[k],
				      delta, t, delta/(t*1.0E-6));
			}
		}

		/*removes all events from a PAPI event set */
		if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
			ERROR_RETURN(retval);

		/*deallocates the memory associated with an empty PAPI EventSet*/
		if ( (retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK)
			ERROR_RETURN(retval);

		break;
	}
#endif
#endif

	/* Same double guard as the declarations of device and energy_end */
#ifdef STARPU_USE_CUDA
#ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION
	case STARPU_CUDA_WORKER:
	{
		STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n");
		int ret = nvmlDeviceGetTotalEnergyConsumption(device, &energy_end );
		if (ret != NVML_SUCCESS)
			return -1;
		energy = (energy_end - energy_begin) / 1000.;
		debug("energy consumption on device %d is %f mJ (for %f us, Average power %0.1fW)\n", 0, energy * 1000., t, energy / (t*1.0E-6));
		break;
	}
#endif
#endif

	default:
	{
		printf("Error: worker type %d is not supported! \n", archi);
		return -1;
	}
	}


	struct starpu_perfmodel_arch *arch;
	if (workerid == -1)
		/* Just take one of them */
		workerid = starpu_worker_get_by_type(archi, 0);

	/* No worker of that type: there is no architecture to attach the
	 * measurement to. */
	if (workerid < 0)
	{
		_STARPU_DISP("Could not find a worker of type %d to record the energy measurement\n", archi);
		return -1;
	}

	arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);

	starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks);

	return retval;
}

#ifdef STARPU_PAPI
#ifdef STARPU_HAVE_HWLOC
/* Register the events of event_names[] for one socket into a PAPI event set.
 *
 * Each pattern of event_names[] is expanded with the socket number and added
 * by name. A failure on the DRAM event is tolerated: a note is displayed and
 * PAPI_OK is returned without trying any further event. Any other failure is
 * displayed and its PAPI error code is returned.
 * (Multiplexing is not enabled on the event set here.)
 */
static int add_event(int eventSet, int socket)
{
	int evt = 0;

	while (evt < N_EVTS)
	{
		const char *pattern = event_names[evt];
		char name[255];
		int status;

		/* Build the actual event name for this socket */
		snprintf(name, sizeof(name), pattern, socket);

		status = PAPI_add_named_event(eventSet, name);
		if (status == PAPI_OK)
		{
			evt++;
			continue;
		}

		if (strcmp(pattern, "rapl::RAPL_ENERGY_DRAM:cpu=%d") == 0)
		{
			/* DRAM measurement is optional, carry on without it */
			_STARPU_DISP("Note: DRAM energy measurement not available\n");
			return PAPI_OK;
		}

		_STARPU_DISP("cannot add event '%s': %d\n", name, status);
		return status;
	}

	return PAPI_OK;
}
#endif
#endif