Mariem makni 4 lat temu
rodzic
commit
1d909c853b
2 zmienionych plików z 63 dodań i 59 usunięć
  1. 46 36
      src/core/perfmodel/energy_model.c
  2. 17 23
      tests/perfmodels/test_papi.c

+ 46 - 36
src/core/perfmodel/energy_model.c

@@ -33,11 +33,16 @@
 
 #define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__);  exit(retval); }
 
+#if 1 /* debug tracing switch: change to 0 to compile debug() calls out */
+#define debug(fmt, ...) printf(fmt, ## __VA_ARGS__) /* printf-style trace; ## drops the comma when no extra args are given (GNU extension) */
+#else /* tracing disabled: debug() expands to nothing */
+#define debug(fmt, ...) 
+#endif
+
 static const int N_EVTS = 2;
 
-static hwloc_topology_t topology;
+static int nsockets;
 
-static int nnuma;
 static const char* event_names[] = { "rapl::RAPL_ENERGY_PKG:cpu=%d",
 
                               "rapl::RAPL_ENERGY_DRAM:cpu=%d"};
@@ -45,34 +50,23 @@ static const char* event_names[] = { "rapl::RAPL_ENERGY_PKG:cpu=%d",
 static int add_event(int EventSet, int socket);
 
 /*must be initialized to PAPI_NULL before calling PAPI_create_event*/
-static int EventSet = PAPI_NULL;
-static int tmp, i;
+int EventSet = PAPI_NULL;
 
 /*This is where we store the values we read from the eventset */
 static long long *values;
 
-
-static int retval, number;
-static int device_count;
-static char errstring[PAPI_MAX_STR_LEN];
-
   
 int starpu_energy_start()
 { 
+    int retval, number;
+    int i;
 
-    /* Allocate and initialize topology object. */
-    hwloc_topology_init(&topology);
-
-
-    /* Perform the topology detection. */
-    hwloc_topology_load(topology);
-
-    nnuma = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
-   
-
-    printf("nbre de sockets %d \n",nnuma);
-
-    values=calloc(nnuma * N_EVTS,sizeof(long long));
+    struct _starpu_machine_config *config = _starpu_get_machine_config();
+    hwloc_topology_t topology = config->topology.hwtopology;
+ 
+    nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
+ 
+    values=calloc(nsockets * N_EVTS,sizeof(long long));
     if (values==NULL) {
          exit(1);
       }
@@ -84,8 +78,12 @@ int starpu_energy_start()
     if ( (retval = PAPI_create_eventset(&EventSet)) != PAPI_OK)
       ERROR_RETURN(retval);
 
-    for (i = 0 ; i < nnuma ; i ++ )
-        add_event(EventSet, i);
+    for (i = 0 ; i < nsockets ; i ++ )
+{
+        /* return the index of socket */
+        hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, i);
+        add_event(EventSet, obj->os_index);
+}
 
     /* get the number of events in the event set */
     number = 0;
@@ -93,21 +91,23 @@ int starpu_energy_start()
         ERROR_RETURN(retval);
 
     printf("There are %d events in the event set\n", number);
-
+      
     /* Start counting */
     if ( (retval = PAPI_start(EventSet)) != PAPI_OK)
                 ERROR_RETURN(retval);
 
-    /* Destroy topology object. */
-     hwloc_topology_destroy(topology);
-
+    
 return retval;
 
 }
 
 int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned ntasks)
 {
-  double energy;
+    double energy = 0.;
+ 
+    int retval;
+    unsigned workerid = 0;
+    unsigned cpuid = 0;
 
  /* Stop counting and store the values into the array */
     if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
@@ -115,26 +115,36 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
   
        int k,s;
        
-       for( s = 0 ; s < nnuma ; s ++){
+       for( s = 0 ; s < nsockets ; s ++){
 
            for(k = 0 ; k < N_EVTS; k++) {
 
-             energy=values[s * N_EVTS + k]/ ntasks;
+             energy += values[s * N_EVTS + k];
+             
              printf("%-40s%12.6f J\t(Average Power %.1fW)\n",
              event_names[k],
              (energy/1.0e9),
-             (energy/1.0e9));
+             (energy/1.0e9) // FIXME: diviser par le temps en secondes (utiliser starpu_timing_now() qui retourne le temps en microsecondes
+);
+             
+
                 }
              }
 
-
-      unsigned workerid = 0;
-      unsigned cpuid = 0;
+     energy=energy / ntasks;
 
      struct starpu_perfmodel_arch *arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
      starpu_perfmodel_update_history(model, task, arch, cpuid, 0, energy);
 
-  /* free the resources used by PAPI */
+    /* removes all events from a PAPI event set */
+    if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
+      ERROR_RETURN(retval);
+
+    /*deallocates the memory associated with an empty PAPI EventSet*/
+    if ( (retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK)
+      ERROR_RETURN(retval);
+
+    /* free the resources used by PAPI */
         PAPI_shutdown();
 
 return retval;

+ 17 - 23
tests/perfmodels/test_papi.c

@@ -29,8 +29,8 @@
 #define END 16777216
 #endif
 
+int ntasks;
 /* First implementation */
-
 void memset0_cpu(void *descr[], void *arg)
 {
         (void)arg;
@@ -84,17 +84,18 @@ static struct starpu_codelet memset_cl=
 
 static void test_memset(int nelems, struct starpu_codelet *codelet)
 {
-        int nloops = 100;
+        ntasks = starpu_cpu_worker_get_count() * 30;
         int loop;
        
         struct starpu_task *task;
     
+// TODO: rather use an array[nloops] of handles, and do the unregister in a separate loop, to let the tasks run in parallel
+        starpu_data_handle_t handle;
+
         
-        for (loop = 0; loop < nloops; loop++)
+        for (loop = 0; loop < ntasks; loop++)
         {
                 task = starpu_task_create();
-                starpu_data_handle_t handle;
-
                 starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int));
 
                 task->cl = codelet;
@@ -124,24 +125,22 @@ int main(int argc, char **argv)
         int ret;
         int retval;
         int size;
-
+       
         starpu_conf_init(&conf);
 
-        conf.sched_policy_name = "dmda";
-        conf.calibrate = 2;
-
+        conf.sched_policy_name = "eager";
+       
         ret = starpu_initialize(&conf, &argc, &argv);
         if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
         STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
       
-       /* Now create a dummy task just to estimate its duration according to the regression */
-  
-        size = 1234567;
-
+        for (size = STARTlin; size < END; size *= 2)
+        {
+               
         starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int));
 
         struct starpu_task *task = starpu_task_create();
-        // task->cl = &memset_cl;
+       
         task->cl = &memset_cl;
 
         task->handles[0] = handle;
@@ -149,8 +148,6 @@ int main(int argc, char **argv)
 
         task->destroy = 0;
                           
-          
-        /* Use a non-linear regression */
         /* Start counting */
 
          if ( (retval = starpu_energy_start()) != 0)
@@ -161,7 +158,7 @@ int main(int argc, char **argv)
 
         /* Stop counting and store the values into the array */
 
-         if ( (retval = starpu_energy_stop(&my_perfmodel, task, 100)) != 0)
+         if ( (retval = starpu_energy_stop(&my_perfmodel, task, ntasks)) != 0)
 
                 ERROR_RETURN(retval);
 
@@ -170,13 +167,10 @@ int main(int argc, char **argv)
 
         starpu_data_unregister(handle);
         
-
-
-       // int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NUMANODE);
-       // unsigned nnuma = starpu_memory_nodes_get_numa_count();
+       
+      }
+        starpu_shutdown();
         
-       starpu_shutdown();
-
         return EXIT_SUCCESS;