浏览代码

papi: record the task batch measurement as a batch

to avoid seeing it just dropped
Samuel Thibault 4 年之前
父节点
当前提交
6a2362f9b1

+ 18 - 2
include/starpu_perfmodel.h

@@ -416,16 +416,32 @@ int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t
 int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model);
 
 /**
-   Feed the performance model model with an explicit
-   measurement measured (in µs), in addition to measurements done by StarPU
+   Feed the performance model \p model with one explicit
+   measurement (in µs or J), in addition to measurements done by StarPU
    itself. This can be useful when the application already has an
    existing set of measurements done in good conditions, that StarPU
    could benefit from instead of doing on-line measurements. An example
    of use can be seen in \ref PerformanceModelExample.
+
+   Note that this records only one measurement, and that for history-based
+   models StarPU ignores the first measurement (since it is usually disturbed
+   by library loading etc.). Make sure to call this function several times to
+   record all your measurements.
+
+   You can also call starpu_perfmodel_update_history_n() to directly provide an
+   average computed over several tasks.
 */
 void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
 
 /**
+   Feed the performance model \p model with an explicit average measurement (in µs or J).
+
+   This is similar to starpu_perfmodel_update_history(), but records a batch of
+   \p number measurements at once, given as their average \p average_measured.
+*/
+void starpu_perfmodel_update_history_n(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double average_measured, unsigned number);
+
+/**
    Print the directory name storing performance models on \p output
 */
 void starpu_perfmodel_directory(FILE *output);

+ 1 - 1
src/core/perfmodel/energy_model.c

@@ -143,7 +143,7 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
      energy = energy / ntasks;
 
      struct starpu_perfmodel_arch *arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
-     starpu_perfmodel_update_history(model, task, arch, cpuid, 0, energy);
+     starpu_perfmodel_update_history_n(model, task, arch, cpuid, 0, energy, ntasks);
 
     /* Removes all events from a PAPI event set */
     if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)

+ 1 - 1
src/core/perfmodel/perfmodel.h

@@ -79,7 +79,7 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 double _starpu_multiple_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch,
 					struct _starpu_job *j, unsigned nimpl);
 void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch,
-				unsigned cpuid, double measured, unsigned nimpl);
+					unsigned cpuid, double measured, unsigned nimpl, unsigned number);
 int _starpu_perfmodel_create_comb_if_needed(struct starpu_perfmodel_arch* arch);
 
 void _starpu_create_sampling_directory_if_needed(void);

+ 17 - 12
src/core/perfmodel/perfmodel_history.c

@@ -804,7 +804,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 	/* Dump the history into the model file in case it is necessary */
        if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
 	{
-		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us)\tdev (us)\tsum\t\tsum2\t\tn\n");
+		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tdev (us or J)\tsum\t\tsum2\t\tn\n");
 		ptr = per_arch_model->list;
 		while (ptr)
 		{
@@ -1822,7 +1822,7 @@ int _starpu_perfmodel_create_comb_if_needed(struct starpu_perfmodel_arch* arch)
 	return comb;
 }
 
-void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned impl)
+void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned impl, unsigned number)
 {
 	STARPU_ASSERT_MSG(measured >= 0, "measured=%lf\n", measured);
 	if (model)
@@ -1892,11 +1892,11 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 				/* For history-based, do not take the first measurement into account, it is very often quite bogus */
 				/* TODO: it'd be good to use a better estimation heuristic, like the median, or latest n values, etc. */
-				if (model->type != STARPU_HISTORY_BASED)
+				if (number != 1 || model->type != STARPU_HISTORY_BASED)
 				{
-					entry->sum = measured;
-					entry->sum2 = measured*measured;
-					entry->nsample = 1;
+					entry->sum = measured * number;
+					entry->sum2 = measured*measured * number;
+					entry->nsample = number;
 					entry->mean = measured;
 				}
 
@@ -1917,7 +1917,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 					(100 * local_deviation > (100 + historymaxerror)
 					 || (100 / local_deviation > (100 + historymaxerror))))
 				{
-					entry->nerror++;
+					entry->nerror+=number;
 
 					/* More errors than measurements, we're most probably completely wrong, we flush out all the entries */
 					if (entry->nerror >= entry->nsample)
@@ -1935,9 +1935,9 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 				}
 				else
 				{
-					entry->sum += measured;
-					entry->sum2 += measured*measured;
-					entry->nsample++;
+					entry->sum += measured * number;
+					entry->sum2 += measured*measured * number;
+					entry->nsample += number;
 
 					unsigned n = entry->nsample;
 					entry->mean = entry->sum / n;
@@ -2053,7 +2053,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 	}
 }
 
-void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured)
+void starpu_perfmodel_update_history_n(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured, unsigned number)
 {
 	struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
 
@@ -2063,11 +2063,16 @@ void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct star
 
 	_starpu_init_and_load_perfmodel(model);
 	/* Record measurement */
-	_starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl);
+	_starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl, number);
 	/* and save perfmodel on termination */
 	_starpu_set_calibrate_flag(1);
 }
 
+void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured)
+{
+	starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, measured, 1); /* single measurement: forward to the batch variant with a batch size of 1 */
+}
+
 int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model)
 {
 	int comb;

+ 2 - 2
src/drivers/driver_common/driver_common.c

@@ -276,7 +276,7 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 				do_update_time_model = 0;
 			if (do_update_time_model)
 			{
-				_starpu_update_perfmodel_history(j, j->task->cl->model, perf_arch, worker->devid, time_consumed, j->nimpl);
+				_starpu_update_perfmodel_history(j, j->task->cl->model, perf_arch, worker->devid, time_consumed, j->nimpl, 1);
 			}
 		}
 	}
@@ -312,7 +312,7 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 			do_update_energy_model = 0;
 		if (do_update_energy_model)
 		{
-			_starpu_update_perfmodel_history(j, j->task->cl->energy_model, perf_arch, worker->devid, energy_consumed, j->nimpl);
+			_starpu_update_perfmodel_history(j, j->task->cl->energy_model, perf_arch, worker->devid, energy_consumed, j->nimpl, 1);
 		}
 	}
 }