浏览代码

doc: add profiling example

Nathalie Furmento 15 年之前
父节点
当前提交
d25256b368
共有 1 个文件被更改，包括 61 次插入和 0 次删除
  1. 61 0
      doc/starpu.texi

+ 61 - 0
doc/starpu.texi

@@ -2462,6 +2462,7 @@ instance.
 * Hello World::                 Submitting Tasks
 * Scaling a Vector::            Manipulating Data
 * Vector Scaling on an Hybrid CPU/GPU Machine::  Handling Heterogeneous Architectures
+* Task and Worker Profiling::   Measuring Task and Worker Activity
 * Partitioning Data::           Partitioning Data
 * More examples::               More examples shipped with StarPU
 @end menu
@@ -3047,6 +3048,66 @@ or by disabling CUDA devices:
 0.000000 3.000000 6.000000 9.000000 12.000000
 @end smallexample
 
+@node Task and Worker Profiling
+@section Task and Worker Profiling
+
+A full example showing how to use the profiling API is available in
+the StarPU sources in the directory @code{examples/profiling/}.
+
+@cartouche
+@smallexample
+struct starpu_task *task = starpu_task_create();
+task->cl = &cl;
+task->synchronous = 1;
+/* We will destroy the task structure by hand so that we can
+ * query the profiling info before the task is destroyed. */
+task->destroy = 0;
+
+starpu_task_submit(task);
+
+/* The task is finished, get profiling information */
+struct starpu_task_profiling_info *info = task->profiling_info;
+
+/* How much time did it take before the task started? */
+double delay += starpu_timing_timespec_delay_us(&info->submit_time, &info->start_time);
+
+/* How long was the task execution? */
+double length += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
+
+/* We don't need the task structure anymore */
+starpu_task_destroy(task);
+@end smallexample
+@end cartouche
+
+@cartouche
+@smallexample
+/* Display the occupancy of all workers during the test */
+int worker;
+for (worker = 0; worker < starpu_worker_get_count(); worker++)
+@{
+        struct starpu_worker_profiling_info worker_info;
+        int ret = starpu_worker_get_profiling_info(worker, &worker_info);
+        STARPU_ASSERT(!ret);
+
+        double total_time = starpu_timing_timespec_to_us(&worker_info.total_time);
+        double executing_time = starpu_timing_timespec_to_us(&worker_info.executing_time);
+        double sleeping_time = starpu_timing_timespec_to_us(&worker_info.sleeping_time);
+
+        float executing_ratio = 100.0*executing_time/total_time;
+        float sleeping_ratio = 100.0*sleeping_time/total_time;
+
+        char workername[128];
+        starpu_worker_get_name(worker, workername, 128);
+        fprintf(stderr, "Worker %s:\n", workername);
+        fprintf(stderr, "\ttotal time : %.2lf ms\n", total_time*1e-3);
+        fprintf(stderr, "\texec time  : %.2lf ms (%.2f %%)\n", executing_time*1e-3,
+                executing_ratio);
+        fprintf(stderr, "\tblocked time  : %.2lf ms (%.2f %%)\n", sleeping_time*1e-3,
+                sleeping_ratio);
+@}
+@end smallexample
+@end cartouche
+
 @c TODO: Add performance model example (and update basic_examples)
 
 @node Partitioning Data