Browse Source

The perfmodel display tool can now also display the performance of parallel
CPU tasks (combined CPU workers), selected with the new "-a cpu:k" architecture syntax.

Cédric Augonnet 14 years ago
parent
commit
72ae4e0b1f
2 changed files with 16 additions and 3 deletions
  1. 1 1
      src/core/perfmodel/perfmodel_history.c
  2. 15 2
      tools/perfmodel_display.c

+ 1 - 1
src/core/perfmodel/perfmodel_history.c

@@ -475,7 +475,7 @@ void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archna
 	{
 		if (arch == STARPU_CPU_DEFAULT)
 		{
-#warning We could just use cpu_1 as well ...
+			/* NB: We could just use cpu_1 as well ... */
 			snprintf(archname, maxlen, "cpu");
 		}
 		else

+ 15 - 2
tools/perfmodel_display.c

@@ -49,7 +49,7 @@ static void usage(char **argv)
         fprintf(stderr, "   -l                  display all available models\n");
         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
-        fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cuda, gordon)\n");
+        fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda, gordon)\n");
 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
         fprintf(stderr, "\n");
 
@@ -232,12 +232,25 @@ static void display_all_perf_models(struct starpu_perfmodel_t *model)
 		}
 	}
 	else {
-#warning TODO add the cpu:k interface as in the branch
 		if (strcmp(arch, "cpu") == 0) {
 			display_perf_model(model, STARPU_CPU_DEFAULT);
 			return;
 		}
 
+		int k;
+		if (sscanf(arch, "cpu:%d", &k) == 1)
+		{
+			/* For combined CPU workers */
+			if ((k < 1) || (k > STARPU_NMAXCPUS))
+			{
+				fprintf(stderr, "Invalid CPU size\n");
+				exit(-1);
+			}
+
+			display_perf_model(model, STARPU_CPU_DEFAULT + k - 1);
+			return;
+		}
+
 		if (strcmp(arch, "cuda") == 0) {
 			unsigned archid;
 			for (archid = STARPU_CUDA_DEFAULT; archid < STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS; archid++)