8 years ago · 3e4ac00ff8
--- a/configure.ac
+++ b/configure.ac
@@ -2863,6 +2863,7 @@ AC_CONFIG_COMMANDS([executable-scripts], [
 
				   chmod +x tools/starpu_paje_draw_histogram
			
 
				   chmod +x tools/starpu_paje_state_stats
			
 
				   chmod +x tools/starpu_paje_summary
			
 
				+  chmod +x tools/starpu_mlr_analysis
			
 
				   chmod +x tools/starpu_paje_sort
			
 
				   chmod +x tools/starpu_smpirun
			
 
				   chmod +x doc/doxygen/doxygen_filter.sh
			
@@ -2920,6 +2921,7 @@ AC_OUTPUT([
 
				 	tools/starpu_paje_draw_histogram
			
 
				 	tools/starpu_paje_state_stats
			
 
				 	tools/starpu_paje_summary
			
 
				+	tools/starpu_mlr_analysis
			
 
				 	tools/starpu_paje_sort
			
 
				 	tools/starpu_smpirun
			
 
				 	socl/Makefile
			
--- a/tools/starpu_mlr_analysis.Rmd
+++ b/tools/starpu_mlr_analysis.Rmd
@@ -0,0 +1,216 @@
 
				+
			
 
				+```{r Setup, echo=FALSE}
			
 
				+# Hide the R source of every chunk in the rendered report; only results
				+# are shown. The knitr:: prefix makes this work even when knitr has not
				+# been attached with library().
				+knitr::opts_chunk$set(echo = FALSE)
			
 
				+```
			
 
				+
			
 
				+```{r Load_R_files_and_functions}
			
 
				+print_codelet <- function(reg, codelet) {
				+  # Emit C code that initialises `<codelet>.model` with the parameter
				+  # combinations of a fitted linear model.
				+  #
				+  # reg:     fitted model as returned by lm().
				+  # codelet: name of the C codelet variable (character scalar).
				+  #
				+  # The code is written to standard output with cat(). Parameters are
				+  # indexed in the sorted order of their names, terms in formula order.
				+  model <- paste0(codelet, ".model")
				+
				+  # Header: one single C comment, closed only after the R-squared line so
				+  # that the generated text is valid C.
				+  cat(paste("/* ############################################ */", "\n"))
				+  cat(paste("/*\t Automatically generated code", "\n"))
				+  cat(paste("\t Check for potential errors and be sure parameter value are written in good order (alphabetical one by default)", "\n"))
				+  cat(paste("\t Adjusted R-squared: ", summary(reg)$adj.r.squared, "*/\n\n"))
				+
				+  # One combination per model term; the rank also counts the intercept.
				+  ncomb <- reg$rank - 1
				+  cat(paste0("\t ", model, "->ncombinations = ", ncomb, ";\n"))
				+  cat(paste0("\t ", model, "->combinations = (unsigned **) malloc(", model, "->ncombinations*sizeof(unsigned *));\n\n"))
				+  cat(paste0("\t if (", model, "->combinations)\n", "\t {\n"))
				+  cat(paste0("\t   for (unsigned i = 0; i < ", model, "->ncombinations; i++)\n", "\t   {\n"))
				+  cat(paste0("\t     ", model, "->combinations[i] = (unsigned *) malloc(", model, "->nparameters*sizeof(unsigned));\n", "\t   }\n", "\t }\n\n"))
				+
				+  # Variables-by-terms matrix. A non-zero entry means the variable takes
				+  # part in the term; the codes 1 and 2 only differ in how factors would
				+  # be coded, so both are reduced to "present".
				+  factors <- attr(reg$terms, "factors")
				+  if (length(factors) > 0) {
				+    response <- attr(reg$terms, "response")
				+    if (response > 0) {
				+      factors <- factors[-response, , drop = FALSE]
				+    }
				+    exponents <- (factors > 0) * 1
				+
				+    # A row named "I(X^k)" stands for parameter X raised to the power k.
				+    vars <- rownames(exponents)
				+    power_pattern <- "^I\\((.+)\\^([0-9.]+)\\)$"
				+    is_power <- grepl(power_pattern, vars)
				+    powers <- rep(1, length(vars))
				+    powers[is_power] <- as.numeric(sub(power_pattern, "\\2", vars[is_power]))
				+    params <- vars
				+    params[is_power] <- sub(power_pattern, "\\1", vars[is_power])
				+
				+    # Scale each row by its power, then add up the rows that belong to the
				+    # same parameter; rowsum() returns the groups in sorted order.
				+    combinations <- rowsum(exponents * powers, group = params)
				+
				+    for (j in seq_len(ncol(combinations))) {
				+      for (i in seq_len(nrow(combinations))) {
				+        cat(paste0("\t ", model, "->combinations[", j - 1, "][", i - 1, "] = ", combinations[i, j], ";\n"))
				+      }
				+    }
				+  }
				+
				+  cat(paste("/* ############################################ */", "\n"))
				+}
			
 
				+
			
 
				+# Load the measurements to analyse; `input_trace` (path to a CSV file with
				+# a header row) must be defined before this document is knitted.
				+df <- read.csv(file = input_trace, header = TRUE)
			
 
				+```
			
 
				+
			
 
				+# Introduction
			
 
				+
			
 
				+TODO
			
 
				+
			
 
				+### How to compile
			
 
				+
			
 
				+    ./starpu_mlr_analysis .starpu/sampling/codelets/tmp/test_mlr.out
			
 
				+
			
 
				+### Software dependencies
			
 
				+
			
 
				+In order to run the analysis you need to have R installed:
			
 
				+
			
 
				+    sudo apt-get install r-base 
			
 
				+
			
 
				+In order to compile this document, you need *knitr*. However, you can perfectly well use the R code from this document in your own scripts without knitr. If you decide to generate this document, start R (e.g., from a terminal) and install the knitr package:
			
 
				+
			
 
				+    R> install.packages("knitr")
			
 
				+
			
 
				+No additional R packages are needed.
			
 
				+
			
 
				+# First glimpse
			
 
				+
			
 
				+First, we show the relations between all parameters in a single plot.
			
 
				+
			
 
				+```{r InitPlot}
			
 
				+# Overview plot of the whole trace data frame (all columns at once)
				+plot(df)
			
 
				+```
			
 
				+
			
 
				+For this example, all three parameters M, N, K have some influence,
			
 
				+but their relation is not easy to understand.
			
 
				+
			
 
				+In general, this type of plots can typically show if there is a group
			
 
				+of parameters which are mutually perfectly correlated, in which case
			
 
				+only one parameter from the group should be kept for the further
			
 
				+analysis. Additionally, the plot can show the parameters that have a
			
 
				+constant value, and since these cannot have an influence on the model,
			
 
				+they should also be ignored.
			
 
				+
			
 
				+However, making conclusions based solely on the visual analysis can be
			
 
				+treacherous and it is better to rely on the statistical tools. The
			
 
				+multiple linear regression methods used in the following sections will
			
 
				+also be able to detect and ignore these irrelevant
			
 
				+parameters. Therefore, this initial visual look should only be used to
			
 
				+get a basic idea about the model, but all the parameters should be
			
 
				+kept for now.
			
 
				+
			
 
				+# Initial model
			
 
				+
			
 
				+At this point, an initial model is computed, using all the parameters,
			
 
				+but not taking into account their exponents or the relations between
			
 
				+them.
			
 
				+
			
 
				+```{r Model1}
			
 
				+# Additive model: each parameter enters linearly, without interactions
				+model1 <- lm(Duration ~ M + N + K, data = df)
				+summary(model1)
			
 
				+```
			
 
				+
			
 
				+For each parameter and the constant in the first column, an estimation
			
 
				+of the corresponding coefficient is provided along with the 95%
			
 
				+confidence interval. If there are any parameters with NA value, which
			
 
				+suggests that the parameters are correlated to another parameter or
			
 
				+that their value is constant, these parameters should not be used in
			
 
				+the following model computations. The stars in the last column
			
 
				+indicate the significance of each parameter. However, having maximum
			
 
				+three stars for each parameter does not necessarily mean that the
			
 
				+model is perfect and we should always inspect the adjusted R^2 value
			
 
				+(the closer it is to 1, the better the model is). To the users that
			
 
				+are not familiar with multiple linear regression analysis and R tools,
			
 
				+we suggest consulting the R documentation. Some explanations are also provided
			
 
				+in the following article https://hal.inria.fr/hal-01180272.
			
 
				+       
			
 
				+In this example, the parameters M, N, K are all very
			
 
				+important. However, it is not clear if there are some relations
			
 
				+between them or if some of these parameters should be used with an
			
 
				+exponent. Moreover, the adjusted R^2 value is not extremely high and we
			
 
				+hope we can get a better one. Thus, we proceed to the more advanced
			
 
				+analysis.
			
 
				+
			
 
				+# Refining the model
			
 
				+
			
 
				+Now, we can look for relations between the parameters. Note that
			
 
				+trying all the possible combinations for the cases with a huge number
			
 
				+of parameters can be prohibitively long. Thus, it may be better to first
			
 
				+get rid of the parameters which seem to have very small influence
			
 
				+(typically the ones with no stars from the table in the previous
			
 
				+section).
			
 
				+
			
 
				+```{r Model2}
			
 
				+# Full factorial model: M, N, K and all of their interactions
				+model2 <- lm(Duration ~ M * N * K, data = df)
				+summary(model2)
			
 
				+```
			
 
				+
			
 
				+This model is more accurate, as the R^2 value increased. Now when some
			
 
				+relations are observed, we can try some of these parameters with the
			
 
				+exponents.
			
 
				+
			
 
				+```{r Model3}
			
 
				+# Power model: squares and cubes of each parameter, no interactions
				+model3 <- lm(
				+  Duration ~ I(M^2) + I(M^3) + I(N^2) + I(N^3) + I(K^2) + I(K^3),
				+  data = df
				+)
				+summary(model3)
			
 
				+```
			
 
				+
			
 
				+It seems like some parameters are important. Now we combine these and
			
 
				+try to find the optimal combination.
			
 
				+
			
 
				+```{r Model4}
			
 
				+# Combined model: two product terms, M^2 * N and N^3 * K
				+model4 <- lm(Duration ~ I(M^2):N + I(N^3):K, data = df)
				+summary(model4)
			
 
				+```
			
 
				+
			
 
				+Depending on the machine characteristics and the variability of
			
 
				+benchmarks, this may be the best model.
			
 
				+
			
 
				+# Validation
			
 
				+
			
 
				+Once the model has been computed, we should validate it. Apart from
			
 
				+the low adjusted R^2 value, the model weakness can also be observed
			
 
				+even better when inspecting the residuals. The results on the two
			
 
				+following plots (and thus the accuracy of the model) will greatly
			
 
				+depend on the measurements variability and the design of experiments.
			
 
				+
			
 
				+```{r Validation}
			
 
				+# Put the first two lm diagnostic plots of model4 side by side
				+par(mfrow = c(1, 2))
				+plot(model4, which = 1:2)
			
 
				+```
			
 
				+
			
 
				+Generally speaking, if there are some structures on the left plot,
			
 
				+this can indicate that there are certain phenomena not explained by
			
 
				+the model. Many points on the same horizontal line represent
			
 
				+repetitive occurrences of the task with the same parameter values,
			
 
				+which is typical for a single experiment run with a homogeneous
			
 
				+data. The fact that there is some variability is common, as executing
			
 
				+exactly the same code on a real machine will always have slightly
			
 
				+different duration. However, having a huge variability means that the
			
 
				+benchmarks were very noisy, thus obtaining accurate models from them
			
 
				+will be hard.
			
 
				+
			
 
				+Plot on the right may show that the residuals do not follow the normal
			
 
				+distribution. Therefore, such a model would overall have a limited
			
 
				+predictive power.
			
 
				+
			
 
				+If we are not satisfied with the accuracy of the observed models, we
			
 
				+should go back to the previous section and try to find a better
			
 
				+one. In some cases, the benchmarked data will just be too noisy and
			
 
				+they should be redesigned and run again.
			
 
				+
			
 
				+When we are finally satisfied with the model accuracy, we should
			
 
				+modify our task code, so that StarPU knows which parameters
			
 
				+combinations are used in the model.
			
 
				+
			
 
				+# Generating C code
			
 
				+
			
 
				+This is a simple helper to generate C code which should be copied to
			
 
				+the task description in your application. Make sure that the generated
			
 
				+code correctly corresponds to the computed model.
			
 
				+
			
 
				+```{r Code}
			
 
				+# Print the C initialisation code for model4, using "mlr_cl" as the codelet name
				+print_codelet(model4, "mlr_cl")
			
 
				+```
			
--- a/tools/starpu_mlr_analysis.in
+++ b/tools/starpu_mlr_analysis.in
@@ -0,0 +1,80 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+# 
			
 
				+# Copyright (C) 2014  Université Joseph Fourier
			
 
				+# Copyright (C) 2014-2015  Université Bordeaux
			
 
				+# 
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+# 
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+# 
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+
			
 
				+# Script generating a multiple linear regression (MLR) analysis report from a measurements file
			
 
				+
			
 
				+set -e # fail fast
				+
				+# File names
				+basename="$PWD"
				+outputfile="mlr_analysis.html"
				+# The R Markdown template is looked up in the directory of this script;
				+# the quoting keeps paths that contain spaces intact.
				+analysis_script="$(dirname "$(which "$0")")/starpu_mlr_analysis.Rmd"
				+
				+# Command line arguments
				+inputfile=""
			
 
				+
			
 
				+# Print the usage message on standard output.
				+help_script()
				+{
				+cat << EOF
				+Give a multiple linear regression (MLR) analysis of the measurements in the input file
				+
				+Usage: $0 [-h] <input file>
				+
				+Options:
				+   -h      Show this message
				+
				+Examples:
				+$0 .starpu/sampling/codelets/tmp/test_mlr.out
				+
				+Report bugs to <@PACKAGE_BUGREPORT@>
				+EOF
				+}
			
 
				+
			
 
				+# Name shown by --version; it was previously never set, so the version
				+# line started with an empty program name.
				+PROGNAME=$0
				+
				+if [ "$1" = "--version" ] ; then
				+    echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@"
				+    exit 0
				+fi
				+
				+# No argument at all is treated like a request for help.
				+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
				+    help_script
				+    exit 0
				+fi
			
 
				+
			
 
				+while getopts "h" opt; do
			
 
				+  case $opt in
			
 
				+    \?)
			
 
				+      echo "Invalid option: -$OPTARG"
			
 
				+      help_script
			
 
				+      exit 3
			
 
				+      ;;
			
 
				+  esac
			
 
				+done
			
 
				+
			
 
				+# Reading files that need to be analyzed
			
 
				+shift $((OPTIND - 1))
			
 
				+inputfile=$1
			
 
				+# Error if there is no input files specified
			
 
				+# if [[ $# != 1]]; then
			
 
				+#     echo "Error!"
			
 
				+#     help_script
			
 
				+#     exit 2
			
 
				+# fi
			
 
				+
			
 
				+#####################################
			
 
				+# Running analysis file to get actual results
			
 
				+Rscript -e "library(knitr); input_trace = '$inputfile' ; outputhtml='$outputfile';\
			
 
				+            outputRmd = gsub('.html\$','.Rmd',outputhtml);\
			
 
				+            knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)"