Browse Source

merge trunk

Nathalie Furmento 11 years ago
parent
commit
de7e0bc553

+ 3 - 1
ChangeLog

@@ -18,7 +18,7 @@ StarPU 1.2.0 (svn revision xxxx)
 ==============================================
 
 New features:
-  * Xeon Phi support
+  * MIC Xeon Phi support
   * SCC support
   * New function starpu_sched_ctx_exec_parallel_code to execute a
     parallel code on the workers of the given scheduler context
@@ -73,6 +73,7 @@ Small features:
     manage the tag.
   * On Linux x86, spinlocks now block after a hundred tries. This avoids
     typical 10ms pauses when the application thread tries to submit tasks.
+  * New function char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define
@@ -93,6 +94,7 @@ The scheduling context release
 
 New features:
   * One can register an existing on-GPU buffer to be used by a handle.
+  * Add the starpu_paje_summary statistics tool.
 
 StarPU 1.1.2 (svn revision xxx)
 ==============================================

+ 5 - 3
configure.ac

@@ -54,8 +54,8 @@ AC_CANONICAL_SYSTEM
 dnl Automake 1.11 introduced `silent-rules' and `color-tests'.  Use them
 dnl when they're available.
 m4_ifdef([AM_SILENT_RULES],
-  [AM_INIT_AUTOMAKE([1.11 -Wall foreign silent-rules color-tests parallel-tests])],
-  [AM_INIT_AUTOMAKE([1.10 -Wall foreign])])
+  [AM_INIT_AUTOMAKE([1.11 -Wall foreign silent-rules color-tests parallel-tests subdir-objects])],
+  [AM_INIT_AUTOMAKE([1.10 -Wall foreign subdir-objects])])
 
 m4_ifdef([AM_SILENT_RULES],
   [AM_SILENT_RULES(yes)])
@@ -92,7 +92,7 @@ AC_PATH_PROG(svnversioncommand, svnversion)
 
 # use svnversion to record the current repository revision only if
 # subversion is installed and we are in a working copy
-if test "$svnversioncommand" = "" || test `LC_ALL=C $svnversioncommand -n $srcdir` = "exported" ; then
+if test "$svnversioncommand" = "" || test "`LC_ALL=C $svnversioncommand -n $srcdir`" = "exported" ; then
    if test -f $srcdir/STARPU-REVISION ; then
       cp $srcdir/STARPU-REVISION .
    else
@@ -2458,6 +2458,7 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   chmod +x tools/starpu_workers_activity
   chmod +x tools/starpu_paje_draw_histogram
   chmod +x tools/starpu_paje_state_stats
+  chmod +x tools/starpu_paje_summary
   chmod +x doc/doxygen/doxygen_filter.sh
 ])
 
@@ -2487,6 +2488,7 @@ AC_OUTPUT([
 	tools/starpu_workers_activity
 	tools/starpu_paje_draw_histogram
 	tools/starpu_paje_state_stats
+	tools/starpu_paje_summary
 	socl/Makefile
 	socl/src/Makefile
 	socl/examples/Makefile

+ 10 - 0
doc/doxygen/chapters/13offline_performance_tools.doxy

@@ -337,6 +337,7 @@ $ R
 > install.packages("plyr")
 > install.packages("ggplot2")
 > install.packages("data.table")
+> install.packages("knitr")
 \endverbatim
 
 The pj_dump tool from pajeng is also needed (see
@@ -364,6 +365,15 @@ and see the resulting pdf file:
 \image html paje_draw_histogram.png
 \image latex paje_draw_histogram.eps "" width=\textwidth
 
+A quick statistical report can be generated by using:
+
+\verbatim
+$ starpu_paje_summary native.trace simgrid.trace
+\endverbatim
+
+It includes Gantt charts, execution summaries, as well as state duration charts
+and time distribution histograms.
+
 \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time
 
 StarPU can record a trace of what tasks are needed to complete the

+ 5 - 5
doc/doxygen/chapters/18mic_scc_support.doxy

@@ -6,13 +6,13 @@
  * See the file version.doxy for copying conditions.
  */
 
-/*! \page MICSCCSupport MIC/SCC Support
+/*! \page MICSCCSupport MIC Xeon Phi / SCC Support
 
 \section Compilation Compilation
 
 SCC support just needs the presence of the RCCE library.
 
-MIC support actually needs two compilations of StarPU, one for the host and one for
+MIC Xeon Phi support actually needs two compilations of StarPU, one for the host and one for
 the device. The PATH environment variable has to include the path to the
 cross-compilation toolchain, for instance <c>/usr/linux-k1om-4.7/bin</c>
 The script <c>mic-configure</c> can then be used to achieve the two compilations: it basically
@@ -20,12 +20,12 @@ calls <c>configure</c> as appropriate from two new directories: <c>build_mic</c>
 <c>build_host</c>. <c>make</c> and <c>make install</c> can then be used as usual and will
 recurse into both directories.
 
-\section PortingApplicationsToMICSCC Porting Applications To MIC/SCC
+\section PortingApplicationsToMICSCC Porting Applications To MIC Xeon Phi / SCC
 
-The simplest way to port an application to MIC/SCC is to set the field
+The simplest way to port an application to MIC Xeon Phi or SCC is to set the field
 starpu_codelet::cpu_funcs_name, to provide StarPU with the function
 name of the CPU implementation. StarPU will thus simply use the
-existing CPU implementation (cross-rebuilt in the MIC case). The
+existing CPU implementation (cross-rebuilt in the MIC Xeon Phi case). The
 functions have to be globally-visible (i.e. not <c>static</c>) for
 StarPU to be able to look them up.
 

+ 5 - 1
doc/doxygen/chapters/api/workers.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
  * See the file version.doxy for copying conditions.
  */
@@ -222,4 +222,8 @@ Returns the type of the given node as defined by
 this function should be used in the allocation function to determine
 on which device the memory needs to be allocated.
 
+\fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
+\ingroup API_Workers_Properties
+Returns the given worker type as a string.
+
 */

+ 2 - 0
include/starpu_worker.h

@@ -117,6 +117,8 @@ unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx);
 
 unsigned starpu_worker_is_slave(int workerid);
 
+char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
+
 #ifdef __cplusplus
 }
 #endif

+ 10 - 0
src/core/workers.c

@@ -1987,3 +1987,13 @@ unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
 	return nsched_ctxs;
 }
 
+/* Return the symbolic name of the given worker type as a constant string.
+ * Types not listed below yield "STARPU_unknown_WORKER". */
+char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
+{
+	/* Worker types paired with their printable names; scanned in order,
+	 * the first matching entry wins. */
+	static const struct
+	{
+		enum starpu_worker_archtype archtype;
+		char *label;
+	} known_types[] =
+	{
+		{ STARPU_CPU_WORKER, "STARPU_CPU_WORKER" },
+		{ STARPU_CUDA_WORKER, "STARPU_CUDA_WORKER" },
+		{ STARPU_OPENCL_WORKER, "STARPU_OPENCL_WORKER" },
+		{ STARPU_MIC_WORKER, "STARPU_MIC_WORKER" },
+		{ STARPU_SCC_WORKER, "STARPU_SCC_WORKER" },
+		{ STARPU_ANY_WORKER, "STARPU_ANY_WORKER" },
+	};
+	unsigned i;
+
+	for (i = 0; i < sizeof(known_types) / sizeof(known_types[0]); i++)
+	{
+		if (known_types[i].archtype == type)
+			return known_types[i].label;
+	}
+	return "STARPU_unknown_WORKER";
+}

+ 284 - 0
tools/starpu_paje_summary.Rmd

@@ -0,0 +1,284 @@
+<div id="table-of-contents">
+<h2>Table of Contents</h2>
+<div id="text-table-of-contents">
+<ul>
+<li><a href="#sec-1">1. Introduction</a>
+<ul>
+<li>
+<ul>
+<li><a href="#sec-1-0-1">1.0.1. How to compile</a></li>
+<li><a href="#sec-1-0-2">1.0.2. Software dependencies</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a href="#sec-2">2. Gantt Charts of the whole Trace</a></li>
+<li><a href="#sec-3">3. Table Summary</a></li>
+<li><a href="#sec-4">4. State Duration during the Execution Time</a></li>
+<li><a href="#sec-5">5. Distribution Histograms</a></li>
+</ul>
+</div>
+</div>
+```{r Setup, echo=FALSE}
+opts_chunk$set(echo=FALSE)
+```
+
+
+
+```{r Install_R_libraries}
+# Try to attach every package named in `package` (a character vector).
+# Returns TRUE only when all of them could be loaded, FALSE otherwise.
+InstalledPackage <- function(package)
+{
+    # require() reports success per package; its messages and warnings are silenced
+    attached <- suppressMessages(suppressWarnings(
+        sapply(package, require, quietly = TRUE, character.only = TRUE, warn.conflicts = FALSE)
+    ))
+    not_loaded <- package[!attached]
+    return(length(not_loaded) == 0)
+}
+
+# Tell whether a CRAN mirror has been selected, i.e. whether the "CRAN" entry
+# of the `repos` option differs from the "@CRAN@" placeholder.
+CRANChoosen <- function()
+{
+    repositories <- getOption("repos")
+    cran_entry <- repositories["CRAN"]
+    return(cran_entry != "@CRAN@")
+}
+
+# Make sure `package` can be loaded, installing it from CRAN when it is missing.
+#   package:           name of the package to load
+#   defaultCRANmirror: mirror used when no CRAN mirror ends up being selected
+# Returns TRUE when the package is loadable afterwards, FALSE otherwise.
+UsePackage <- function(package, defaultCRANmirror = "http://cran.at.r-project.org")
+{
+    if(!InstalledPackage(package))
+    {
+        if(!CRANChoosen())
+        {
+            # Only prompt for a mirror when a user can answer: in a
+            # non-interactive session (e.g. under Rscript) the prompt cannot
+            # be answered and the fallback below would never be reached.
+            if(interactive())
+            {
+                chooseCRANmirror()
+            }
+            if(!CRANChoosen())
+            {
+                options(repos = c(CRAN = defaultCRANmirror))
+            }
+        }
+
+        suppressMessages(suppressWarnings(install.packages(package)))
+        if(!InstalledPackage(package)) return(FALSE)
+    }
+    return(TRUE)
+}
+
+# Now install desired libraries
+libraries <- c("ggplot2", "plyr", "data.table", "RColorBrewer")
+for(libr in libraries) 
+{ 
+    if(!UsePackage(libr))
+    {
+	stop("Error!", libr)
+    }
+}
+```
+
+```{r Load_R_files}
+# Load ggplot and plyr just for the following cases
+   library(ggplot2)
+   library(plyr)
+   library(data.table)
+   library(RColorBrewer) 
+
+# Defining non-computation states:
+def_states<-c("Initializing","Deinitializing","Overhead","Nothing","Sleeping","Freeing","Allocating","WritingBack","FetchingInput","PushingOutput","Callback","Progressing","Unpartitioning","AllocatingReuse","Reclaiming","DriverCopy","DriverCopyAsync","Scheduling","Executing")
+
+# Function for reading .csv file
+# Read one headerless .csv trace file and return it as a data frame with the
+# columns ResourceId, Start, End, Duration, Value and Origin.
+#   file:           path of the .csv file to read
+#   range1, range2: accepted but not used by this function
+read_df <- function(file,range1,range2) {
+  trace <- read.csv(file, header=FALSE, strip.white=TRUE)
+  names(trace) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value")
+  # Drop the columns that are not needed by the analysis
+  trace <- trace[!(names(trace) %in% c("Nature","Type", "Depth"))]
+  # Remember which file every row came from
+  trace$Origin <- as.factor(as.character(file))
+
+  # Abbreviated state names and the full names they stand for
+  full_names <- c("F"="Freeing", "A"="Allocating", "W"="WritingBack",
+                  "No"="Nothing", "I"="Initializing", "D"="Deinitializing",
+                  "Fi"="FetchingInput", "Po"="PushingOutput", "C"="Callback",
+                  "B"="Overhead", "Sc"="Scheduling", "E"="Executing",
+                  "Sl"="Sleeping", "P"="Progressing", "U"="Unpartitioning",
+                  "Ar"="AllocatingReuse", "R"="Reclaiming", "Co"="DriverCopy",
+                  "CoA"="DriverCopyAsync")
+
+  # Expand abbreviations; any other value is kept as it is
+  states <- as.character(trace$Value)
+  abbreviated <- states %in% names(full_names)
+  states[abbreviated] <- full_names[states[abbreviated]]
+
+  # Small cleanup
+  trace$Start <- round(trace$Start,digit=1)
+  trace$End <- round(trace$End,digit=1)
+  trace$ResourceId <- as.factor(trace$ResourceId)
+  trace$Value <- as.factor(states)
+
+  # Shift the timeline so that the earliest state starts at zero, then
+  # recompute End from the shifted Start and the recorded Duration
+  trace$Start <- trace$Start - min(trace$Start)
+  trace$End <- trace$Start + trace$Duration
+
+  trace
+}
+```
+
+```{r Load_traces}
+# Combined trace table: one read_df() result is appended per input file
+df<-data.frame()
+# Use the default example file names when input_traces was not set beforehand
+if( !exists("input_traces") )
+  input_traces<-c("example.native.trace.csv", "example.simgrid.trace.csv")
+
+# seq_along() gives no iteration for an empty vector, whereas
+# 1:length(x) would iterate over c(1, 0) and index out of range
+for (i in seq_along(input_traces)){
+  dfs<-read_df(input_traces[i])
+  df<-rbindlist(list(df,dfs))
+}
+
+# Color palettes
+colourCount = length(unique(df$Value))
+getPalette = colorRampPalette(brewer.pal(9, "Set1"))
+
+# Order of Value so we can have good colors
+ker_states<-as.character(unique(df[!(df$Value %in% def_states),Value]))
+ordered_states<-append(sort(ker_states), def_states)
+df$Value <- factor(df$Value, levels=ordered_states)
+
+# Order of ResourceId so we can have y-axis
+df$ResourceId <- factor(df$ResourceId, levels=sort(as.character(unique(df$ResourceId))))
+```
+
+# Introduction
+
+This document presents a basic analysis of multiple StarPU
+traces. First, paje *traces* are converted into *.csv* files and
+then we analyze them with **R**. This summary is a first step that
+should help researchers verify their hypotheses or find problematic
+areas that require more exhaustive investigation.
+
+Be cautious, as the following results are only a brief analysis of
+the traces and many important phenomena could still be hidden. Also,
+be very careful when comparing different states or traces. Even
+though some large discrepancies can be irrelevant, in other cases
+even the smallest differences can be essential in understanding what
+exactly happened during the StarPU execution.
+
+### How to compile
+
+    starpu_paje_summary example.native.trace example.simgrid.trace
+
+### Software dependencies
+
+In order to run this analysis you need to have R installed:
+
+    sudo apt-get install r-base 
+
+The easiest way to transform *paje* traces generated by StarPU to *.csv* is to use the *pj_dump* program (<https://github.com/schnorr/pajeng>), so we encourage users to install it.
+
+When R is installed, one will need to start R (e.g., from a terminal) and install the *knitr* package:
+
+    R> install.packages("knitr")
+
+Additional R packages used in this analysis (*ggplot2, plyr, data.table, RColorBrewer*) will be installed automatically when the document is compiled for the first time. If there is any trouble, install them by hand directly from R (the same way as *knitr*)
+
+# Gantt Charts of the whole Trace
+
+First, we show a simple gantt chart of every trace. X-axis is a
+simple timeline of the execution, *Resources* on y-axis correspond
+to different CPUs/GPUs that were used and finally different colors
+represent different *States* of the application.
+
+This kind of figure can often point to idle time or
+synchronization problems. A small disadvantage is that in most cases
+there are too many states, thus it is impossible to display them all
+on a single plot without aggregation. Therefore, for any strange
+behavior at a certain part of the trace, we strongly suggest zooming
+in on the interval in which it occurred.
+
+```{r Gantt1}
+ggplot(df,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) + 
+ theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + 
+ geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") + 
+ facet_wrap(~Origin,ncol=1,scale="free_y")
+```
+
+Second, we will concentrate only on computation kernel states, to
+get rid of visualization artifacts that can be introduced by other
+(sometimes irrelevant) states. Normally, this plot should not be too
+different from the previous one.
+
+```{r Gantt2}
+# Select only computation kernels: drop every row whose state is listed in
+# def_states, reusing that list instead of repeating it here
+ df1 <- df[!(df$Value %in% def_states),]
+
+# Start from zero
+  m <- min(df1$Start)
+  df1$Start <- df1$Start - m
+  df1$End <- df1$Start+df1$Duration
+
+# Plot one horizontal segment per state, one panel per trace
+ ggplot(df1,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) + 
+  theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + 
+  geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") + 
+  facet_wrap(~Origin,ncol=1,scale="free_y")
+```
+
+# Table Summary
+
+Here we present how much time the application spent in each state
+(OverallDuration), how many times it was in that state (Count),
+the mean and median values of the duration (Mean and Median), and finally
+the standard deviation (StandDev).
+
+General information provided by this table can sometimes give
+application experts an idea of which parts of the code are not working as
+desired. Be aware that this kind of table hides many important
+things, such as outliers, multiple modes, etc.
+
+```{r Table}
+options(width=120)
+ddply(df,.(Value,Origin), summarize, OverallDuration=sum(Duration), Count=length(Duration), Mean=mean(Duration), Median=median(Duration), StandDev=sd(Duration))
+```
+
+# State Duration during the Execution Time
+
+Now, we show how the duration of each state changed during the
+execution. This can display the general behavior of a state, show whether
+there are outliers or multiple modes, whether some events occur in
+groups, etc. It can also suggest a strange behavior of a state
+during a certain time interval, which should be later investigated
+more carefully.
+
+  However, since each event is represented by a single point (and
+there is no "alpha" factor), those events that happen almost
+simultaneously are overplotted. Therefore density of events along
+execution time may not be easy to read.
+
+```{r Dur}
+ggplot(df,aes(x=Start,y=Duration)) + geom_point(aes(color=Value)) + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + ggtitle("State Duration during the Execution Time") + theme(legend.position="none") + ylab("Duration [ms]") + xlab("Time [ms]") + facet_grid(Value~Origin, scale="free_y")
+```
+
+# Distribution Histograms
+
+Finally, we show the distribution of *Duration* for each state in the form
+of histograms. X-axis is partitioned into bins with equidistant time
+intervals in milliseconds, while y-axis represents the number of
+occurrences inside such intervals for a certain state. Note that for
+the first plot y-axis is not fixed, meaning that the scale changes
+from one row to another. This plot allows one not only to see what
+was the most frequent duration of a state, but also to compare
+durations between different states.
+
+```{r Hist1}
+ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Value~Origin,scales = "free_y")
+```
+
+Similar to the previous figure, only now traces are shown vertically
+instead of horizontally. Note that for this plot x-axis is not fixed,
+meaning that the scale changes from one column to another. This plot
+allows one to compare the frequency of different states and, in case of
+multiple traces, to easily compare the duration distribution for each
+state.
+
+```{r Hist2}
+ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Origin~Value,scales = "free_x")
+```

+ 109 - 0
tools/starpu_paje_summary.in

@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+# 
+# Copyright (C) 2014  Université Joseph Fourier
+# Copyright (C) 2014  Université Bordeaux
+# 
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+# 
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+# Script for giving statistical analysis of the paje trace
+
+set -e # fail fast
+
+# File names
+basename="$PWD"
+outputfile="summary.html"
+analysis_script="$(dirname $(which $0))/starpu_paje_summary.Rmd"
+analysis_input=""
+
+# Command line arguments
+inputfiles=""
+
+help_script()
+{
+cat << EOF
+Give statistical analysis of the paje trace
+
+Options:
+   -h      Show this message
+
+Examples:
+$0 example.native.trace
+$0 example.native.trace example.simgrid.trace
+
+Report bugs to <@PACKAGE_BUGREPORT@>
+EOF
+}
+
+if [ "$1" = "--version" ] ; then
+    echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@"
+    exit 0
+fi
+
+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
+    help_script
+    exit 0
+fi
+
+# Parse options; -h is the only one accepted by getopts
+while getopts "h" opt; do
+  case $opt in
+    h)
+      # -h is declared in the option string, so handle it instead of
+      # silently ignoring it when it is not the very first argument form
+      help_script
+      exit 0
+      ;;
+    \?)
+      echo "Invalid option: -$OPTARG"
+      help_script
+      exit 3
+      ;;
+  esac
+done
+
+# Reading files that need to be analyzed
+shift $((OPTIND - 1))
+# Remaining arguments joined into one space-separated string
+inputfiles="$*"
+# Error if there is no input files specified
+# (-lt compares numerically; "<" inside [[ ]] compares strings)
+if [ "$#" -lt 1 ]; then
+    echo "Error!"
+    help_script
+    exit 2
+fi
+
+#####################################
+# Transforming input files into .csv
+for file in $inputfiles; do
+    if [ ! -s $file ]
+	then
+	echo "Error: file $file does not exist!"
+	exit 5
+    fi
+# Sorting traces
+    grep -e '^\(\(%\)\|\(\(1\|2\|3\|4\|5\|6\|7\|9\)\>\)\)' $file > start.trace
+    grep -e '^\(\(%\)\|\(\(1\|2\|3\|4\|5\|6\|7\|9\|18\|19\)\>\)\)' -v  $file > end.trace
+    sort -s -V --key=2,2 end.trace > endSorted.trace
+    cat start.trace endSorted.trace > outputSorted.trace
+
+# Transferring to .csv
+    pj_dump -n outputSorted.trace > $file.csv
+    perl -i -ne 'print if /^State/' $file.csv
+done
+
+analysis_input=`echo \"$inputfiles".csv\"" | sed 's/  */.csv", "/g'`
+
+#####################################
+# Running analysis file to get actual results
+Rscript -e "library(knitr); input_traces = c($analysis_input) ; outputhtml='$outputfile';\
+            outputRmd = gsub('.html\$','.Rmd',outputhtml);\
+            knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)"
+
+# Cleanup: delete temporary files
+rm -f outputSorted.trace
+rm -f start.trace
+rm -f end.trace
+rm -f endSorted.trace