| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285 | <div id="table-of-contents"><h2>Table of Contents</h2><div id="text-table-of-contents"><ul><li><a href="#sec-1">1. Introduction</a><ul><li><ul><li><a href="#sec-1-0-1">1.0.1. How to compile</a></li><li><a href="#sec-1-0-2">1.0.2. Software dependencies</a></li></ul></li></ul></li><li><a href="#sec-2">2. Gantt Charts of the whole Trace</a></li><li><a href="#sec-3">3. Table Summary</a></li><li><a href="#sec-4">4. State Duration during the Execution Time</a></li><li><a href="#sec-5">5. 
Distribution Histograms</a></li></ul></div></div>

```{r Setup, echo=FALSE}
opts_chunk$set(echo=FALSE)
```

```{r Install_R_libraries}
# Check whether every package named in `package` can be loaded.
#   package: character vector of package names.
# Returns TRUE only when all of them load successfully, FALSE otherwise.
# `require()` is used on purpose here: it reports failure as FALSE instead of
# raising an error, which is exactly the probe we need.
InstalledPackage <- function(package) {
  available <- suppressMessages(suppressWarnings(
    vapply(package, require, logical(1), quietly = TRUE,
           character.only = TRUE, warn.conflicts = FALSE)
  ))
  all(available)
}

# Tell whether a CRAN mirror has already been configured.
# Returns FALSE when the "CRAN" entry of the `repos` option is missing or still
# holds the "@CRAN@" placeholder.
CRANChoosen <- function() {
  cran <- getOption("repos")["CRAN"]
  !is.na(cran) && cran != "@CRAN@"
}

# Load `package`, installing it first if it is not available.
#   package:           character vector of package names.
#   defaultCRANmirror: mirror used when none is configured and none can be
#                      chosen by the user.
# Returns TRUE when the package could finally be loaded, FALSE otherwise.
UsePackage <- function(package, defaultCRANmirror = "http://cran.at.r-project.org") {
  if (InstalledPackage(package)) {
    return(TRUE)
  }

  if (!CRANChoosen()) {
    # chooseCRANmirror() stops with an error in non-interactive sessions
    # (e.g. when the document is knitted from a script), so only prompt when
    # somebody can actually answer; otherwise fall through to the default.
    if (interactive()) {
      chooseCRANmirror()
    }
    if (!CRANChoosen()) {
      options(repos = c(CRAN = defaultCRANmirror))
    }
  }

  suppressMessages(suppressWarnings(install.packages(package)))
  InstalledPackage(package)
}

# Now install desired libraries
libraries <- c("ggplot2", "plyr", "data.table", "RColorBrewer")
for (libr in libraries) {
  if (!UsePackage(libr)) {
    stop("Error! Could not install or load package: ", libr)
  }
}
```

```{r Load_R_files}
# Load ggplot and plyr just for the following cases
library(ggplot2)
library(plyr)
library(data.table)
library(RColorBrewer)

# Defining non-computation states:
def_states <- c("Initializing", "Deinitializing", "Overhead", "Nothing",
                "Sleeping", "Freeing", "Allocating", "WritingBack",
                "FetchingInput", "PushingOutput", "Callback", "Progressing",
                "Unpartitioning", "AllocatingReuse", "Reclaiming",
                "DriverCopy", "DriverCopyAsync", "Scheduling", "Executing")

# Function for reading .csv file
#   file:           path of a header-less .csv file with the eight columns
#                   Nature, ResourceId, Type, Start, End, Duration, Depth, Value.
#   range1, range2: unused; kept so that existing calls stay valid.
# Returns a data frame with the columns ResourceId, Start, End, Duration,
# Value and Origin, where Origin is the file name and Start is shifted so that
# the earliest state begins at zero.
read_df <- function(file, range1, range2) {
  df <- read.csv(file, header = FALSE, strip.white = TRUE)
  names(df) <- c("Nature", "ResourceId", "Type", "Start", "End", "Duration",
                 "Depth", "Value")
  df <- df[!(names(df) %in% c("Nature", "Type", "Depth"))]
  df$Origin <- as.factor(as.character(file))

  # Changing names if needed: abbreviated state names are expanded with a
  # single table lookup; any other value is left as it is.
  full_names <- c("F" = "Freeing", "A" = "Allocating", "W" = "WritingBack",
                  "No" = "Nothing", "I" = "Initializing",
                  "D" = "Deinitializing", "Fi" = "FetchingInput",
                  "Po" = "PushingOutput", "C" = "Callback", "B" = "Overhead",
                  "Sc" = "Scheduling", "E" = "Executing", "Sl" = "Sleeping",
                  "P" = "Progressing", "U" = "Unpartitioning",
                  "Ar" = "AllocatingReuse", "R" = "Reclaiming",
                  "Co" = "DriverCopy", "CoA" = "DriverCopyAsync")
  df$Value <- as.character(df$Value)
  idx <- match(df$Value, names(full_names))
  abbreviated <- !is.na(idx)
  df$Value[abbreviated] <- unname(full_names[idx[abbreviated]])

  # Small cleanup (End is not rounded because it is recomputed below)
  df$Start <- round(df$Start, digits = 1)
  df$ResourceId <- as.factor(df$ResourceId)
  df$Value <- as.factor(df$Value)

  # Start from zero
  df$Start <- df$Start - min(df$Start)
  df$End <- df$Start + df$Duration

  # Return data frame
  df
}
```

```{r Load_traces}
if (!exists("input_traces")) {
  input_traces <- c("example.native.trace.csv", "example.simgrid.trace.csv")
}

# Read every trace and stack them into a single data.table
df <- rbindlist(lapply(input_traces, read_df))

# Color palettes
colourCount <- length(unique(df$Value))
getPalette <- colorRampPalette(brewer.pal(9, "Set1"))

# Order of Value so we can have good colors
ker_states <- as.character(unique(df[!(df$Value %in% def_states), Value]))
ordered_states <- append(sort(ker_states), def_states)
df$Value <- factor(df$Value, levels = ordered_states)

# Order of ResourceId so we can have y-axis
df$ResourceId <- factor(df$ResourceId,
                        levels = sort(as.character(unique(df$ResourceId))))
```

# Introduction

This document presents a basic analysis of multiple StarPU
traces. First, paje *traces* will be transferred into *.csv* files and
then we analyze them with **R**. This summary is a first step that
should help researchers verify their hypothesis or find problematic
areas that require more exhaustive investigation.

Be cautious, as the following results are only a brief analysis of
the traces and many important phenomena could still be hidden. Also,
be very careful when comparing different states or traces. Even
though some large discrepancies can be irrelevant, in other cases
even the smallest differences can be essential in understanding what
exactly happened during the StarPU execution.

### How to compile

    ./starpu_summary.sh example.native.trace example.simgrid.trace

### Software dependencies

In order to run this analysis you need to have R installed:

    sudo apt-get install r-base

Easiest way to transform *paje* traces generated by StarPU to *.csv* is to use *pjdump* program (<https://github.com/schnorr/pajeng>), so we encourage users to install it.

When R is installed, one will need to start R (e.g., from terminal) and install *knitr* package:

    R> install.packages("knitr")

Additional R packages used in this analysis (*ggplot2, plyr, data.table, RColorBrewer*) will be installed automatically when the document is compiled for the first time. If there is any trouble, install them by hand directly from R (the same way as *knitr*).

# Gantt Charts of the whole Trace

First, we show a simple gantt chart of every trace. X-axis is a
simple timeline of the execution, *Resources* on y-axis correspond
to different CPUs/GPUs that were used and finally different colors
represent different *States* of the application.

This kind of figure can often point to the idle time or
synchronization problems. Small disadvantage is that in most cases
there are too many states, thus it is impossible to display them all
on a single plot without aggregation. Therefore for any strange
behavior at a certain part of the trace, we strongly suggest to zoom
on the interval it occurred.

```{r Gantt1}
ggplot(df, aes(x = Start, xend = End, y = factor(ResourceId),
               yend = factor(ResourceId), color = Value)) +
  theme_bw() +
  scale_color_manual(name = "State", values = getPalette(colourCount)) +
  geom_segment(size = 8) +
  ylab("Resource") +
  xlab("Time [ms]") +
  facet_wrap(~Origin, ncol = 1, scales = "free_y")
```

Second, we will concentrate only on computation kernel states, to
get rid of visualization artifacts that can be introduced by other
(sometimes irrelevant) states. Normally, this plot should not be too
different from the previous one.

```{r Gantt2}
# Select only computation kernels
df1 <- df[!(df$Value %in% def_states), ]

# Start from zero
m <- min(df1$Start)
df1$Start <- df1$Start - m
df1$End <- df1$Start + df1$Duration

# Plot
ggplot(df1, aes(x = Start, xend = End, y = factor(ResourceId),
                yend = factor(ResourceId), color = Value)) +
  theme_bw() +
  scale_color_manual(name = "State", values = getPalette(colourCount)) +
  geom_segment(size = 8) +
  ylab("Resource") +
  xlab("Time [ms]") +
  facet_wrap(~Origin, ncol = 1, scales = "free_y")
```

# Table Summary

Here we present how much time application spent in each state
(OverallDuration), how many times it was in that state (Count),
mean and median values of duration (Mean and Median), and finally
what is a standard deviation (StandDev).

General information provided by this table can sometimes give an
idea to application experts which parts of code are not working as
desired. Be aware that this kind of table hides many important
things, such as outliers, multiple modes, etc.

```{r Table}
options(width = 120)
ddply(df, .(Value, Origin), summarize,
      OverallDuration = sum(Duration),
      Count = length(Duration),
      Mean = mean(Duration),
      Median = median(Duration),
      StandDev = sd(Duration))
```

# State Duration during the Execution Time

Now, we show how duration of each state was changing during the
execution. This can display a general behavior of a state; show if
there are outliers or multiple modes; are some events occurring in
groups, etc. It can also suggest a strange behavior of a state
during a certain time interval, which should be later investigated
more carefully.

However, since each event is represented by a single point (and
there is no "alpha" factor), those events that happen almost
simultaneously are overplotted. Therefore density of events along
execution time may not be easy to read.

```{r Dur}
ggplot(df, aes(x = Start, y = Duration)) +
  geom_point(aes(color = Value)) +
  theme_bw() +
  scale_color_manual(name = "State", values = getPalette(colourCount)) +
  ggtitle("State Duration during the Execution Time") +
  theme(legend.position = "none") +
  ylab("Duration [ms]") +
  xlab("Time [ms]") +
  facet_grid(Value ~ Origin, scales = "free_y")
```

# Distribution Histograms

Finally, we show a distribution of *Duration* for each state in form
of histograms. X-axis is partitioned into bins with equidistant time
intervals in milliseconds, while y-axis represents the number of
occurrences inside such intervals for a certain state. Note that for
the first plot y-axis is not fixed, meaning that the scale changes
from one row to another. This plot allows one not only to see what
was the most frequent duration of a state, but also to compare
duration between different states.

```{r Hist1}
ggplot(df, aes(x = Duration)) +
  geom_histogram(aes(y = ..count.., fill = factor(Value)),
                 binwidth = diff(range(df$Duration)) / 30) +
  theme_bw() +
  scale_fill_manual(name = "State", values = getPalette(colourCount)) +
  ggtitle("Histograms for State Distribution") +
  ylab("Count") +
  xlab("Duration [ms]") +
  theme(legend.position = "none") +
  facet_grid(Value ~ Origin, scales = "free_y")
```

Similar to the previous figure, only now traces are shown vertically
instead of horizontally. Note that for this plot x-axis is not fixed,
meaning that the scale changes from one column to another. This plot
allows to compare frequency of different states and in case of
multiple traces to easily compare duration distribution for each
state.

```{r Hist2}
ggplot(df, aes(x = Duration)) +
  geom_histogram(aes(y = ..count.., fill = factor(Value)),
                 binwidth = diff(range(df$Duration)) / 30) +
  theme_bw() +
  scale_fill_manual(name = "State", values = getPalette(colourCount)) +
  ggtitle("Histograms for State Distribution") +
  ylab("Count") +
  xlab("Duration [ms]") +
  theme(legend.position = "none") +
  facet_grid(Origin ~ Value, scales = "free_x")
```
 |