123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300 |
- # StarPU --- Runtime system for heterogeneous multicore architectures.
- #
- # Copyright (C) 2014-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- #
- # StarPU is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published by
- # the Free Software Foundation; either version 2.1 of the License, or (at
- # your option) any later version.
- #
- # StarPU is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- #
- # See the GNU Lesser General Public License in COPYING.LGPL for more details.
- #
- <div id="table-of-contents">
- <h2>Table of Contents</h2>
- <div id="text-table-of-contents">
- <ul>
- <li><a href="#sec-1">1. Introduction</a>
- <ul>
- <li>
- <ul>
- <li><a href="#sec-1-0-1">1.0.1. How to compile</a></li>
- <li><a href="#sec-1-0-2">1.0.2. Software dependencies</a></li>
- </ul>
- </li>
- </ul>
- </li>
- <li><a href="#sec-2">2. Gantt Charts of the whole Trace</a></li>
- <li><a href="#sec-3">3. Table Summary</a></li>
- <li><a href="#sec-4">4. State Duration during the Execution Time</a></li>
- <li><a href="#sec-5">5. Distribution Histograms</a></li>
- </ul>
- </div>
- </div>
- ```{r Setup, echo=FALSE}
# Hide the R source of every chunk in the rendered report.
# The knitr:: prefix keeps this working when knitr is not attached.
knitr::opts_chunk$set(echo = FALSE)
- ```
- ```{r Install_R_libraries}
# Check whether every package named in `package` can be loaded.
#
# Args:
#   package: character vector of package names.
#
# Returns:
#   TRUE when require() succeeds for all of the packages, FALSE otherwise.
#   Packages that load successfully stay attached as a side effect.
InstalledPackage <- function(package) {
  # vapply() always yields a logical vector, even for empty input
  available <- suppressMessages(suppressWarnings(
    vapply(package, require, logical(1),
           quietly = TRUE, character.only = TRUE, warn.conflicts = FALSE)
  ))
  all(available)
}
# Tell whether a concrete CRAN mirror is configured in options("repos").
#
# Returns:
#   A single TRUE/FALSE. FALSE when the CRAN entry is still the "@CRAN@"
#   placeholder, or when there is no CRAN entry (or no "repos" option) at
#   all, so the result is always safe to use in an if() condition.
CRANChoosen <- function() {
  cran <- getOption("repos")["CRAN"]
  isTRUE(unname(!is.na(cran) & cran != "@CRAN@"))
}
# Load a package, installing it first when it is not available yet.
#
# Args:
#   package: name of the package to load (character).
#   defaultCRANmirror: repository URL used when no CRAN mirror is configured
#     and none could be chosen.
#
# Returns:
#   TRUE when the package can be loaded (possibly after installation),
#   FALSE otherwise. May set options("repos") as a side effect.
UsePackage <- function(package, defaultCRANmirror = "http://cran.at.r-project.org") {
  if (InstalledPackage(package)) {
    return(TRUE)
  }
  if (!CRANChoosen()) {
    # chooseCRANmirror() raises an error in non-interactive sessions (e.g.
    # when knitting from a script), so only prompt when someone can answer.
    if (interactive()) {
      tryCatch(chooseCRANmirror(), error = function(e) NULL)
    }
    if (!CRANChoosen()) {
      options(repos = c(CRAN = defaultCRANmirror))
    }
  }
  suppressMessages(suppressWarnings(install.packages(package)))
  InstalledPackage(package)
}
# Make sure every library needed by this report is installed and loadable
libraries <- c("ggplot2", "plyr", "data.table", "RColorBrewer")
for (libr in libraries) {
  if (!UsePackage(libr)) {
    # Name the offending package clearly instead of gluing it to "Error!"
    stop("Error! Could not load or install R package: ", libr, call. = FALSE)
  }
}
- ```
- ```{r Load_R_files}
- # Load ggplot and plyr just for the following cases
- library(ggplot2)
- library(plyr)
- library(data.table)
- library(RColorBrewer)
# States that are not computation kernels; any state outside this list is
# treated as a kernel when ordering and filtering the data below.
def_states <- c(
  "Initializing", "Deinitializing", "Overhead", "Nothing", "Sleeping",
  "Freeing", "Allocating", "WritingBack", "FetchingInput", "PushingOutput",
  "Callback", "Progressing", "Unpartitioning", "AllocatingReuse",
  "Reclaiming", "DriverCopy", "DriverCopyAsync", "Scheduling", "Executing"
)
# Read one trace .csv file and prepare it for plotting.
#
# The file must have no header and eight columns, which are named
# Nature, ResourceId, Type, Start, End, Duration, Depth and Value.
#
# Args:
#   file: path of the .csv file; also stored in the Origin column.
#   range1, range2: unused; kept so existing calls keep working.
#
# Returns:
#   A data frame with columns ResourceId, Start, End, Duration, Value and
#   Origin, where abbreviated state codes are expanded to full names, Start
#   is rounded to one decimal and shifted so the earliest event starts at
#   zero, and End is recomputed as Start + Duration.
read_df <- function(file, range1, range2) {
  df <- read.csv(file, header = FALSE, strip.white = TRUE)
  names(df) <- c("Nature", "ResourceId", "Type", "Start", "End", "Duration",
                 "Depth", "Value")
  df <- df[!(names(df) %in% c("Nature", "Type", "Depth"))]
  df$Origin <- as.factor(as.character(file))

  # Abbreviated state codes and the full names they stand for
  state_names <- c(
    "F" = "Freeing", "A" = "Allocating", "W" = "WritingBack",
    "No" = "Nothing", "I" = "Initializing", "D" = "Deinitializing",
    "Fi" = "FetchingInput", "Po" = "PushingOutput", "C" = "Callback",
    "B" = "Overhead", "Sc" = "Scheduling", "E" = "Executing",
    "Sl" = "Sleeping", "P" = "Progressing", "U" = "Unpartitioning",
    "Ar" = "AllocatingReuse", "R" = "Reclaiming", "Co" = "DriverCopy",
    "CoA" = "DriverCopyAsync"
  )
  # Expand known codes in one pass; anything else (e.g. kernel names) is kept
  value <- as.character(df$Value)
  code <- match(value, names(state_names))
  known <- !is.na(code)
  value[known] <- unname(state_names[code[known]])

  # Small cleanup (End is not rounded: it is recomputed from Start below)
  df$Start <- round(df$Start, digits = 1)
  df$ResourceId <- as.factor(df$ResourceId)
  df$Value <- as.factor(value)

  # Start from zero
  df$Start <- df$Start - min(df$Start)
  df$End <- df$Start + df$Duration

  df
}
- ```
- ```{r Load_traces}
# Trace files to analyse; `input_traces` may be defined before knitting,
# otherwise the two example traces are used.
if (!exists("input_traces")) {
  input_traces <- c("example.native.trace.csv", "example.simgrid.trace.csv")
}
stopifnot(length(input_traces) > 0)

# Read every trace and stack them with a single bind instead of growing the
# table inside a loop
df <- rbindlist(lapply(input_traces, read_df))

# Color palettes
colourCount <- length(unique(df$Value))
getPalette <- colorRampPalette(brewer.pal(9, "Set1"))

# Order of Value so we can have good colors: kernel states (everything that
# is not listed in def_states) come first, sorted by name
ker_states <- setdiff(as.character(unique(df$Value)), def_states)
ordered_states <- append(sort(ker_states), def_states)
df$Value <- factor(df$Value, levels = ordered_states)

# Order of ResourceId so we can have y-axis
df$ResourceId <- factor(df$ResourceId,
                        levels = sort(as.character(unique(df$ResourceId))))
- ```
- # Introduction
This document presents a basic analysis of multiple StarPU
traces. First, the *paje* traces are converted into *.csv* files, which
are then analyzed with **R**. This summary is a first step that
should help researchers verify their hypotheses or find problematic
areas that require a more exhaustive investigation.
- Be cautious, as the following results are only a brief analysis of
- the traces and many important phenomena could still be hidden. Also,
- be very careful when comparing different states or traces. Even
- though some large discrepancies can be irrelevant, in other cases
- even the smallest differences can be essential in understanding what
- exactly happened during the StarPU execution.
### How to compile

    ./starpu_summary.sh example.native.trace example.simgrid.trace
- ### Software dependencies
In order to run this analysis you need to have R installed:

    sudo apt-get install r-base
The easiest way to convert *paje* traces generated by StarPU to *.csv* is to use the *pjdump* program (<https://github.com/schnorr/pajeng>), so we encourage users to install it.
Once R is installed, start R (e.g., from a terminal) and install the *knitr* package:

    R> install.packages("knitr")
The additional R packages used in this analysis (*ggplot2, plyr, data.table, RColorBrewer*) will be installed automatically when the document is compiled for the first time. If there is any trouble, install them by hand directly from R (the same way as *knitr*).
- # Gantt Charts of the whole Trace
First, we show a simple Gantt chart of every trace. The x-axis is a
simple timeline of the execution, the *Resources* on the y-axis correspond
to the different CPUs/GPUs that were used, and the different colors
represent the different *States* of the application.
This kind of figure can often reveal idle time or
synchronization problems. A small disadvantage is that in most cases
there are too many states, so it is impossible to display them all
on a single plot without aggregation. Therefore, for any strange
behavior in a certain part of the trace, we strongly suggest zooming
in on the interval in which it occurred.
- ```{r Gantt1}
# Gantt chart of all states: one panel per trace (Origin), one row per
# resource, one segment per event colored by its state.
ggplot(df, aes(x = Start, xend = End, y = factor(ResourceId),
               yend = factor(ResourceId), color = Value)) +
  theme_bw() +
  scale_color_manual(name = "State", values = getPalette(colourCount)) +
  geom_segment(size = 8) +
  ylab("Resource") +
  xlab("Time [ms]") +
  # `scales` spelled out in full instead of relying on partial matching
  facet_wrap(~Origin, ncol = 1, scales = "free_y")
- ```
- Second, we will concentrate only on computation kernel states, to
- get rid of visualization artifacts that can be introduced by other
- (sometimes irrelevant) states. Normally, this plot should not be too
- different from the previous one.
- ```{r Gantt2}
# Select only computation kernels, i.e. drop every state in def_states
df1 <- df[!(df$Value %in% def_states), ]

# Start from zero
m <- min(df1$Start)
df1$Start <- df1$Start - m
df1$End <- df1$Start + df1$Duration

# Plot: same Gantt layout as before, restricted to kernel states
ggplot(df1, aes(x = Start, xend = End, y = factor(ResourceId),
                yend = factor(ResourceId), color = Value)) +
  theme_bw() +
  scale_color_manual(name = "State", values = getPalette(colourCount)) +
  geom_segment(size = 8) +
  ylab("Resource") +
  xlab("Time [ms]") +
  # `scales` spelled out in full instead of relying on partial matching
  facet_wrap(~Origin, ncol = 1, scales = "free_y")
- ```
- # Table Summary
Here we present how much time the application spent in each state
(OverallDuration), how many times it was in that state (Count),
the mean and median values of the duration (Mean and Median), and finally
the standard deviation (StandDev).
The general information provided by this table can sometimes give
application experts an idea of which parts of the code are not working as
desired. Be aware that this kind of table hides many important
things, such as outliers, multiple modes, etc.
- ```{r Table}
# Widen the console output so the summary table is not wrapped
options(width = 120)

# Duration statistics for every (state, trace) pair
ddply(
  .data = df,
  .variables = .(Value, Origin),
  .fun = summarize,
  OverallDuration = sum(Duration),
  Count = length(Duration),
  Mean = mean(Duration),
  Median = median(Duration),
  StandDev = sd(Duration)
)
- ```
- # State Duration during the Execution Time
Now, we show how the duration of each state changed during the
execution. This can display the general behavior of a state, and show
whether there are outliers or multiple modes, whether some events occur
in groups, etc. It can also reveal a strange behavior of a state
during a certain time interval, which should later be investigated
more carefully.
However, since each event is represented by a single point (and
there is no "alpha" factor), events that happen almost
simultaneously are overplotted. Therefore, the density of events along
the execution time may not be easy to read.
- ```{r Dur}
# Duration of every event against its start time: one row of panels per
# state, one column per trace, with an independent y-axis per row.
ggplot(df, aes(x = Start, y = Duration)) +
  geom_point(aes(color = Value)) +
  theme_bw() +
  scale_color_manual(name = "State", values = getPalette(colourCount)) +
  ggtitle("State Duration during the Execution Time") +
  theme(legend.position = "none") +
  ylab("Duration [ms]") +
  xlab("Time [ms]") +
  # `scales` spelled out in full instead of relying on partial matching
  facet_grid(Value ~ Origin, scales = "free_y")
- ```
- # Distribution Histograms
Finally, we show the distribution of *Duration* for each state in the form
of histograms. The x-axis is partitioned into bins with equidistant time
intervals in milliseconds, while the y-axis represents the number of
occurrences inside such intervals for a certain state. Note that for
the first plot the y-axis is not fixed, meaning that the scale changes
from one row to another. This plot allows one not only to see what
the most frequent duration of a state was, but also to compare
durations between different states.
- ```{r Hist1}
# Duration histograms, states in rows and traces in columns. The overall
# duration range is cut into 30 equal bins; each row has its own y scale.
ggplot(df, aes(x = Duration)) +
  geom_histogram(
    aes(y = ..count.., fill = factor(Value)),
    binwidth = diff(range(df$Duration)) / 30
  ) +
  theme_bw() +
  scale_fill_manual(name = "State", values = getPalette(colourCount)) +
  labs(
    title = "Histograms for State Distribution",
    x = "Duration [ms]",
    y = "Count"
  ) +
  theme(legend.position = "none") +
  facet_grid(Value ~ Origin, scales = "free_y")
- ```
Similar to the previous figure, only now the traces are shown vertically
instead of horizontally. Note that for this plot the x-axis is not fixed,
meaning that the scale changes from one column to another. This plot
makes it possible to compare the frequency of different states and, in
the case of multiple traces, to easily compare the duration distribution
of each state.
- ```{r Hist2}
# Duration histograms, traces in rows and states in columns. The overall
# duration range is cut into 30 equal bins; each column has its own x scale.
ggplot(df, aes(x = Duration)) +
  geom_histogram(
    aes(y = ..count.., fill = factor(Value)),
    binwidth = diff(range(df$Duration)) / 30
  ) +
  theme_bw() +
  scale_fill_manual(name = "State", values = getPalette(colourCount)) +
  labs(
    title = "Histograms for State Distribution",
    x = "Duration [ms]",
    y = "Count"
  ) +
  theme(legend.position = "none") +
  facet_grid(Origin ~ Value, scales = "free_x")
- ```
|