
Docker related stuff

demo.masouros 4 years ago
commit
9c38680a23

File diff suppressed because it is too large
+ 15 - 0
docker/README.md


+ 54 - 0
docker/hibench-base/Dockerfile

@@ -0,0 +1,54 @@
+FROM ubuntu:18.04
+
+RUN apt -y update && apt-get -y update
+
+RUN apt -y install git bc 
+
+RUN git clone https://github.com/Intel-bigdata/HiBench.git
+
+#RUN apt-get -y install maven openjdk-8-jre-headless scala openjdk-8-jdk vim wget python2.7 software-properties-common
+RUN apt-get -y install maven scala vim wget python2.7 software-properties-common
+RUN wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add -
+RUN add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/
+RUN apt update
+RUN apt -y install adoptopenjdk-8-hotspot
+ENV JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64
+
+
+WORKDIR /HiBench
+RUN mvn -Psparkbench -Dspark=3.0 -Dscala=2.12 -Dhadoop=2.7 clean package
+ENV HIBENCH_HOME=/HiBench
+
+WORKDIR /usr/bin
+RUN ln -s python2.7 python2
+RUN ln -s python2.7 python
+
+WORKDIR /usr/local
+
+RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.4/hadoop-2.7.4.tar.gz
+RUN tar xzvf hadoop-2.7.4.tar.gz
+RUN mv hadoop-2.7.4 hadoop
+ENV HADOOP_HOME=/usr/local/hadoop
+ENV HADOOP_INSTALL=/usr/local/hadoop
+ENV HADOOP_PREFIX=/usr/local/hadoop
+ENV HADOOP_COMMON_HOME=/usr/local/hadoop
+ENV HADOOP_HDFS_HOME=/usr/local/hadoop
+ENV HADOOP_MAPRED_HOME=/usr/local/hadoop
+ENV HADOOP_YARN_HOME=/usr/local/hadoop
+ENV HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
+ENV HADOOP_EXAMPLES_JAR=/usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.4.jar
+#ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
+ENV PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
+RUN rm ${HADOOP_CONF_DIR}/core-site.xml
+
+
+WORKDIR /
+RUN wget https://ftp.cc.uoc.gr/mirrors/apache/spark/spark-3.0.1/spark-3.0.1-bin-hadoop2.7.tgz
+RUN tar xvf spark-3.0.1-bin-hadoop2.7.tgz
+RUN mv spark-3.0.1-bin-hadoop2.7 spark
+ENV SPARK_HOME=/spark
+RUN mkdir ~/.kube
+
+COPY spark-exec.template.yaml /template.yaml
+
+WORKDIR /HiBench/
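A minimal sketch of how this base image might be built and exercised locally. The image tag and the wordcount scripts (assumed to follow the stock HiBench bin/workloads layout) are illustrative and not part of the commit:

    # Build the image from the repository root (tag is an assumption).
    docker build -t hibench-base:latest docker/hibench-base

    # Open a shell in the image; HIBENCH_HOME, HADOOP_HOME and SPARK_HOME are
    # already exported by the Dockerfile above.
    docker run --rm -it hibench-base:latest bash

    # Inside the container, prepare and run a workload with the stock HiBench
    # scripts (paths assume the upstream layout; hadoop.conf/spark.conf must be in place):
    bin/workloads/micro/wordcount/prepare/prepare.sh
    bin/workloads/micro/wordcount/spark/run.sh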

+ 24 - 0
docker/hibench-base/conf/core-site-ime.xml

@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+    <configuration>
+        <property>
+            <name>fs.default.name</name>
+            <value>hdfs://hdfs-namenode-ime.iccs-hibench:8020</value>
+        </property>
+    </configuration>

+ 24 - 0
docker/hibench-base/conf/core-site-lustre.xml

@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+    <configuration>
+        <property>
+            <name>fs.default.name</name>
+            <value>hdfs://hdfs-namenode-lustre.iccs-hibench:8020</value>
+        </property>
+    </configuration>

+ 24 - 0
docker/hibench-base/conf/core-site-nfs.xml

@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+    <configuration>
+        <property>
+            <name>fs.default.name</name>
+            <value>hdfs://hdfs-namenode-nfs.iccs-hibench:8020</value>
+        </property>
+    </configuration>

+ 14 - 0
docker/hibench-base/conf/hadoop-ime.conf

@@ -0,0 +1,14 @@
+# Hadoop home
+hibench.hadoop.home ${HADOOP_HOME}
+
+# The path of hadoop executable
+hibench.hadoop.executable ${hibench.hadoop.home}/bin/hadoop
+
+# Hadoop configuration directory
+hibench.hadoop.configure.dir ${hibench.hadoop.home}/etc/hadoop
+
+# The root HDFS path to store HiBench data
+hibench.hdfs.master hdfs://hdfs-namenode-ime.iccs-hibench:8020
+
+# Hadoop release provider. Supported value: apache, cdh5, hdp
+hibench.hadoop.release apache
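As a quick sanity check that a container using this configuration can actually reach the IME-backed namenode, something like the following can be run inside the image; the commands use the Hadoop client installed by the Dockerfile, and the target directory is illustrative:

    # List the HDFS root on the namenode this file points at.
    ${HADOOP_HOME}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -ls hdfs://hdfs-namenode-ime.iccs-hibench:8020/

    # Create the HiBench data root if it does not exist yet (path is an assumption).
    ${HADOOP_HOME}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir -p /HiBench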

+ 14 - 0
docker/hibench-base/conf/hadoop-lustre.conf

@@ -0,0 +1,14 @@
+# Hadoop home
+hibench.hadoop.home ${HADOOP_HOME}
+
+# The path of hadoop executable
+hibench.hadoop.executable ${hibench.hadoop.home}/bin/hadoop
+
+# Hadoop configuration directory
+hibench.hadoop.configure.dir ${hibench.hadoop.home}/etc/hadoop
+
+# The root HDFS path to store HiBench data
+hibench.hdfs.master hdfs://hdfs-namenode-lustre.iccs-hibench:8020
+
+# Hadoop release provider. Supported value: apache, cdh5, hdp
+hibench.hadoop.release apache

+ 14 - 0
docker/hibench-base/conf/hadoop-nfs.conf

@@ -0,0 +1,14 @@
+# Hadoop home
+hibench.hadoop.home ${HADOOP_HOME}
+
+# The path of hadoop executable
+hibench.hadoop.executable ${hibench.hadoop.home}/bin/hadoop
+
+# Hadoop configuration directory
+hibench.hadoop.configure.dir ${hibench.hadoop.home}/etc/hadoop
+
+# The root HDFS path to store HiBench data
+hibench.hdfs.master hdfs://hdfs-namenode-nfs.iccs-hibench:8020
+
+# Hadoop release provider. Supported value: apache, cdh5, hdp
+hibench.hadoop.release apache

+ 46 - 0
docker/hibench-base/conf/spark.conf

@@ -0,0 +1,46 @@
+# Spark home
+hibench.spark.home      /spark
+
+# Spark master
+#   standalone mode: spark://xxx:7077
+#   YARN mode: yarn-client
+hibench.spark.master    k8s://https://172.9.0.240:6443
+
+# executor number and cores when running on Yarn
+hibench.yarn.executor.num     2
+hibench.yarn.executor.cores   4
+
+# executor and driver memory in standalone & YARN mode
+spark.executor.memory  4g
+spark.driver.memory    4g
+
+# set spark parallelism property according to hibench's parallelism value
+spark.default.parallelism     ${hibench.default.map.parallelism}
+
+# set spark sql's default shuffle partitions according to hibench's parallelism value
+spark.sql.shuffle.partitions  ${hibench.default.shuffle.parallelism}
+
+#======================================================
+# Spark Streaming
+#======================================================
+# Spark streaming batch interval in milliseconds (default: 100)
+hibench.streambench.spark.batchInterval          100
+
+# Number of nodes that will receive kafka input (default: 4)
+hibench.streambench.spark.receiverNumber        4
+
+# Indicate RDD storage level. (default: 2)
+# 0 = StorageLevel.MEMORY_ONLY
+# 1 = StorageLevel.MEMORY_AND_DISK_SER
+# other = StorageLevel.MEMORY_AND_DISK_SER_2
+hibench.streambench.spark.storageLevel 2
+
+# indicate whether to test the write ahead log new feature (default: false)
+hibench.streambench.spark.enableWAL false
+
+# if testWAL is true, this path to store stream context in hdfs shall be specified. If false, it can be empty (default: /var/tmp)
+hibench.streambench.spark.checkpointPath /var/tmp
+
+# whether to use the direct approach or not (default: true)
+hibench.streambench.spark.useDirectMode true
+
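For reference, run_spark_job in workload_functions.sh assembles roughly the spark-submit call below from these values; the class name, input/output arguments, and the mapping of the executor count/cores onto spark.executor.* properties on Kubernetes are assumptions:

    ${SPARK_HOME}/bin/spark-submit \
        --properties-file ${SPARK_PROP_CONF} \
        --class com.intel.hibench.sparkbench.micro.ScalaWordCount \
        --master k8s://https://172.9.0.240:6443 \
        --conf spark.executor.instances=2 \
        --conf spark.executor.cores=4 \
        --conf spark.executor.memory=4g \
        --conf spark.driver.memory=4g \
        ${SPARKBENCH_JAR} <input-hdfs-path> <output-hdfs-path>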

+ 27 - 0
docker/hibench-base/spark-exec.template.yaml

@@ -0,0 +1,27 @@
+apiVersion: v1
+kind: Pod
+spec:
+  containers:
+  - name: spark-kubernetes-executor
+    securityContext:
+      privileged: true
+    volumeMounts:
+    - name: hibench-results
+      mountPath: /results
+    env:
+      - name: SCENARIO
+        value: __FIXME__
+  volumes:
+  - name: hibench-results
+    hostPath:
+      path: /home_nfs/home_masourod/hibench-results
+      type: Directory
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+        - matchExpressions:
+          - key: hibench
+            operator: In
+            values:
+            - "true"
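Spark 3.0 can attach this template to every executor pod through spark.kubernetes.executor.podTemplateFile; the SCENARIO placeholder presumably gets filled in before submission. A hedged sketch of that wiring, where the sed substitution, the scenario name and the rendered file path are assumptions:

    # Render the template copied to /template.yaml by the hibench-base Dockerfile.
    sed 's/__FIXME__/nfs-terasort/' /template.yaml > /tmp/executor-template.yaml

    # Point Spark at the rendered template when submitting.
    ${SPARK_HOME}/bin/spark-submit \
        --master k8s://https://172.9.0.240:6443 \
        --conf spark.kubernetes.executor.podTemplateFile=/tmp/executor-template.yaml \
        ...   # remaining class/jar/arguments as in the spark.conf sketch above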

File diff suppressed because it is too large
+ 450 - 0
docker/hibench-base/workload_functions.sh


+ 450 - 0
docker/hibench-base/workload_functions.sh.2

@@ -0,0 +1,450 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -u
+
+export HIBENCH_PRINTFULLLOG=0
+this="${BASH_SOURCE-$0}"
+workload_func_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
+. ${workload_func_bin}/assert.sh
+. ${workload_func_bin}/color.sh
+
+HIBENCH_CONF_FOLDER=${HIBENCH_CONF_FOLDER:-${workload_func_bin}/../../conf}
+
+function enter_bench(){		# declare the entrance of a workload
+    assert $1 "Workload name not specified."
+    assert $2 "Workload config file not specified."
+    assert $3 "Current workload folder not specified."
+    export HIBENCH_CUR_WORKLOAD_NAME=$1
+    workload_config_file=$2
+    workload_folder=$3
+    shift 3
+    patching_args=$@
+    echo "patching args=$patching_args"
+    local CONF_FILE=`${workload_func_bin}/load_config.py ${HIBENCH_CONF_FOLDER} $workload_config_file $workload_folder $patching_args`
+    . $CONF_FILE
+}
+
+function leave_bench(){		# declare the workload is finished
+    assert $HIBENCH_CUR_WORKLOAD_NAME "BUG, HIBENCH_CUR_WORKLOAD_NAME unset."
+    unset HIBENCH_CUR_WORKLOAD_NAME
+}
+
+function show_bannar(){        # print banner
+    assert $HIBENCH_CUR_WORKLOAD_NAME "HIBENCH_CUR_WORKLOAD_NAME not specified."
+    assert $1 "Unknown banner operation"
+    echo -e "${BGreen}$1 ${Color_Off}${UGreen}$HIBENCH_CUR_WORKLOAD_NAME${Color_Off} ${BGreen}bench${Color_Off}"
+}
+
+function timestamp(){		# get current timestamp
+    sec=`date +%s`
+    nanosec=`date +%N`
+    re='^[0-9]+$'
+    if ! [[ $nanosec =~ $re ]] ; then
+        nanosec=0
+    fi
+    tmp=`expr $sec \* 1000 `
+    msec=`expr $nanosec / 1000000 `
+    echo `expr $tmp + $msec`
+}
+
+function start_monitor(){
+    MONITOR_PID=`${workload_func_bin}/monitor.py ${HIBENCH_CUR_WORKLOAD_NAME} $$ ${WORKLOAD_RESULT_FOLDER}/monitor.log ${WORKLOAD_RESULT_FOLDER}/bench.log ${WORKLOAD_RESULT_FOLDER}/monitor.html ${SLAVES} &`
+#    echo "start monitor, got child pid:${MONITOR_PID}" > /dev/stderr
+    echo ${MONITOR_PID}
+}
+
+function stop_monitor(){
+    MONITOR_PID=$1
+    assert $1 "monitor pid missing"
+#    echo "stop monitor, kill ${MONITOR_PID}" > /dev/stderr
+    kill ${MONITOR_PID}
+}
+
+function get_field_name() {	# print report column header
+    printf "${REPORT_COLUMN_FORMATS}" Type Date Time Input_data_size "Duration(s)" "Throughput(bytes/s)" Throughput/node 
+}
+
+function gen_report() {		# dump the result to report file
+    assert ${HIBENCH_CUR_WORKLOAD_NAME} "HIBENCH_CUR_WORKLOAD_NAME not specified."
+    local start=$1
+    local end=$2
+    local size=$3
+    which bc > /dev/null 2>&1
+    if [ $? -eq 1 ]; then
+	assert 0 "\"bc\" utility missing. Please install it to generate proper report."
+        return 1
+    fi
+    local duration=$(echo "scale=3;($end-$start)/1000"|bc)
+    local tput=`echo "$size/$duration"|bc`
+#    local nodes=`cat ${SPARK_HOME}/conf/slaves 2>/dev/null | grep -v '^\s*$' | sed "/^#/ d" | wc -l`
+    local nodes=`echo ${SLAVES} | wc -w`
+    nodes=${nodes:-1}
+    
+    if [ $nodes -eq 0 ]; then nodes=1; fi
+    local tput_node=`echo "$tput/$nodes"|bc`
+
+    REPORT_TITLE=`get_field_name`
+    if [ ! -f ${HIBENCH_REPORT}/${HIBENCH_REPORT_NAME} ] ; then
+        echo "${REPORT_TITLE}" > ${HIBENCH_REPORT}/${HIBENCH_REPORT_NAME}
+    fi
+
+    REPORT_LINE=$(printf "${REPORT_COLUMN_FORMATS}" ${HIBENCH_CUR_WORKLOAD_NAME} $(date +%F) $(date +%T) $size $duration $tput $tput_node)
+    echo "${REPORT_LINE}" >> ${HIBENCH_REPORT}/${HIBENCH_REPORT_NAME}
+    echo "# ${REPORT_TITLE}" >> ${HIBENCH_WORKLOAD_CONF}
+    echo "# ${REPORT_LINE}" >> ${HIBENCH_WORKLOAD_CONF}
+}
+
+function rmr_hdfs(){		# rm -r for hdfs
+    assert $1 "dir parameter missing"
+    RMDIR_CMD="fs -rm -r -skipTrash"
+    local CMD="$HADOOP_EXECUTABLE --config $HADOOP_CONF_DIR $RMDIR_CMD $1"
+    echo -e "${BCyan}hdfs rm -r: ${Cyan}${CMD}${Color_Off}" 1>&2
+    execute_withlog ${CMD}
+}
+
+function upload_to_hdfs(){
+    assert $1 "local parameter missing"
+    assert $2 "remote parameter missing"
+    LOCAL_FILE_PATH=$1
+    REMOTE_FILE_PATH=$2
+    echo "REMOTE_FILE_PATH:$REMOTE_FILE_PATH" 1>&2
+    if [[ `echo $REMOTE_FILE_PATH | tr A-Z a-z` = hdfs://* ]]; then # strip leading "HDFS://xxx:xxx/" string
+        echo "HDFS_MASTER:$HDFS_MASTER" 1>&2
+        local LEADING_HDFS_STRING_LENGTH=${#HDFS_MASTER}
+        REMOTE_FILE_PATH=${REMOTE_FILE_PATH:$LEADING_HDFS_STRING_LENGTH}
+        echo "stripped REMOTE_FILE_PATH:$REMOTE_FILE_PATH" 1>&2
+    fi
+
+    # clear previous package file
+    local CMD="$HADOOP_EXECUTABLE --config $HADOOP_CONF_DIR fs -rm $REMOTE_FILE_PATH"
+    echo -e "${BCyan}hdfs rm : ${Cyan}${CMD}${Color_Off}" 1>&2
+    execute_withlog ${CMD}
+
+    # prepare parent folder
+    CMD="$HADOOP_EXECUTABLE --config $HADOOP_CONF_DIR fs -mkdir `dirname $REMOTE_FILE_PATH`"
+    echo -e "${BCyan}hdfs mkdir : ${Cyan}${CMD}${Color_Off}" 1>&2
+    execute_withlog ${CMD}
+
+    # upload
+    CMD="$HADOOP_EXECUTABLE --config $HADOOP_CONF_DIR fs -put $LOCAL_FILE_PATH $REMOTE_FILE_PATH"
+    echo -e "${BCyan}hdfs put : ${Cyan}${CMD}${Color_Off}" 1>&2
+    execute_withlog ${CMD}
+}
+
+function dus_hdfs(){                # du -s for hdfs
+    assert $1 "dir parameter missing"
+    DUS_CMD="fs -du -s"
+    local CMD="$HADOOP_EXECUTABLE --config $HADOOP_CONF_DIR $DUS_CMD $1"
+    echo -e "${BPurple}hdfs du -s: ${Purple}${CMD}${Color_Off}" 1>&2
+    execute_withlog ${CMD}
+}
+
+
+function check_dir() {                # ensure dir is created
+    local dir=$1
+    assert $1 "dir parameter missing"
+    if [ -z "$dir" ];then
+        echo -e "${BYellow}WARN${Color_Off}: payload missing."
+        return 1
+    fi
+    if [ ! -d "$dir" ];then
+        echo -e "${BRed}ERROR${Color_Off}: directory $dir does not exist."
+        exit 1
+    fi
+    touch "$dir"/touchtest
+    if [ $? -ne 0 ]; then
+        echo -e "${BRed}ERROR${Color_Off}: directory unwritable."
+        exit 1
+    else
+        rm "$dir"/touchtest
+    fi
+}
+
+function dir_size() {                
+    for item in $(dus_hdfs $1); do
+        if [[ $item =~ ^[0-9]+$ ]]; then
+            echo $item
+        fi
+    done
+}
+
+function run_spark_job() {
+    LIB_JARS=
+    while (($#)); do
+      if [ "$1" = "--jars" ]; then
+        LIB_JARS="--jars $2"
+        shift 2
+        continue
+      fi
+      break
+    done
+
+    CLS=$1
+    shift
+
+    export_withlog SPARKBENCH_PROPERTIES_FILES
+
+    YARN_OPTS=""
+    if [[ "$SPARK_MASTER" == yarn-* ]] || [[ "$SPARK_MASTER" == yarn ]]; then
+        export_withlog HADOOP_CONF_DIR
+        
+        YARN_OPTS="--num-executors ${YARN_NUM_EXECUTORS}"
+        if [[ -n "${YARN_EXECUTOR_CORES:-}" ]]; then
+            YARN_OPTS="${YARN_OPTS} --executor-cores ${YARN_EXECUTOR_CORES}"
+        fi
+        if [[ -n "${SPARK_YARN_EXECUTOR_MEMORY:-}" ]]; then
+            YARN_OPTS="${YARN_OPTS} --executor-memory ${SPARK_YARN_EXECUTOR_MEMORY}"
+        fi
+        if [[ -n "${SPARK_YARN_DRIVER_MEMORY:-}" ]]; then
+            YARN_OPTS="${YARN_OPTS} --driver-memory ${SPARK_YARN_DRIVER_MEMORY}"
+        fi
+    fi
+    if [[ "$CLS" == *.py ]]; then 
+        LIB_JARS="$LIB_JARS --jars ${SPARKBENCH_JAR}"
+        SUBMIT_CMD="${SPARK_HOME}/bin/spark-submit ${LIB_JARS} --properties-file ${SPARK_PROP_CONF} --master ${SPARK_MASTER} ${YARN_OPTS} ${CLS} $@"
+    else
+        SUBMIT_CMD="${SPARK_HOME}/bin/spark-submit ${LIB_JARS} --properties-file ${SPARK_PROP_CONF} --class ${CLS} --master ${SPARK_MASTER} ${YARN_OPTS} ${SPARKBENCH_JAR} $@"
+    fi
+    echo -e "${BGreen}Submit Spark job: ${Green}${SUBMIT_CMD}${Color_Off}"
+    MONITOR_PID=`start_monitor`
+    execute_withlog ${SUBMIT_CMD}
+    result=$?
+    stop_monitor ${MONITOR_PID}
+    if [ $result -ne 0 ]
+    then
+        echo -e "${BRed}ERROR${Color_Off}: Spark job ${BYellow}${CLS}${Color_Off} failed to run successfully."
+        echo -e "${BBlue}Hint${Color_Off}: You can go to ${BYellow}${WORKLOAD_RESULT_FOLDER}/bench.log${Color_Off} to check the detailed log.\nOpening the log tail for you:\n"
+        tail ${WORKLOAD_RESULT_FOLDER}/bench.log
+        exit $result
+    fi
+}
+
+function run_storm_job(){
+    CMD="${STORM_HOME}/bin/storm jar ${STREAMBENCH_STORM_JAR} $@"
+    echo -e "${BGreen}Submit Storm Job: ${Green}$CMD${Color_Off}"
+    execute_withlog $CMD
+}
+
+function run_gearpump_app(){
+    CMD="${GEARPUMP_HOME}/bin/gear app -executors ${STREAMBENCH_GEARPUMP_EXECUTORS} -jar ${STREAMBENCH_GEARPUMP_JAR} $@"
+    echo -e "${BGreen}Submit Gearpump Application: ${Green}$CMD${Color_Off}"
+    execute_withlog $CMD
+}
+
+function run_flink_job(){
+    CMD="${FLINK_HOME}/bin/flink run -p ${STREAMBENCH_FLINK_PARALLELISM} -m ${HIBENCH_FLINK_MASTER} $@ ${STREAMBENCH_FLINK_JAR} ${SPARKBENCH_PROPERTIES_FILES}"
+    echo -e "${BGreen}Submit Flink Job: ${Green}$CMD${Color_Off}"
+    execute_withlog $CMD
+}
+
+function run_hadoop_job(){
+    ENABLE_MONITOR=1
+    if [ "$1" = "--without-monitor" ]; then
+        ENABLE_MONITOR=0
+        shift 1
+    fi
+    local job_jar=$1
+    shift
+    local job_name=$1
+    shift
+    local tail_arguments=$@
+    local CMD="${HADOOP_EXECUTABLE} --config ${HADOOP_CONF_DIR} jar $job_jar $job_name $tail_arguments"
+    echo -e "${BGreen}Submit MapReduce Job: ${Green}$CMD${Color_Off}"
+    if [ ${ENABLE_MONITOR} = 1 ]; then
+        MONITOR_PID=`start_monitor`
+    fi
+    execute_withlog ${CMD}
+    result=$?
+    if [ ${ENABLE_MONITOR} = 1 ]; then
+        stop_monitor ${MONITOR_PID}
+    fi
+    if [ $result -ne 0 ]; then
+        echo -e "${BRed}ERROR${Color_Off}: Hadoop job ${BYellow}${job_jar} ${job_name}${Color_Off} failed to run successfully."
+        echo -e "${BBlue}Hint${Color_Off}: You can go to ${BYellow}${WORKLOAD_RESULT_FOLDER}/bench.log${Color_Off} to check the detailed log.\nOpening the log tail for you:\n"
+        tail ${WORKLOAD_RESULT_FOLDER}/bench.log
+        exit $result
+    fi
+}
+
+function ensure_hivebench_release(){
+    if [ ! -e ${HIBENCH_HOME}"/hadoopbench/sql/target/"$HIVE_RELEASE".tar.gz" ]; then
+        assert 0 "Error: The Hive bin file hasn't been downloaded by Maven, please check!"
+        exit
+    fi
+
+    cd ${HIBENCH_HOME}"/hadoopbench/sql/target"
+    if [ ! -d $HIVE_HOME ]; then
+        tar zxf $HIVE_RELEASE".tar.gz"
+    fi
+    export_withlog HADOOP_EXECUTABLE
+}
+
+function ensure_mahout_release (){
+    if [ ! -e ${HIBENCH_HOME}"/hadoopbench/mahout/target/"$MAHOUT_RELEASE".tar.gz" ]; then
+        assert 0 "Error: The Mahout bin file hasn't been downloaded by Maven, please check!"
+        exit
+    fi
+
+    cd ${HIBENCH_HOME}"/hadoopbench/mahout/target"
+    if [ ! -d $MAHOUT_HOME ]; then
+        tar zxf $MAHOUT_RELEASE".tar.gz"
+    fi
+    export_withlog HADOOP_EXECUTABLE
+    export_withlog HADOOP_HOME
+    export_withlog HADOOP_CONF_DIR    
+}
+
+function execute () {
+    CMD="$@"
+    echo -e "${BCyan}Executing: ${Cyan}${CMD}${Color_Off}"
+    $CMD
+}
+
+function printFullLog(){
+    export HIBENCH_PRINTFULLLOG=1
+}
+
+function execute_withlog () {
+    CMD="$@"
+    if [ -t 1 ] ; then          # Terminal, beautify the output.
+        ${workload_func_bin}/execute_with_log.py ${WORKLOAD_RESULT_FOLDER}/bench.log $CMD
+    else                        # pipe, do nothing.
+        $CMD
+    fi
+}
+
+
+function export_withlog () {
+    var_name=$1
+    var_val=${!1}
+    assert $1 "export without a variable name!"
+    echo -e "${BCyan}Export env: ${Cyan}${var_name}${BCyan}=${Cyan}${var_val}${Color_Off}"
+    export ${var_name}
+}
+
+function command_exist ()
+{
+    result=$(which $1)
+    if [ $? -eq 0 ] 
+    then
+        return 0
+    else
+        return 1
+    fi  
+}
+
+function ensure_nutchindexing_release () {
+    if [ ! -e ${HIBENCH_HOME}"/hadoopbench/nutchindexing/target/apache-nutch-1.2-bin.tar.gz" ]; then
+        assert 0 "Error: The Nutch bin file hasn't been downloaded by Maven, please check!"
+        exit
+    fi
+
+    NUTCH_ROOT=${WORKLOAD_RESULT_FOLDER}
+    cp -a $NUTCH_DIR/nutch $NUTCH_ROOT
+
+    cd ${HIBENCH_HOME}"/hadoopbench/nutchindexing/target"
+    if [ ! -d $NUTCH_HOME ]; then
+        tar zxf apache-nutch-1.2-bin.tar.gz
+    fi
+    find $NUTCH_HOME/lib ! -name "lucene-*" -type f -exec rm -rf {} \;
+    rm -rf $NUTCH_ROOT/nutch_release
+    cp -a $NUTCH_HOME $NUTCH_ROOT/nutch_release
+    NUTCH_HOME_WORKLOAD=$NUTCH_ROOT/nutch_release
+    cp $NUTCH_ROOT/nutch/conf/nutch-site.xml $NUTCH_HOME_WORKLOAD/conf
+    cp $NUTCH_ROOT/nutch/bin/nutch $NUTCH_HOME_WORKLOAD/bin
+
+    # Patching jcl-over-slf4j version against cdh or hadoop2
+    mkdir $NUTCH_HOME_WORKLOAD/temp
+    unzip -q $NUTCH_HOME_WORKLOAD/nutch-1.2.job -d $NUTCH_HOME_WORKLOAD/temp
+    rm -f $NUTCH_HOME_WORKLOAD/temp/lib/jcl-over-slf4j-*.jar
+    rm -f $NUTCH_HOME_WORKLOAD/temp/lib/slf4j-log4j*.jar
+    cp ${NUTCH_DIR}/target/dependency/jcl-over-slf4j-*.jar $NUTCH_HOME_WORKLOAD/temp/lib
+    rm -f $NUTCH_HOME_WORKLOAD/nutch-1.2.job
+    cd $NUTCH_HOME_WORKLOAD/temp
+    zip -qr $NUTCH_HOME_WORKLOAD/nutch-1.2.job *
+    rm -rf $NUTCH_HOME_WORKLOAD/temp
+
+    echo $NUTCH_HOME_WORKLOAD
+}
+
+function prepare_sql_aggregation () {
+    assert $1 "SQL file path not specified."
+    HIVEBENCH_SQL_FILE=$1
+
+    find . -name "metastore_db" -exec rm -rf "{}" \; 2>/dev/null
+
+    cat <<EOF > ${HIVEBENCH_SQL_FILE}
+USE DEFAULT;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set ${MAP_CONFIG_NAME}=$NUM_MAPS;
+set ${REDUCER_CONFIG_NAME}=$NUM_REDS;
+set hive.stats.autogather=false;
+
+DROP TABLE IF EXISTS uservisits;
+CREATE EXTERNAL TABLE uservisits (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' STORED AS  SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
+DROP TABLE IF EXISTS uservisits_aggre;
+CREATE EXTERNAL TABLE uservisits_aggre ( sourceIP STRING, sumAdRevenue DOUBLE) STORED AS  SEQUENCEFILE LOCATION '$OUTPUT_HDFS/uservisits_aggre';
+INSERT OVERWRITE TABLE uservisits_aggre SELECT sourceIP, SUM(adRevenue) FROM uservisits GROUP BY sourceIP;
+EOF
+}
+
+function prepare_sql_join () {
+    assert $1 "SQL file path not specified."
+    HIVEBENCH_SQL_FILE=$1
+
+    find . -name "metastore_db" -exec rm -rf "{}" \; 2>/dev/null
+
+    cat <<EOF > ${HIVEBENCH_SQL_FILE}
+USE DEFAULT;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set ${MAP_CONFIG_NAME}=$NUM_MAPS;
+set ${REDUCER_CONFIG_NAME}=$NUM_REDS;
+set hive.stats.autogather=false;
+
+
+DROP TABLE IF EXISTS rankings;
+CREATE EXTERNAL TABLE rankings (pageURL STRING, pageRank INT, avgDuration INT) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' STORED AS  SEQUENCEFILE LOCATION '$INPUT_HDFS/rankings';
+DROP TABLE IF EXISTS uservisits_copy;
+CREATE EXTERNAL TABLE uservisits_copy (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' STORED AS  SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
+DROP TABLE IF EXISTS rankings_uservisits_join;
+CREATE EXTERNAL TABLE rankings_uservisits_join ( sourceIP STRING, avgPageRank DOUBLE, totalRevenue DOUBLE) STORED AS  SEQUENCEFILE LOCATION '$OUTPUT_HDFS/rankings_uservisits_join';
+INSERT OVERWRITE TABLE rankings_uservisits_join SELECT sourceIP, avg(pageRank), sum(adRevenue) as totalRevenue FROM rankings R JOIN (SELECT sourceIP, destURL, adRevenue FROM uservisits_copy UV WHERE (datediff(UV.visitDate, '1999-01-01')>=0 AND datediff(UV.visitDate, '2000-01-01')<=0)) NUV ON (R.pageURL = NUV.destURL) group by sourceIP order by totalRevenue DESC;
+EOF
+}
+
+function prepare_sql_scan () {
+    assert $1 "SQL file path not specified."
+    HIVEBENCH_SQL_FILE=$1
+
+    find . -name "metastore_db" -exec rm -rf "{}" \; 2>/dev/null
+
+    cat <<EOF > ${HIVEBENCH_SQL_FILE}
+USE DEFAULT;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set ${MAP_CONFIG_NAME}=$NUM_MAPS;
+set ${REDUCER_CONFIG_NAME}=$NUM_REDS;
+set hive.stats.autogather=false;
+
+
+DROP TABLE IF EXISTS uservisits;
+CREATE EXTERNAL TABLE uservisits (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' STORED AS  SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
+DROP TABLE IF EXISTS uservisits_copy;
+CREATE EXTERNAL TABLE uservisits_copy (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' STORED AS  SEQUENCEFILE LOCATION '$OUTPUT_HDFS/uservisits_copy';
+INSERT OVERWRITE TABLE uservisits_copy SELECT * FROM uservisits;
+EOF
+
+}
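For context, a typical HiBench workload runner strings these helpers together roughly as follows. This is an illustrative sketch based on the function signatures above; the class name, relative source path, and config locations are assumptions following the stock HiBench layout:

    #!/bin/bash
    # Hypothetical run.sh for a Spark workload, built on the helpers defined above.
    current_dir=`dirname "$0"`; current_dir=`cd "$current_dir"; pwd`
    . "${current_dir}/../../../../../bin/functions/workload_functions.sh"

    enter_bench ScalaSparkWordcount ${current_dir}/conf/workload.conf ${current_dir}
    show_bannar start

    rmr_hdfs $OUTPUT_HDFS || true            # clear any previous output
    SIZE=`dir_size $INPUT_HDFS`              # input size feeds the report

    START_TIME=`timestamp`
    run_spark_job com.intel.hibench.sparkbench.micro.ScalaWordCount $INPUT_HDFS $OUTPUT_HDFS
    END_TIME=`timestamp`

    gen_report ${START_TIME} ${END_TIME} ${SIZE}
    show_bannar finish
    leave_bench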

+ 58 - 0
docker/hibench-kube/Dockerfile

@@ -0,0 +1,58 @@
+FROM pl4tinum/spark-kube:3.0
+
+USER root
+
+RUN apt -y update
+RUN mkdir -p /usr/share/man/man1
+RUN apt -y install git bc wget vim maven scala python2.7 build-essential software-properties-common
+RUN wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add -
+RUN add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/
+RUN apt update
+RUN apt -y install adoptopenjdk-8-hotspot
+
+
+WORKDIR /
+RUN git clone https://github.com/Intel-bigdata/HiBench.git
+WORKDIR /HiBench
+ENV JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64
+RUN mvn -Psparkbench -Dspark=3.0 -Dhadoop=2.7 -Dscala=2.12 clean package
+ENV HIBENCH_HOME=/HiBench
+
+WORKDIR /usr/local
+
+RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.4/hadoop-2.7.4.tar.gz
+RUN tar xzvf hadoop-2.7.4.tar.gz
+RUN mv hadoop-2.7.4 hadoop
+ENV HADOOP_HOME=/usr/local/hadoop
+ENV HADOOP_INSTALL=/usr/local/hadoop
+ENV HADOOP_PREFIX=/usr/local/hadoop
+ENV HADOOP_COMMON_HOME=/usr/local/hadoop
+ENV HADOOP_HDFS_HOME=/usr/local/hadoop
+ENV HADOOP_MAPRED_HOME=/usr/local/hadoop
+ENV HADOOP_YARN_HOME=/usr/local/hadoop
+ENV HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
+ENV HADOOP_EXAMPLES_JAR=/usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.4.jar
+ENV JAVA_HOME=/usr/local/openjdk-8/
+ENV PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
+RUN rm ${HADOOP_CONF_DIR}/core-site.xml
+
+RUN mkdir /root/.kube
+COPY config /root/.kube/config
+COPY conf/core-site-nfs.xml /usr/local/hadoop/etc/hadoop/core-site.xml
+COPY conf/hadoop-nfs.conf /HiBench/conf/hadoop.conf
+COPY conf/spark.conf /HiBench/conf/spark.conf
+
+WORKDIR /
+RUN git clone https://github.com/opcm/pcm.git
+WORKDIR /pcm
+RUN make
+
+WORKDIR /
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod ugo+x /entrypoint.sh
+
+# COPY  (incomplete instruction: source and destination not specified)
+
+RUN mkdir /results
+
+ENTRYPOINT ["/entrypoint.sh"]
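A hedged sketch of building this image and launching it in the cluster; the registry, tag, namespace and interactive use are assumptions. Because the entrypoint falls through to tini for non-spark-on-k8s commands, a plain shell can be started for debugging:

    # Build on top of the spark-kube base and push somewhere the cluster can pull from.
    docker build -t <registry>/hibench-kube:3.0 docker/hibench-kube
    docker push <registry>/hibench-kube:3.0

    # Run the image as a throwaway driver pod with an interactive shell.
    kubectl -n iccs-hibench run hibench-driver --rm -it --restart=Never \
        --image=<registry>/hibench-kube:3.0 -- bash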

+ 24 - 0
docker/hibench-kube/conf/core-site-ime.xml

@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+    <configuration>
+        <property>
+            <name>fs.default.name</name>
+            <value>hdfs://hdfs-namenode-ime.iccs-hibench:8020</value>
+        </property>
+    </configuration>

+ 24 - 0
docker/hibench-kube/conf/core-site-lustre.xml

@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+    <configuration>
+        <property>
+            <name>fs.default.name</name>
+            <value>hdfs://hdfs-namenode-lustre.iccs-hibench:8020</value>
+        </property>
+    </configuration>

+ 24 - 0
docker/hibench-kube/conf/core-site-nfs.xml

@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+    <configuration>
+        <property>
+            <name>fs.default.name</name>
+            <value>hdfs://hdfs-namenode-nfs.iccs-hibench:8020</value>
+        </property>
+    </configuration>

+ 14 - 0
docker/hibench-kube/conf/hadoop-ime.conf

@@ -0,0 +1,14 @@
+# Hadoop home
+hibench.hadoop.home ${HADOOP_HOME}
+
+# The path of hadoop executable
+hibench.hadoop.executable ${hibench.hadoop.home}/bin/hadoop
+
+# Hadoop configuration directory
+hibench.hadoop.configure.dir ${hibench.hadoop.home}/etc/hadoop
+
+# The root HDFS path to store HiBench data
+hibench.hdfs.master hdfs://hdfs-namenode-ime.iccs-hibench:8020
+
+# Hadoop release provider. Supported value: apache, cdh5, hdp
+hibench.hadoop.release apache

+ 14 - 0
docker/hibench-kube/conf/hadoop-lustre.conf

@@ -0,0 +1,14 @@
+# Hadoop home
+hibench.hadoop.home ${HADOOP_HOME}
+
+# The path of hadoop executable
+hibench.hadoop.executable ${hibench.hadoop.home}/bin/hadoop
+
+# Hadoop configuration directory
+hibench.hadoop.configure.dir ${hibench.hadoop.home}/etc/hadoop
+
+# The root HDFS path to store HiBench data
+hibench.hdfs.master hdfs://hdfs-namenode-lustre.iccs-hibench:8020
+
+# Hadoop release provider. Supported value: apache, cdh5, hdp
+hibench.hadoop.release apache

+ 14 - 0
docker/hibench-kube/conf/hadoop-nfs.conf

@@ -0,0 +1,14 @@
+# Hadoop home
+hibench.hadoop.home /usr/local/hadoop
+
+# The path of hadoop executable
+hibench.hadoop.executable ${hibench.hadoop.home}/bin/hadoop
+
+# Hadoop configuration directory
+hibench.hadoop.configure.dir ${hibench.hadoop.home}/etc/hadoop
+
+# The root HDFS path to store HiBench data
+hibench.hdfs.master hdfs://hdfs-namenode-nfs.iccs-hibench:8020
+
+# Hadoop release provider. Supported value: apache, cdh5, hdp
+hibench.hadoop.release apache

+ 46 - 0
docker/hibench-kube/conf/spark.conf

@@ -0,0 +1,46 @@
+# Spark home
+hibench.spark.home      /spark
+
+# Spark master
+#   standalone mode: spark://xxx:7077
+#   YARN mode: yarn-client
+hibench.spark.master    k8s://https://172.9.0.240:6443
+
+# executor number and cores when running on Yarn
+hibench.yarn.executor.num     2
+hibench.yarn.executor.cores   4
+
+# executor and driver memory in standalone & YARN mode
+spark.executor.memory  4g
+spark.driver.memory    4g
+
+# set spark parallelism property according to hibench's parallelism value
+spark.default.parallelism     ${hibench.default.map.parallelism}
+
+# set spark sql's default shuffle partitions according to hibench's parallelism value
+spark.sql.shuffle.partitions  ${hibench.default.shuffle.parallelism}
+
+#======================================================
+# Spark Streaming
+#======================================================
+# Spark streaming batch interval in milliseconds (default: 100)
+hibench.streambench.spark.batchInterval          100
+
+# Number of nodes that will receive kafka input (default: 4)
+hibench.streambench.spark.receiverNumber        4
+
+# Indicate RDD storage level. (default: 2)
+# 0 = StorageLevel.MEMORY_ONLY
+# 1 = StorageLevel.MEMORY_AND_DISK_SER
+# other = StorageLevel.MEMORY_AND_DISK_SER_2
+hibench.streambench.spark.storageLevel 2
+
+# indicate whether to test the write ahead log new feature (default: false)
+hibench.streambench.spark.enableWAL false
+
+# if testWAL is true, this path to store stream context in hdfs shall be specified. If false, it can be empty (default: /var/tmp)
+hibench.streambench.spark.checkpointPath /var/tmp
+
+# whether to use the direct approach or not (default: true)
+hibench.streambench.spark.useDirectMode true
+

File diff suppressed because it is too large
+ 19 - 0
docker/hibench-kube/config


+ 137 - 0
docker/hibench-kube/entrypoint.sh

@@ -0,0 +1,137 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# echo commands to the terminal output
+set -ex
+
+# Check whether there is a passwd entry for the container UID
+myuid=$(id -u)
+mygid=$(id -g)
+# turn off -e for getent because it will return error code in anonymous uid case
+set +e
+uidentry=$(getent passwd $myuid)
+set -e
+
+# If there is no passwd entry for the container UID, attempt to create one
+if [ -z "$uidentry" ] ; then
+    if [ -w /etc/passwd ] ; then
+        echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd
+    else
+        echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
+    fi
+fi
+
+SPARK_K8S_CMD="$1"
+case "$SPARK_K8S_CMD" in
+    driver | driver-py | driver-r | executor)
+      shift 1
+      ;;
+    "")
+      ;;
+    *)
+      echo "Non-spark-on-k8s command provided, proceeding in pass-through mode..."
+      exec /usr/bin/tini -s -- "$@"
+      ;;
+esac
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt
+readarray -t SPARK_EXECUTOR_JAVA_OPTS < /tmp/java_opts.txt
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if [ -n "$PYSPARK_FILES" ]; then
+    PYTHONPATH="$PYTHONPATH:$PYSPARK_FILES"
+fi
+
+PYSPARK_ARGS=""
+if [ -n "$PYSPARK_APP_ARGS" ]; then
+    PYSPARK_ARGS="$PYSPARK_APP_ARGS"
+fi
+
+R_ARGS=""
+if [ -n "$R_APP_ARGS" ]; then
+    R_ARGS="$R_APP_ARGS"
+fi
+
+if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "2" ]; then
+    pyv="$(python -V 2>&1)"
+    export PYTHON_VERSION="${pyv:7}"
+    export PYSPARK_PYTHON="python"
+    export PYSPARK_DRIVER_PYTHON="python"
+elif [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then
+    pyv3="$(python3 -V 2>&1)"
+    export PYTHON_VERSION="${pyv3:7}"
+    export PYSPARK_PYTHON="python3"
+    export PYSPARK_DRIVER_PYTHON="python3"
+fi
+
+case "$SPARK_K8S_CMD" in
+  driver)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@"
+    )
+    ;;
+  driver-py)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@" $PYSPARK_PRIMARY $PYSPARK_ARGS
+    )
+    ;;
+    driver-r)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@" $R_PRIMARY $R_ARGS
+    )
+    ;;
+  executor)
+    CMD=(
+      ${JAVA_HOME}/bin/java
+      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
+      -Xms$SPARK_EXECUTOR_MEMORY
+      -Xmx$SPARK_EXECUTOR_MEMORY
+      -cp "$SPARK_CLASSPATH"
+      org.apache.spark.executor.CoarseGrainedExecutorBackend
+      --driver-url $SPARK_DRIVER_URL
+      --executor-id $SPARK_EXECUTOR_ID
+      --cores $SPARK_EXECUTOR_CORES
+      --app-id $SPARK_APPLICATION_ID
+      --hostname $SPARK_EXECUTOR_POD_IP
+    )
+    ;;
+
+  *)
+    echo "Unknown command: $SPARK_K8S_CMD" 1>&2
+    exit 1
+esac
+
+# Execute the container CMD under tini for better hygiene
+BENCH=$(hostname | awk -F'-' '{print $1}')
+EXEC=$(hostname | awk -F'-' '{print $3"-"$4}')
+mkdir -p /results/$BENCH/$SCENARIO/$EXEC
+/pcm/pcm.x -r -csv=/results/$BENCH/$SCENARIO/$EXEC/pcm.csv 1>&- 2>&- &
+exec /usr/bin/tini -s -- "${CMD[@]}"
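The final block derives the results directory from the executor pod's hostname and starts PCM next to the executor. For a pod named, say, terasort-nfs-exec-1 (the name is an assumption), the parsing works out as in this small sketch:

    hostname="terasort-nfs-exec-1"                             # example executor pod name
    BENCH=$(echo "$hostname" | awk -F'-' '{print $1}')         # -> terasort
    EXEC=$(echo "$hostname" | awk -F'-' '{print $3"-"$4}')     # -> exec-1
    echo "/results/$BENCH/$SCENARIO/$EXEC"                     # e.g. /results/terasort/<scenario>/exec-1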

+ 1 - 0
docker/spark-2.4

@@ -0,0 +1 @@
+Subproject commit 32a28ff8cee534cd5fa0da1bdcf5efdc46d8c830

+ 1 - 0
docker/spark-3.0

@@ -0,0 +1 @@
+Subproject commit 4425c3a9c35d4ab59976d8a409d18c933a5f9180