#!/bin/bash
# StarPU --- Runtime system for heterogeneous multicore architectures.
#
# Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
#
# StarPU is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at
# your option) any later version.
#
# StarPU is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the GNU Lesser General Public License in COPYING.LGPL for more details.
#
# Benchmark driver: runs ./mpi_lu/plu_example_float through mpirun for every
# combination of MPI process count, block count and GPUs per node listed at
# the bottom of this file, repeating each configuration several times.
#
# Environment:
#   PBS_JOBID      - selects the machine file "machinefile.${PBS_JOBID}".
#   STARPU_LAUNCH  - optional launcher prefix inserted before the benchmark
#                    binary (may contain several words; may be empty/unset).
#
# Files written in the current directory:
#   log.out / log.err - stdout / stderr of the most recent run
#   log               - banner, then output, of the most recent run
#   log.all           - accumulated banners and outputs of all runs
#
# Strict mode (set -e / -u) is deliberately not enabled: a failing mpirun must
# not abort the rest of the sweep, and the environment variables above are
# allowed to be unset.

# Sizing notes kept from the original author:
# 4G x np = 4 * (k*1K) ^ 2
# A G * np = 4 * k^2 * 1M
# A * 250 * np = k^2
# A = 6
# k = sqrt(1500*np)
# np = 1 => k = 32
# np = 2 => k = 48
# np = 3 => k = 64
# np = 4 => k = 64

# Problem size
NBLOCKS=16
BLOCKSIZE=1024
SIZE=$((NBLOCKS * BLOCKSIZE))

MACHINEFILE="machinefile.${PBS_JOBID}"

echo "JOB ID ${PBS_JOBID}"

# One line per MPI node in the machine file. Fall back to 0 (what the former
# "cat | wc -l" pipeline produced) when the file cannot be read.
if [[ -r "$MACHINEFILE" ]]; then
  nnodes=$(($(wc -l < "$MACHINEFILE")))
else
  echo "warning: cannot read $MACHINEFILE" >&2
  nnodes=0
fi
echo "got $nnodes mpi nodes"

# Calibrate (disabled while ncalibrate is 0)
ncalibrate=0
for ((i = 1; i <= ncalibrate; i++)); do
  echo "STARPU_CALIBRATE $i/$ncalibrate"
  # STARPU_LAUNCH is intentionally unquoted so that it can expand to zero or
  # several words.
  # shellcheck disable=SC2086
  STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
    mpirun -machinefile "$MACHINEFILE" -np "$nnodes" ${STARPU_LAUNCH:-} \
    ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 \
    -size "$((32 * BLOCKSIZE))" -numa
done

#######################################
# Run one benchmark configuration and record its output.
# Globals:
#   MACHINEFILE, BLOCKSIZE, STARPU_LAUNCH (read)
# Arguments:
#   $1 - value exported as STARPU_NCUDA for the run
#   $2 - number of MPI processes (mpirun -np)
#   $3 - value passed as -p to the benchmark
#   $4 - value passed as -q to the benchmark
#   $5 - number of blocks; the matrix size passed is $5 * BLOCKSIZE
# Outputs:
#   Prints the banner and the run's stdout followed by its stderr; writes
#   log, log.out, log.err and appends to log.all.
#######################################
func() {
  local ngpus=$1
  local np=$2
  local p=$3
  local q=$4
  local nblocks=$5

  # Banner: stored in log, echoed to stdout, appended to log.all.
  printf '%s\n' \
    "*******************************************" \
    "*************** NGPUS $ngpus - np $np - nblocks $nblocks **************" \
    "*******************************************" \
    | tee log | tee -a log.all

  # STARPU_LAUNCH is intentionally unquoted (optional, possibly multi-word).
  # shellcheck disable=SC2086
  STARPU_NCPUS=0 STARPU_NCUDA="$ngpus" STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
    mpirun -machinefile "$MACHINEFILE" -np "$np" ${STARPU_LAUNCH:-} \
    ./mpi_lu/plu_example_float -p "$p" -q "$q" -nblocks "$nblocks" \
    -size "$((nblocks * BLOCKSIZE))" -numa > log.out 2> log.err

  # stdout first, then stderr: stored in log, echoed, appended to log.all.
  cat log.out log.err | tee log | tee -a log.all
}

rm -f log.all

# How many times is each experiment repeated?
nloops=3

# Upper bound for the estimated per-node footprint computed below.
per_node_max_memory=7000

for np in 1 2 4; do
  # Compute p and q from np
  case "$np" in
    1) p=1; q=1 ;;
    2) p=2; q=1 ;;
    4) p=2; q=2 ;;
    *)
      # Skip unsupported process counts instead of running with stale p/q.
      echo "does not support $np nodes yet" >&2
      continue
      ;;
  esac

  for nblocks in 16 32 48 64 80; do
    # Does the problem fit into memory ?
    # 4 bytes per element, expressed in units of 1024*1024 bytes, split
    # evenly across the np processes.
    matrix_size=$((nblocks * BLOCKSIZE))
    per_node_memory=$((4 * matrix_size * matrix_size / (1024 * 1024) / np))

    for ngpus_per_node in 1 2 3 4; do
      for ((loop = 1; loop <= nloops; loop++)); do
        echo "NP $np P $p Q $q SIZE $per_node_memory NBLOCKS $nblocks"

        if ((per_node_memory >= per_node_max_memory)); then
          echo "Problem is too large !"
        else
          func "$ngpus_per_node" "$np" "$p" "$q" "$nblocks"
          echo "go !"
        fi
      done
    done
  done
done