#!/bin/bash
# StarPU --- Runtime system for heterogeneous multicore architectures.
# 
# Copyright (C) 2010  Université de Bordeaux
# Copyright (C) 2010  CNRS
# 
# StarPU is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at
# your option) any later version.
# 
# StarPU is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# 
# See the GNU Lesser General Public License in COPYING.LGPL for more details.

# Benchmark sweep for ./mpi_lu/plu_example_float.
#
# For every combination of MPI process count (1, 2, 4), block count
# (16 .. 80) and GPUs per node (1 .. 4), the example is launched through
# mpirun `nloops` times. Each run's stdout and stderr are echoed to the
# terminal and appended to ./log.all (which is reset at start-up).
#
# Inputs:
#   PBS_JOBID                 job identifier; selects machinefile.${PBS_JOBID}
#   machinefile.${PBS_JOBID}  host list handed to `mpirun -machinefile`
# Outputs:
#   log, log.out, log.err     scratch files, overwritten by every run
#   log.all                   accumulated banners and outputs of all runs
#
# Strict mode (`set -e` / `set -u`) is deliberately NOT enabled: a failing
# mpirun invocation is recorded in the logs and the sweep carries on.

# 4G x np = 4 * (k*1K) ^ 2
# A G * np = 4 * k^2 * 1M
# A * 250 * np = k^2
# A = 6
# k = sqrt(1500*np)
# np = 1 => k = 32
# np = 2 => k = 48
# np = 3 => k = 64 
# np = 4 => k = 64

# Problem size
# NOTE(review): NBLOCKS and SIZE are not read anywhere below (the sweep picks
# its own block counts); they are kept in case something external uses them.
NBLOCKS=16
BLOCKSIZE=1024
SIZE=$((NBLOCKS * BLOCKSIZE))

# Host list passed to every mpirun invocation.
MACHINEFILE="machinefile.${PBS_JOBID}"

# Number of calibration runs performed before the sweep (0 disables them).
ncalibrate=0

# How many times do we repeat each experiment?
nloops=3

# Upper bound on the estimated per-node matrix footprint; compared against
# the value printed by per_node_memory_mb (presumably MiB -- TODO confirm).
per_node_max_memory=7000

#######################################
# Map an MPI process count onto the p x q process grid used by the example.
# Arguments: $1 - number of MPI processes
# Outputs:   "p q" on stdout; a diagnostic on stderr if $1 is unsupported
# Returns:   0 on success, 1 if no grid is defined for $1
#######################################
grid_for_np() {
	case "$1" in
		1) echo "1 1" ;;
		2) echo "2 1" ;;
		4) echo "2 2" ;;
		*)
			printf 'does not support %s nodes yet\n' "$1" >&2
			return 1
			;;
	esac
}

#######################################
# Estimate the matrix footprint per MPI process.
# Computes 4 * (nblocks * BLOCKSIZE)^2 / 2^20 / np with integer division
# (the factor 4 is presumably the size in bytes of one float element).
# Globals:   BLOCKSIZE (read)
# Arguments: $1 - number of blocks per matrix dimension
#            $2 - number of MPI processes
# Outputs:   the estimate on stdout
#######################################
per_node_memory_mb() {
	local nblocks=$1 np=$2
	local matrix_size=$((nblocks * BLOCKSIZE))
	printf '%d\n' "$((4 * matrix_size * matrix_size / (1024 * 1024) / np))"
}

#######################################
# Run one experiment and log it.
# Prints a banner, launches plu_example_float through mpirun with
# STARPU_NCPUS=0, STARPU_NCUDA=<ngpus>, STARPU_SCHED=dmda and
# STARPU_PREFETCH=1, then echoes the captured stdout followed by stderr.
# Banner and output are both appended to log.all.
# Globals:   BLOCKSIZE, MACHINEFILE (read)
# Arguments: $1 - GPUs per node (exported as STARPU_NCUDA)
#            $2 - number of MPI processes (mpirun -np)
#            $3 - grid rows    (-p)
#            $4 - grid columns (-q)
#            $5 - number of blocks (-nblocks); -size is $5 * BLOCKSIZE
# Outputs:   banner and run output on stdout; writes log, log.out, log.err
#            and appends to log.all
#######################################
func() {
	# `local` keeps these from overwriting the caller's loop variables.
	local ngpus=$1 np=$2 p=$3 q=$4 nblocks=$5
	local rule='*******************************************'

	printf '%s\n' \
		"${rule}" \
		"*************** NGPUS ${ngpus} - np ${np} - nblocks ${nblocks} **************" \
		"${rule}" >log
	# Show the banner and append it to the cumulative log in one go.
	tee -a log.all <log

	STARPU_NCPUS=0 STARPU_NCUDA="${ngpus}" STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
		mpirun -machinefile "${MACHINEFILE}" -np "${np}" \
		./mpi_lu/plu_example_float -p "${p}" -q "${q}" \
		-nblocks "${nblocks}" -size "$((nblocks * BLOCKSIZE))" -numa \
		>log.out 2>log.err

	# stdout first, then stderr, exactly one copy on the terminal and in log.all.
	cat log.out log.err >log
	tee -a log.all <log
}

#######################################
# Entry point: report the job, optionally calibrate, then run the sweep.
# Globals:   PBS_JOBID, MACHINEFILE, BLOCKSIZE, ncalibrate, nloops,
#            per_node_max_memory (read)
#######################################
main() {
	local nnodes i
	local np nblocks ngpus_per_node loop
	local grid p q per_node_memory

	echo "JOB ID ${PBS_JOBID}"

	if [[ -r "${MACHINEFILE}" ]]; then
		# $(( )) strips the padding some wc implementations emit.
		nnodes=$(($(wc -l <"${MACHINEFILE}")))
	else
		printf 'warning: cannot read %s\n' "${MACHINEFILE}" >&2
		nnodes=0
	fi
	echo "got $nnodes mpi nodes"

	# Calibrate
	# A C-style loop runs zero times when ncalibrate=0; `seq 1 0` counts
	# down (1, 0) on some seq implementations.
	for ((i = 1; i <= ncalibrate; i++)); do
		echo "STARPU_CALIBRATE $i/$ncalibrate"
		STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
			mpirun -machinefile "${MACHINEFILE}" -np "${nnodes}" \
			./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 \
			-size "$((32 * BLOCKSIZE))" -numa
	done

	rm -f log.all

	for np in 1 2 4; do
		# Compute p and q from np; skip node counts without a known grid
		# instead of running with stale p/q values.
		grid=$(grid_for_np "${np}") || continue
		read -r p q <<<"${grid}"

		for nblocks in 16 32 48 64 80; do
			# Depends only on (nblocks, np): computed once per pair.
			per_node_memory=$(per_node_memory_mb "${nblocks}" "${np}")

			for ngpus_per_node in 1 2 3 4; do
				for ((loop = 1; loop <= nloops; loop++)); do
					echo "NP $np P $p Q $q SIZE $per_node_memory NBLOCKS $nblocks"

					# Does the problem fit into memory ?
					if ((per_node_memory >= per_node_max_memory)); then
						echo "Problem is too large !"
					else
						func "${ngpus_per_node}" "${np}" "${p}" "${q}" "${nblocks}"
						echo "go !"
					fi
				done
			done
		done
	done
}

main "$@"