perf.sh 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. #!/bin/bash
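  # Sizing rule of thumb. k is the number of 1K-wide blocks per matrix
  # dimension, np the number of MPI nodes, A the memory budget per node in GB;
  # the leading 4 on the right-hand side is presumably the size in bytes of
  # one single-precision element.
  #   4G x np = 4 * (k*1K) ^ 2      (example with 4 GB per node)
  #   A G * np = 4 * k^2 * 1M       (general form, (1K)^2 = 1M)
  #   A * 250 * np = k^2            (divide both sides by 4M)
  # With A = 6:
  #   k = sqrt(1500*np)
  # Values used per node count (apparently rounded down to a multiple of 16):
  #   np = 1 => k = 32
  #   np = 2 => k = 48
  #   np = 3 => k = 64
  #   np = 4 => k = 64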
  # Problem size: the matrix is NBLOCKS x NBLOCKS blocks of BLOCKSIZE elements
  # per side. BLOCKSIZE is also read by the benchmark runs further down.
  NBLOCKS=16
  BLOCKSIZE=1024
  SIZE=$(( NBLOCKS * BLOCKSIZE ))

  echo "JOB ID ${PBS_JOBID}"

  # The machinefile lists one MPI node per line; its line count is the number
  # of processes used for calibration.
  machinefile="machinefile.${PBS_JOBID}"
  if [[ -r "$machinefile" ]]; then
    nnodes=$(wc -l < "$machinefile")
    # Some wc implementations pad the count with blanks; normalise it.
    nnodes=$(( nnodes ))
  else
    # Keep going (as before) but say why the node count is zero.
    echo "warning: cannot read $machinefile" >&2
    nnodes=0
  fi
  echo "got $nnodes mpi nodes"

  # Calibrate
  # Number of calibration runs; 0 disables calibration entirely.
  ncalibrate=0
  # An arithmetic loop is used instead of `seq 1 $ncalibrate` because some seq
  # implementations count downwards (1, 0) when the upper bound is 0.
  for (( i = 1; i <= ncalibrate; i++ )); do
    echo "STARPU_CALIBRATE $i/$ncalibrate"
    # NOTE(review): the process grid is fixed at 2x2 here while -np follows the
    # machinefile -- confirm this is intended when nnodes is not 4.
    STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
      mpirun -machinefile "$machinefile" -np "$nnodes" \
      ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 \
      -size "$(( 32 * BLOCKSIZE ))" -numa
  done
  #######################################
  # Run one plu_example_float experiment through mpirun and log its output.
  # Globals:
  #   BLOCKSIZE  (read) block edge length; matrix size is nblocks * BLOCKSIZE
  #   PBS_JOBID  (read) suffix of the machinefile passed to mpirun
  # Arguments:
  #   $1 - number of CUDA devices per process (exported as STARPU_NCUDA)
  #   $2 - number of MPI processes (-np)
  #   $3 - value passed as -p
  #   $4 - value passed as -q
  #   $5 - number of blocks per matrix dimension (-nblocks)
  # Outputs:
  #   Prints a banner and then the run's stdout followed by its stderr.
  #   Files in the current directory: log.out / log.err (raw streams),
  #   log (last thing printed), log.all (appended with banner and output).
  # Returns:
  #   The exit status of mpirun, or 2 if BLOCKSIZE is not set.
  #######################################
  func()
  {
    # Locals keep the caller's loop variables (np, p, q, nblocks) untouched.
    local ngpus=$1
    local np=$2
    local p=$3
    local q=$4
    local nblocks=$5
    local size
    local status=0

    if [[ -z "${BLOCKSIZE:-}" ]]; then
      echo "func: BLOCKSIZE is not set" >&2
      return 2
    fi
    size=$(( nblocks * BLOCKSIZE ))

    # Banner: shown on stdout and recorded in the aggregate log.
    {
      echo "*******************************************"
      echo "*************** NGPUS $ngpus - np $np - nblocks $nblocks **************"
      echo "*******************************************"
    } > log
    cat log
    cat log >> log.all

    # CPU workers are disabled (STARPU_NCPUS=0); only CUDA workers are used.
    # The status is captured so that a failed run is visible to the caller.
    STARPU_NCPUS=0 STARPU_NCUDA="$ngpus" STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
      mpirun -machinefile "machinefile.${PBS_JOBID}" -np "$np" \
      ./mpi_lu/plu_example_float -p "$p" -q "$q" -nblocks "$nblocks" \
      -size "$size" -numa > log.out 2> log.err || status=$?

    # stdout first, then stderr, both shown and appended to the aggregate log.
    cat log.out log.err > log
    cat log
    cat log >> log.all

    return "$status"
  }
  #######################################
  # Print the process grid used for a given number of MPI processes.
  # Arguments: $1 - number of MPI processes
  # Outputs:   "p q" on stdout
  # Returns:   0 if the count is supported, 1 otherwise (nothing printed)
  #######################################
  grid_for_np() {
    case "$1" in
      1) echo "1 1" ;;
      2) echo "2 1" ;;
      4) echo "2 2" ;;
      *) return 1 ;;
    esac
  }

  #######################################
  # Estimate the memory needed per node for a square matrix.
  # Computes 4 * size^2 / 2^20 / nprocs with integer arithmetic (the factor 4
  # is presumably the size in bytes of one single-precision element).
  # Arguments: $1 - matrix edge length in elements
  #            $2 - number of MPI processes the matrix is spread over
  # Outputs:   the estimate on stdout
  #######################################
  per_node_memory_mb() {
    local matrix_size=$1
    local nprocs=$2
    echo $(( 4 * matrix_size * matrix_size / (1024 * 1024) / nprocs ))
  }

  # Start from an empty aggregate log.
  rm -f log.all

  # How many times each experiment is repeated.
  nloops=3
  # Configurations whose per-node estimate reaches this value are skipped.
  per_node_max_memory=7000

  for np in 1 2 4; do
    # Compute p and q from np. Unsupported counts are skipped instead of
    # silently reusing the p and q of the previous iteration.
    if ! grid=$(grid_for_np "$np"); then
      echo "does not support $np nodes yet" >&2
      continue
    fi
    p=${grid% *}
    q=${grid#* }

    for nblocks in 16 32 48 64 80; do
      # Does the problem fit into memory? Only depends on np and nblocks, so
      # it is computed once here rather than in the innermost loop.
      matrix_size=$(( nblocks * BLOCKSIZE ))
      per_node_memory=$(per_node_memory_mb "$matrix_size" "$np")

      for ngpus_per_node in 1 2 3 4; do
        for (( loop = 1; loop <= nloops; loop++ )); do
          echo "NP $np P $p Q $q SIZE $per_node_memory NBLOCKS $nblocks"
          if (( per_node_memory >= per_node_max_memory )); then
            echo "Problem is too large !"
          else
            func "$ngpus_per_node" "$np" "$p" "$q" "$nblocks"
            echo "go !"
          fi
        done
      done
    done
  done