perf.sh 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. #!/bin/bash
  2. # StarPU --- Runtime system for heterogeneous multicore architectures.
  3. #
  4. # Copyright (C) 2010-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  5. #
  6. # StarPU is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU Lesser General Public License as published by
  8. # the Free Software Foundation; either version 2.1 of the License, or (at
  9. # your option) any later version.
  10. #
  11. # StarPU is distributed in the hope that it will be useful, but
  12. # WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. #
  15. # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. #
  17. # 4G x np = 4 * (k*1K) ^ 2
  18. # A G * np = 4 * k^2 * 1M
  19. # A * 250 * np = k^2
  20. # A = 6
  21. # k = sqrt(1500*np)
  22. # np = 1 => k = 32
  23. # np = 2 => k = 48
  24. # np = 3 => k = 64
  25. # np = 4 => k = 64
  # Problem size: SIZE is NBLOCKS blocks of BLOCKSIZE elements per dimension.
  NBLOCKS=16
  BLOCKSIZE=1024
  SIZE=$((NBLOCKS * BLOCKSIZE))

  # Print the number of lines in the machinefile given as $1 (the script
  # treats each line as one MPI node). Prints 0 and warns on stderr when the
  # file cannot be read, so callers always get a plain integer.
  count_mpi_nodes() {
    local machinefile=$1
    local lines
    if [[ ! -r "$machinefile" ]]; then
      echo "warning: cannot read $machinefile" >&2
      echo 0
      return 0
    fi
    lines=$(wc -l < "$machinefile")
    # Some wc implementations pad their output with blanks; evaluating the
    # value arithmetically yields a bare number.
    echo "$((lines))"
  }

  echo "JOB ID ${PBS_JOBID}"
  nnodes=$(count_mpi_nodes "machinefile.${PBS_JOBID}")
  echo "got $nnodes mpi nodes"

  # Calibrate: run the example ncalibrate times with STARPU_CALIBRATE=1.
  # An arithmetic loop is used instead of `seq 1 $ncalibrate` because BSD seq
  # counts downwards when the last value is smaller than the first, which
  # would run the body twice for ncalibrate=0.
  ncalibrate=0
  for ((i = 1; i <= ncalibrate; i++)); do
    echo "STARPU_CALIBRATE $i/$ncalibrate"
    # $STARPU_LAUNCH stays unquoted on purpose: an empty value must vanish
    # and a multi-word value must split into separate arguments.
    STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
      mpirun -machinefile "machinefile.${PBS_JOBID}" -np "$nnodes" \
      $STARPU_LAUNCH ./mpi_lu/plu_example_float \
      -p 2 -q 2 -nblocks 32 -size "$((32 * BLOCKSIZE))" -numa
  done
  # Run one plu_example_float experiment under mpirun and record its output.
  #
  # Arguments:
  #   $1 - value exported as STARPU_NCUDA for the run
  #   $2 - process count passed to mpirun -np
  #   $3 - value passed to the example's -p option
  #   $4 - value passed to the example's -q option
  #   $5 - value passed to -nblocks; -size is this value times BLOCKSIZE
  # Globals read: BLOCKSIZE, PBS_JOBID, STARPU_LAUNCH
  # Files: overwrites log, log.out and log.err; appends to log.all
  # Outputs: the banner and the run's stdout followed by its stderr
  # Returns: the exit status of mpirun
  func()
  {
    # Locals keep the function from overwriting the caller's loop variables.
    local ngpus=$1
    local np=$2
    local p=$3
    local q=$4
    local nblocks=$5
    local status=0

    # Banner identifying the run; shown on stdout and appended to log.all.
    {
      echo "*******************************************"
      echo "*************** NGPUS $ngpus - np $np - nblocks $nblocks **************"
      echo "*******************************************"
    } > log
    cat log
    cat log >> log.all

    # $STARPU_LAUNCH stays unquoted on purpose: an empty value must vanish
    # and a multi-word value must split into separate arguments.
    STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 \
      mpirun -machinefile "machinefile.${PBS_JOBID}" -np "$np" \
      $STARPU_LAUNCH ./mpi_lu/plu_example_float \
      -p "$p" -q "$q" -nblocks "$nblocks" -size "$((nblocks * BLOCKSIZE))" -numa \
      > log.out 2> log.err || status=$?

    # Publish stdout followed by stderr, on the terminal and in log.all.
    cat log.out log.err > log
    cat log
    cat log >> log.all

    return "$status"
  }
  # Print "<p> <q>", the process grid used for $1 MPI processes.
  # Returns 1 without output when no grid is defined for that count.
  grid_for_np() {
    case "$1" in
      1) echo "1 1" ;;
      2) echo "2 1" ;;
      4) echo "2 2" ;;
      *) return 1 ;;
    esac
  }

  # Print the per-node memory estimate for a square matrix of dimension $1
  # split over $2 nodes: 4 * size^2 / 2^20 / nodes, using integer division.
  # NOTE(review): presumably 4 bytes per float element, i.e. a result in
  # MiB -- confirm.
  per_node_memory_mb() {
    local matrix_size=$1
    local np=$2
    echo "$((4 * matrix_size * matrix_size / (1024 * 1024) / np))"
  }

  # Start from a fresh cumulative log.
  rm -f log.all

  # Number of times each experiment is repeated.
  nloops=3
  # Largest per-node memory estimate for which an experiment is still run.
  per_node_max_memory=7000

  for np in 1 2 4; do
    # The process grid depends only on np, so compute it once per np.
    if ! grid=$(grid_for_np "$np"); then
      # Skip the configuration rather than run with stale or unset p and q.
      echo "does not support $np nodes yet" >&2
      continue
    fi
    read -r p q <<< "$grid"

    for nblocks in 16 32 48 64 80; do
      # Does the problem fit into memory? Invariant for the two inner loops.
      matrix_size=$((nblocks * BLOCKSIZE))
      per_node_memory=$(per_node_memory_mb "$matrix_size" "$np")

      for ngpus_per_node in 1 2 3 4; do
        for ((loop = 1; loop <= nloops; loop++)); do
          echo "NP $np P $p Q $q SIZE $per_node_memory NBLOCKS $nblocks"
          if ((per_node_memory >= per_node_max_memory)); then
            echo "Problem is too large !"
          else
            func "$ngpus_per_node" "$np" "$p" "$q" "$nblocks"
            echo "go !"
          fi
        done
      done
    done
  done