starpu_mpi_comm_check.sh 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. #!/bin/bash
  2. # StarPU --- Runtime system for heterogeneous multicore architectures.
  3. #
  4. # Copyright (C) 2017 CNRS
  5. #
  6. # StarPU is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU Lesser General Public License as published by
  8. # the Free Software Foundation; either version 2.1 of the License, or (at
  9. # your option) any later version.
  10. #
  11. # StarPU is distributed in the hope that it will be useful, but
  12. # WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. #
  15. # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. #
  17. # Script to check MPI communications are done properly
  18. # The application should be launched with STARPU_MPI_COMM=1
  19. # e.g
  20. # $ export STARPU_MPI_COMM=1
  21. # $ mpirun --output-filename starpu_mpi.log appli parameters
  22. # and then the script can be launched with the output files
  23. # $ starpu_mpi_comm_check.sh starpu_mpi.log.*
  24. if test -z "$1"
  25. then
  26. echo Syntax error: parameter missing
  27. exit 1
  28. fi
  29. # Get the nodes identifiers
  30. nodes=$(for f in $*
  31. do
  32. grep starpu_mpi $f | grep '\[' | awk '{print $1}'| sed 's/\[\(.*\)\]\[starpu_mpi\]/\1/' | grep "^[[:digit:]]*$"
  33. done |sort|uniq
  34. )
  35. echo nodes $nodes
  36. DIR=/tmp
  37. # for each node, extract send and receive communications
  38. for node in $nodes
  39. do
  40. for f in $*
  41. do
  42. grep starpu_mpi $f |grep "\[$node"
  43. done > $DIR/starpu_mpi_node$node.log
  44. grep -- "-->" $DIR/starpu_mpi_node$node.log > $DIR/starpu_mpi_node${node}_send.log
  45. grep -- "<--" $DIR/starpu_mpi_node$node.log > $DIR/starpu_mpi_node${node}_recv.log
  46. done
  47. # count the number of traced lines
  48. #for node in $nodes
  49. #do
  50. # wc -l $DIR/starpu_mpi_node${node}_recv.log
  51. # lines=$(grep :42:42 $DIR/starpu_mpi_node${node}_recv.log | wc -l)
  52. # lines2=$(( lines + lines ))
  53. # echo $lines2
  54. # lines3=$(( lines2 + lines ))
  55. # echo $lines3
  56. #done
  57. # for each pair of nodes, check tags are sent and received in the same order
  58. for src in $nodes
  59. do
  60. for dst in $nodes
  61. do
  62. if test $src != $dst
  63. then
  64. grep ":$dst:42:" $DIR/starpu_mpi_node${src}_send.log| awk -F':' '{print $6}' > $DIR/node${src}_send_to_${dst}.log
  65. grep ":$src:42:" $DIR/starpu_mpi_node${dst}_recv.log|awk -F ':' '{print $6}'> $DIR/node${dst}_recv_from_${src}.log
  66. diff --side-by-side --suppress-common-lines $DIR/node${src}_send_to_${dst}.log $DIR/node${dst}_recv_from_${src}.log > $DIR/check_$$
  67. if test -s $DIR/check_$$
  68. then
  69. echo $src $dst
  70. less $DIR/check_$$
  71. fi
  72. fi
  73. done
  74. done
  75. # check each envelope reception is followed by the appropriate data reception
  76. # first line: MPI_Recv of the envelope
  77. # second line: display envelope information
  78. # third line: MPI_Recv of the data
  79. for node in $nodes
  80. do
  81. echo processing $DIR/starpu_mpi_node${node}_recv.log
  82. (
  83. while read line
  84. do
  85. read line2
  86. read line3
  87. #echo processing
  88. tag2=$(echo $line2 | awk -F ':' '{print $6}')
  89. tag3=$(echo $line3 | awk -F ':' '{print $6}')
  90. if test "$tag2" != "$tag3"
  91. then
  92. echo erreur
  93. echo $tag2 $tag3
  94. echo $line
  95. echo $line2
  96. echo $line3
  97. fi
  98. done
  99. ) < $DIR/starpu_mpi_node${node}_recv.log
  100. done