#!/bin/bash
  # StarPU --- Runtime system for heterogeneous multicore architectures.
  #
  # Copyright (C) 2017 CNRS
  #
  # StarPU is free software; you can redistribute it and/or modify
  # it under the terms of the GNU Lesser General Public License as published by
  # the Free Software Foundation; either version 2.1 of the License, or (at
  # your option) any later version.
  #
  # StarPU is distributed in the hope that it will be useful, but
  # WITHOUT ANY WARRANTY; without even the implied warranty of
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  #
  # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  # Script to check that MPI communications are done properly.
  # The application should be launched with STARPU_MPI_COMM=1, e.g.
  #   $ export STARPU_MPI_COMM=1
  #   $ mpirun --output-filename starpu_mpi.log appli parameters
  # The script can then be launched on the resulting output files:
  #   $ starpu_mpi_comm_check.sh starpu_mpi.log.*
  # Refuse to run without a first argument: every later step reads the log
  # files named on the command line.
  if [[ -z "${1-}" ]]; then
    # Diagnostics go to stderr so they cannot be mistaken for report output.
    echo "Syntax error: parameter missing" >&2
    echo "Usage: ${0##*/} <starpu_mpi log file>..." >&2
    exit 1
  fi
  # Get the nodes identifiers
  #
  # starpu_mpi_list_nodes FILE...
  #   For every line of each FILE that contains "starpu_mpi" and a "[", takes
  #   the first whitespace-separated word, strips the "[...][starpu_mpi]"
  #   wrapper from it and keeps the result only if it consists of digits.
  #   Prints the surviving identifiers one per line, sorted, without duplicates.
  starpu_mpi_list_nodes() {
    local f
    # "$@" (not $*) so that log paths containing blanks or glob characters are
    # passed to grep as single, unmodified arguments.
    for f in "$@"; do
      grep -- starpu_mpi "$f" \
        | grep '\[' \
        | awk '{print $1}' \
        | sed 's/\[\(.*\)\]\[starpu_mpi\]/\1/' \
        | grep "^[[:digit:]]*$"
    done | sort -u
  }

  nodes=$(starpu_mpi_list_nodes "$@")
  # $nodes is deliberately unquoted: word splitting puts all identifiers on one
  # line, and the later loops iterate over the same split list.
  echo nodes $nodes
  # Directory receiving every intermediate file written by this script.
  DIR=/tmp

  # for each node, extract send and receive communications
  #
  # starpu_mpi_split_node_log NODE FILE...
  #   Writes three files into $DIR:
  #     starpu_mpi_node<NODE>.log       lines of the FILEs carrying the
  #                                     "[NODE][starpu_mpi]" tag
  #     starpu_mpi_node<NODE>_send.log  those of them containing "-->"
  #     starpu_mpi_node<NODE>_recv.log  those of them containing "<--"
  starpu_mpi_split_node_log() {
    local node=$1 f
    shift
    for f in "$@"; do
      # Match the complete tag as a fixed string. Matching only "[NODE" is a
      # prefix match: node 1 would also collect the lines of nodes 10, 11, ...
      grep -F -- "[${node}][starpu_mpi]" "$f"
    done > "$DIR/starpu_mpi_node${node}.log"
    grep -- "-->" "$DIR/starpu_mpi_node${node}.log" > "$DIR/starpu_mpi_node${node}_send.log"
    grep -- "<--" "$DIR/starpu_mpi_node${node}.log" > "$DIR/starpu_mpi_node${node}_recv.log"
  }

  # $nodes is split on purpose (one identifier per word); "$@" keeps each log
  # path intact even when it contains blanks.
  for node in $nodes; do
    starpu_mpi_split_node_log "$node" "$@"
  done
  # count the number of traced lines (disabled debugging aid)
  #for node in $nodes
  #do
  #    wc -l $DIR/starpu_mpi_node${node}_recv.log
  #    lines=$(grep :42:42 $DIR/starpu_mpi_node${node}_recv.log | wc -l)
  #    lines2=$(( lines + lines ))
  #    echo $lines2
  #    lines3=$(( lines2 + lines ))
  #    echo $lines3
  #done
  # for each pair of nodes, check tags are sent and received in the same order
  #
  # starpu_mpi_check_pair_order SRC DST
  #   Reads   $DIR/starpu_mpi_node<SRC>_send.log and
  #           $DIR/starpu_mpi_node<DST>_recv.log.
  #   Writes  $DIR/node<SRC>_send_to_<DST>.log and
  #           $DIR/node<DST>_recv_from_<SRC>.log, each holding the 6th
  #           ':'-separated field of the lines matching ":<peer>:42:".
  #   Outputs nothing when both lists are identical; otherwise prints
  #           "SRC DST" and shows the side-by-side diff through the pager
  #           ($PAGER, defaulting to less).
  starpu_mpi_check_pair_order() {
    local src=$1 dst=$2 sent received report
    sent="$DIR/node${src}_send_to_${dst}.log"
    received="$DIR/node${dst}_recv_from_${src}.log"

    grep -- ":${dst}:42:" "$DIR/starpu_mpi_node${src}_send.log" \
      | awk -F ':' '{print $6}' > "$sent"
    grep -- ":${src}:42:" "$DIR/starpu_mpi_node${dst}_recv.log" \
      | awk -F ':' '{print $6}' > "$received"

    # Keep the diff in memory instead of a predictable, never-removed
    # $DIR/check_<pid> scratch file.
    report=$(diff --side-by-side --suppress-common-lines "$sent" "$received")
    if [[ -n "$report" ]]; then
      echo "$src" "$dst"
      # Unquoted on purpose so that PAGER may carry options (e.g. "less -S").
      printf '%s\n' "$report" | ${PAGER:-less}
    fi
  }

  for src in $nodes; do
    for dst in $nodes; do
      if [[ "$src" != "$dst" ]]; then
        starpu_mpi_check_pair_order "$src" "$dst"
      fi
    done
  done
  # check each envelope reception is followed by the appropriate data reception
  # first line: MPI_Recv of the envelope
  # second line: display envelope information
  # third line: MPI_Recv of the data
  #
  # starpu_mpi_check_envelopes FILE
  #   Consumes FILE three lines at a time and compares the 6th ':'-separated
  #   field of the second and third line of each group. For every group where
  #   they differ, prints "erreur", the two fields, then the three lines
  #   verbatim. A group cut short by end of file is compared with empty
  #   strings standing in for the missing lines.
  starpu_mpi_check_envelopes() {
    local line line2 line3
    local -a info data
    # IFS= and -r keep each line byte-for-byte (no trimming, no backslash
    # processing) so the report shows exactly what the trace contains.
    while IFS= read -r line; do
      IFS= read -r line2
      IFS= read -r line3
      # Split on ':' inside the shell rather than forking awk twice per group.
      IFS=: read -r -a info <<<"$line2"
      IFS=: read -r -a data <<<"$line3"
      if [[ "${info[5]-}" != "${data[5]-}" ]]; then
        echo erreur
        printf '%s %s\n' "${info[5]-}" "${data[5]-}"
        # Quoted output: trace lines contain '[', ']' and possibly '*', which
        # an unquoted echo would subject to pathname expansion.
        printf '%s\n' "$line" "$line2" "$line3"
      fi
    done < "$1"
  }

  for node in $nodes; do
    echo processing $DIR/starpu_mpi_node${node}_recv.log
    starpu_mpi_check_envelopes "$DIR/starpu_mpi_node${node}_recv.log"
  done
  99. done