  /* bandwith-cuda.c (3.2 KB) */
  #include <starpu.h>
  #include <assert.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/time.h>
  /*
   * Size in bytes of the buffer used for the bandwidth measurement.
   * The expression is parenthesized so that the macro expands correctly
   * inside larger expressions (e.g. x / SIZE or x % SIZE).
   */
  #define SIZE (32*1024*1024*sizeof(char))
  /* Number of copies timed in each direction. */
  #define NITER 32
  8. double cudadev_timing_htod[MAXCUDADEVS] = {0.0};
  9. double cudadev_timing_dtoh[MAXCUDADEVS] = {0.0};
  10. void measure_bandwith_between_host_and_dev(int dev)
  11. {
  12. /* Initiliaze CUDA context on the device */
  13. cudaSetDevice(dev);
  14. /* hack to force the initialization */
  15. cudaFree(0);
  16. /* Allocate a buffer on the device */
  17. unsigned char *d_buffer;
  18. cudaMalloc((void **)&d_buffer, SIZE);
  19. assert(d_buffer);
  20. /* Allocate a buffer on the host */
  21. unsigned char *h_buffer;
  22. cudaHostAlloc((void **)&h_buffer, SIZE, 0);
  23. assert(h_buffer);
  24. /* Fill them */
  25. memset(h_buffer, 0, SIZE);
  26. cudaMemset(d_buffer, 0, SIZE);
  27. unsigned iter;
  28. double timing;
  29. struct timeval start;
  30. struct timeval end;
  31. /* Measure upload bandwith */
  32. gettimeofday(&start, NULL);
  33. for (iter = 0; iter < NITER; iter++)
  34. {
  35. cudaMemcpy(d_buffer, h_buffer, SIZE, cudaMemcpyHostToDevice);
  36. cudaThreadSynchronize();
  37. }
  38. gettimeofday(&end, NULL);
  39. timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  40. cudadev_timing_htod[dev] = timing/NITER;
  41. /* Measure download bandwith */
  42. gettimeofday(&start, NULL);
  43. for (iter = 0; iter < NITER; iter++)
  44. {
  45. cudaMemcpy(h_buffer, d_buffer, SIZE, cudaMemcpyDeviceToHost);
  46. cudaThreadSynchronize();
  47. }
  48. gettimeofday(&end, NULL);
  49. timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  50. cudadev_timing_dtoh[dev] = timing/NITER;
  51. /* Free buffers */
  52. cudaFreeHost(h_buffer);
  53. cudaFree(d_buffer);
  54. cudaThreadExit();
  55. }
  56. #define MAXNODES 16
  57. int main(int argc, char **argv)
  58. {
  59. int ncuda;
  60. cudaGetDeviceCount(&ncuda);
  61. fprintf(stderr, "FOUD %d devices\n", ncuda);
  62. int i, j;
  63. for (i = 0; i < ncuda; i++)
  64. {
  65. /* measure bandwith between Host and Device i */
  66. measure_bandwith_between_host_and_dev(i);
  67. }
  68. fprintf(stderr, "\n\nLatency Matrix\n\n");
  69. fprintf(stderr, "{\n");
  70. for (i = 0; i < MAXNODES; i++)
  71. {
  72. fprintf(stderr, "\t{");
  73. for (j = 0; j < MAXNODES; j++)
  74. {
  75. double latency;
  76. if ((i > ncuda) || (j > ncuda))
  77. {
  78. /* convention */
  79. latency = -1.0;
  80. }
  81. else if (i == j)
  82. {
  83. latency = 0.0;
  84. }
  85. else {
  86. latency = ((i && j)?2000.0:500.0);
  87. }
  88. fprintf(stderr, "%.2f%s", latency, ((j != (MAXNODES -1)?", ":"")));
  89. }
  90. fprintf(stderr, "}%s\n", ((i != (MAXNODES - 1))?",":""));
  91. }
  92. fprintf(stderr, "};\n");
  93. fprintf(stderr, "\n\nBandwith Matrix\n\n");
  94. fprintf(stderr, "{\n");
  95. for (i = 0; i < MAXNODES; i++)
  96. {
  97. fprintf(stderr, "\t{");
  98. for (j = 0; j < MAXNODES; j++)
  99. {
  100. double bandwith;
  101. if ((i > ncuda) || (j > ncuda))
  102. {
  103. bandwith = -1.0;
  104. }
  105. else if (i != j)
  106. {
  107. /* Bandwith = (SIZE)/(time i -> ram + time ram -> j)*/
  108. double time_i_to_ram = (i==0)?0.0:cudadev_timing_dtoh[i-1];
  109. double time_ram_to_j = (j==0)?0.0:cudadev_timing_htod[j-1];
  110. double timing = time_i_to_ram + time_ram_to_j;
  111. bandwith = 1.0*SIZE/timing;
  112. }
  113. else {
  114. /* convention */
  115. bandwith = 0.0;
  116. }
  117. fprintf(stderr, "%.2f%s", bandwith, ((j != (MAXNODES -1)?", ":"")));
  118. }
  119. fprintf(stderr, "}%s\n", ((i != (MAXNODES - 1))?",":""));
  120. }
  121. fprintf(stderr, "};\n");
  122. return 0;
  123. }