|
@@ -1,156 +0,0 @@
|
|
|
-#include <starpu.h>
|
|
|
-#include <assert.h>
|
|
|
-#include <sys/time.h>
|
|
|
-#include <string.h>
|
|
|
-
|
|
|
-/* size of the buffer used for bandwith measurement */
|
|
|
-#define SIZE 32*1024*1024*sizeof(char)
|
|
|
-
|
|
|
-#define NITER 32
|
|
|
-
|
|
|
-double cudadev_timing_htod[MAXCUDADEVS] = {0.0};
|
|
|
-double cudadev_timing_dtoh[MAXCUDADEVS] = {0.0};
|
|
|
-
|
|
|
-void measure_bandwith_between_host_and_dev(int dev)
|
|
|
-{
|
|
|
- /* Initiliaze CUDA context on the device */
|
|
|
- cudaSetDevice(dev);
|
|
|
-
|
|
|
- /* hack to force the initialization */
|
|
|
- cudaFree(0);
|
|
|
-
|
|
|
- /* Allocate a buffer on the device */
|
|
|
- unsigned char *d_buffer;
|
|
|
- cudaMalloc((void **)&d_buffer, SIZE);
|
|
|
- assert(d_buffer);
|
|
|
-
|
|
|
- /* Allocate a buffer on the host */
|
|
|
- unsigned char *h_buffer;
|
|
|
- cudaHostAlloc((void **)&h_buffer, SIZE, 0);
|
|
|
- assert(h_buffer);
|
|
|
-
|
|
|
- /* Fill them */
|
|
|
- memset(h_buffer, 0, SIZE);
|
|
|
- cudaMemset(d_buffer, 0, SIZE);
|
|
|
-
|
|
|
- unsigned iter;
|
|
|
- double timing;
|
|
|
- struct timeval start;
|
|
|
- struct timeval end;
|
|
|
-
|
|
|
- /* Measure upload bandwith */
|
|
|
- gettimeofday(&start, NULL);
|
|
|
- for (iter = 0; iter < NITER; iter++)
|
|
|
- {
|
|
|
- cudaMemcpy(d_buffer, h_buffer, SIZE, cudaMemcpyHostToDevice);
|
|
|
- cudaThreadSynchronize();
|
|
|
- }
|
|
|
- gettimeofday(&end, NULL);
|
|
|
- timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
|
|
|
-
|
|
|
- cudadev_timing_htod[dev] = timing/NITER;
|
|
|
-
|
|
|
- /* Measure download bandwith */
|
|
|
- gettimeofday(&start, NULL);
|
|
|
- for (iter = 0; iter < NITER; iter++)
|
|
|
- {
|
|
|
- cudaMemcpy(h_buffer, d_buffer, SIZE, cudaMemcpyDeviceToHost);
|
|
|
- cudaThreadSynchronize();
|
|
|
- }
|
|
|
- gettimeofday(&end, NULL);
|
|
|
- timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
|
|
|
-
|
|
|
- cudadev_timing_dtoh[dev] = timing/NITER;
|
|
|
-
|
|
|
- /* Free buffers */
|
|
|
- cudaFreeHost(h_buffer);
|
|
|
- cudaFree(d_buffer);
|
|
|
-
|
|
|
- cudaThreadExit();
|
|
|
-}
|
|
|
-
|
|
|
-#define MAXNODES 16
|
|
|
-
|
|
|
-int main(int argc, char **argv)
|
|
|
-{
|
|
|
- int ncuda;
|
|
|
- cudaGetDeviceCount(&ncuda);
|
|
|
-
|
|
|
- fprintf(stderr, "FOUD %d devices\n", ncuda);
|
|
|
-
|
|
|
- int i, j;
|
|
|
- for (i = 0; i < ncuda; i++)
|
|
|
- {
|
|
|
- /* measure bandwith between Host and Device i */
|
|
|
- measure_bandwith_between_host_and_dev(i);
|
|
|
- }
|
|
|
-
|
|
|
- fprintf(stderr, "\n\nLatency Matrix\n\n");
|
|
|
-
|
|
|
- fprintf(stderr, "{\n");
|
|
|
- for (i = 0; i < MAXNODES; i++)
|
|
|
- {
|
|
|
- fprintf(stderr, "\t{");
|
|
|
- for (j = 0; j < MAXNODES; j++)
|
|
|
- {
|
|
|
- double latency;
|
|
|
-
|
|
|
- if ((i > ncuda) || (j > ncuda))
|
|
|
- {
|
|
|
- /* convention */
|
|
|
- latency = -1.0;
|
|
|
- }
|
|
|
- else if (i == j)
|
|
|
- {
|
|
|
- latency = 0.0;
|
|
|
- }
|
|
|
- else {
|
|
|
- latency = ((i && j)?2000.0:500.0);
|
|
|
- }
|
|
|
-
|
|
|
- fprintf(stderr, "%.2f%s", latency, ((j != (MAXNODES -1)?", ":"")));
|
|
|
- }
|
|
|
-
|
|
|
- fprintf(stderr, "}%s\n", ((i != (MAXNODES - 1))?",":""));
|
|
|
- }
|
|
|
- fprintf(stderr, "};\n");
|
|
|
-
|
|
|
- fprintf(stderr, "\n\nBandwith Matrix\n\n");
|
|
|
-
|
|
|
- fprintf(stderr, "{\n");
|
|
|
- for (i = 0; i < MAXNODES; i++)
|
|
|
- {
|
|
|
- fprintf(stderr, "\t{");
|
|
|
- for (j = 0; j < MAXNODES; j++)
|
|
|
- {
|
|
|
- double bandwith;
|
|
|
-
|
|
|
- if ((i > ncuda) || (j > ncuda))
|
|
|
- {
|
|
|
- bandwith = -1.0;
|
|
|
- }
|
|
|
- else if (i != j)
|
|
|
- {
|
|
|
- /* Bandwith = (SIZE)/(time i -> ram + time ram -> j)*/
|
|
|
- double time_i_to_ram = (i==0)?0.0:cudadev_timing_dtoh[i-1];
|
|
|
- double time_ram_to_j = (j==0)?0.0:cudadev_timing_htod[j-1];
|
|
|
-
|
|
|
- double timing = time_i_to_ram + time_ram_to_j;
|
|
|
-
|
|
|
- bandwith = 1.0*SIZE/timing;
|
|
|
- }
|
|
|
- else {
|
|
|
- /* convention */
|
|
|
- bandwith = 0.0;
|
|
|
- }
|
|
|
-
|
|
|
- fprintf(stderr, "%.2f%s", bandwith, ((j != (MAXNODES -1)?", ":"")));
|
|
|
- }
|
|
|
-
|
|
|
- fprintf(stderr, "}%s\n", ((i != (MAXNODES - 1))?",":""));
|
|
|
- }
|
|
|
-
|
|
|
- fprintf(stderr, "};\n");
|
|
|
-
|
|
|
- return 0;
|
|
|
-}
|