Prechádzať zdrojové kódy

add some dirty program to measure the bandwith/latency used by the performance models

Cédric Augonnet 15 rokov pred
rodič
commit
e9b8da269e
2 zmenil súbory, kde vykonal 162 pridanie a 2 odobranie
  1. 6 2
      tools/Makefile.am
  2. 156 0
      tools/bandwith-cuda.c

+ 6 - 2
tools/Makefile.am

@@ -34,6 +34,10 @@ fxt_stats_SOURCES = fxt-stats.c
 fxt_stats_CFLAGS = -I$(top_srcdir)/src/
 endif
 
-bin_PROGRAMS += perfmodel-display
-
+bin_PROGRAMS +=	perfmodel-display
 perfmodel_display_SOURCES = perfmodel-display.c
+
+if USE_CUDA
+bin_PROGRAMS += bandwith-cuda
+bandwith_cuda_SOURCES = bandwith-cuda.c
+endif

+ 156 - 0
tools/bandwith-cuda.c

@@ -0,0 +1,156 @@
+#include <starpu.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <string.h>
+
+/* size of the buffer used for bandwith measurement */
+#define SIZE	32*1024*1024*sizeof(char)
+
+#define NITER	32
+
+double cudadev_timing_htod[MAXCUDADEVS] = {0.0};
+double cudadev_timing_dtoh[MAXCUDADEVS] = {0.0};
+
+void measure_bandwith_between_host_and_dev(int dev)
+{
+	/* Initiliaze CUDA context on the device */
+	cudaSetDevice(dev);
+
+	/* hack to force the initialization */
+	cudaFree(0);
+
+	/* Allocate a buffer on the device */
+	unsigned char *d_buffer;
+	cudaMalloc((void **)&d_buffer, SIZE);
+	assert(d_buffer);
+
+	/* Allocate a buffer on the host */
+	unsigned char *h_buffer;
+	cudaHostAlloc((void **)&h_buffer, SIZE, 0); 
+	assert(h_buffer);
+
+	/* Fill them */
+	memset(h_buffer, 0, SIZE);
+	cudaMemset(d_buffer, 0, SIZE);
+
+	unsigned iter;
+	double timing;
+	struct timeval start;
+	struct timeval end;
+
+	/* Measure upload bandwith */
+	gettimeofday(&start, NULL);
+	for (iter = 0; iter < NITER; iter++)
+	{
+		cudaMemcpy(d_buffer, h_buffer, SIZE, cudaMemcpyHostToDevice);
+		cudaThreadSynchronize();
+	}
+	gettimeofday(&end, NULL);
+	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+
+	cudadev_timing_htod[dev] = timing/NITER;
+
+	/* Measure download bandwith */
+	gettimeofday(&start, NULL);
+	for (iter = 0; iter < NITER; iter++)
+	{
+		cudaMemcpy(h_buffer, d_buffer, SIZE, cudaMemcpyDeviceToHost);
+		cudaThreadSynchronize();
+	}
+	gettimeofday(&end, NULL);
+	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+
+	cudadev_timing_dtoh[dev] = timing/NITER;
+
+	/* Free buffers */
+	cudaFreeHost(h_buffer);
+	cudaFree(d_buffer);
+
+	cudaThreadExit();
+}
+
+#define MAXNODES	16
+
+int main(int argc, char **argv)
+{
+        int ncuda;
+        cudaGetDeviceCount(&ncuda);
+
+	fprintf(stderr, "FOUD %d devices\n", ncuda);
+
+	int i, j;
+	for (i = 0; i < ncuda; i++)
+	{
+		/* measure bandwith between Host and Device i */
+		measure_bandwith_between_host_and_dev(i);
+	}
+
+	fprintf(stderr, "\n\nLatency Matrix\n\n");
+
+	fprintf(stderr, "{\n");
+	for (j = 0; j < MAXNODES; j++)
+	{
+		fprintf(stderr, "\t{");
+		for (i = 0; i < MAXNODES; i++)
+		{
+			double latency;
+
+			if ((i > ncuda) || (j > ncuda))
+			{
+				/* convention */
+				latency = -1.0;
+			}
+			else if (i == j)
+			{
+				latency = 0.0;
+			}
+			else {
+				latency = ((i && j)?2000.0:500.0);
+			}
+	
+			fprintf(stderr, "%.2f%s", latency, ((i != (MAXNODES -1)?", ":"")));
+		}
+
+		fprintf(stderr, "}%s\n", ((j != (MAXNODES - 1))?",":""));
+	}
+	fprintf(stderr, "};\n");
+
+	fprintf(stderr, "\n\nBandwith Matrix\n\n");
+
+	fprintf(stderr, "{\n");
+	for (j = 0; j < MAXNODES; j++)
+	{
+		fprintf(stderr, "\t{");
+		for (i = 0; i < MAXNODES; i++)
+		{
+			double bandwith;
+
+			if ((i > ncuda) || (j > ncuda))
+			{
+				bandwith = -1.0;
+			}
+			else if (i != j)
+			{
+				/* Bandwith = (SIZE)/(time i -> ram + time ram -> j)*/
+				double time_i_to_ram = (i==0)?0.0:cudadev_timing_dtoh[i-1];
+				double time_ram_to_j = (j==0)?0.0:cudadev_timing_htod[j-1];
+	
+				double timing = time_i_to_ram + time_ram_to_j;
+	
+				bandwith = 1.0*SIZE/timing;
+			}
+			else {
+				/* convention */
+				bandwith = 0.0;
+			}
+	
+			fprintf(stderr, "%.2f%s", bandwith, ((i != (MAXNODES -1)?", ":"")));
+		}
+
+		fprintf(stderr, "}%s\n", ((j != (MAXNODES - 1))?",":""));
+	}
+
+	fprintf(stderr, "};\n");
+
+	return 0;
+}