Pārlūkot izejas kodu

We don't hardcode the performance of the bus anymore, so that tool is useless
now.

Cédric Augonnet 15 gadi atpakaļ
vecāks
revīzija
0934f728e3
2 mainītis faili ar 0 papildinājumiem un 161 dzēšanām
  1. 0 5
      tools/Makefile.am
  2. 0 156
      tools/bandwith-cuda.c

+ 0 - 5
tools/Makefile.am

@@ -36,8 +36,3 @@ endif
 
 bin_PROGRAMS +=	perfmodel-display
 perfmodel_display_SOURCES = perfmodel-display.c
-
-if USE_CUDA
-bin_PROGRAMS += bandwith-cuda
-bandwith_cuda_SOURCES = bandwith-cuda.c
-endif

+ 0 - 156
tools/bandwith-cuda.c

@@ -1,156 +0,0 @@
-#include <starpu.h>
-#include <assert.h>
-#include <sys/time.h>
-#include <string.h>
-
-/* size of the buffer used for bandwith measurement */
-#define SIZE	32*1024*1024*sizeof(char)
-
-#define NITER	32
-
-double cudadev_timing_htod[MAXCUDADEVS] = {0.0};
-double cudadev_timing_dtoh[MAXCUDADEVS] = {0.0};
-
-void measure_bandwith_between_host_and_dev(int dev)
-{
-	/* Initiliaze CUDA context on the device */
-	cudaSetDevice(dev);
-
-	/* hack to force the initialization */
-	cudaFree(0);
-
-	/* Allocate a buffer on the device */
-	unsigned char *d_buffer;
-	cudaMalloc((void **)&d_buffer, SIZE);
-	assert(d_buffer);
-
-	/* Allocate a buffer on the host */
-	unsigned char *h_buffer;
-	cudaHostAlloc((void **)&h_buffer, SIZE, 0); 
-	assert(h_buffer);
-
-	/* Fill them */
-	memset(h_buffer, 0, SIZE);
-	cudaMemset(d_buffer, 0, SIZE);
-
-	unsigned iter;
-	double timing;
-	struct timeval start;
-	struct timeval end;
-
-	/* Measure upload bandwith */
-	gettimeofday(&start, NULL);
-	for (iter = 0; iter < NITER; iter++)
-	{
-		cudaMemcpy(d_buffer, h_buffer, SIZE, cudaMemcpyHostToDevice);
-		cudaThreadSynchronize();
-	}
-	gettimeofday(&end, NULL);
-	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-
-	cudadev_timing_htod[dev] = timing/NITER;
-
-	/* Measure download bandwith */
-	gettimeofday(&start, NULL);
-	for (iter = 0; iter < NITER; iter++)
-	{
-		cudaMemcpy(h_buffer, d_buffer, SIZE, cudaMemcpyDeviceToHost);
-		cudaThreadSynchronize();
-	}
-	gettimeofday(&end, NULL);
-	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-
-	cudadev_timing_dtoh[dev] = timing/NITER;
-
-	/* Free buffers */
-	cudaFreeHost(h_buffer);
-	cudaFree(d_buffer);
-
-	cudaThreadExit();
-}
-
-#define MAXNODES	16
-
-int main(int argc, char **argv)
-{
-        int ncuda;
-        cudaGetDeviceCount(&ncuda);
-
-	fprintf(stderr, "FOUD %d devices\n", ncuda);
-
-	int i, j;
-	for (i = 0; i < ncuda; i++)
-	{
-		/* measure bandwith between Host and Device i */
-		measure_bandwith_between_host_and_dev(i);
-	}
-
-	fprintf(stderr, "\n\nLatency Matrix\n\n");
-
-	fprintf(stderr, "{\n");
-	for (i = 0; i < MAXNODES; i++)
-	{
-		fprintf(stderr, "\t{");
-		for (j = 0; j < MAXNODES; j++)
-		{
-			double latency;
-
-			if ((i > ncuda) || (j > ncuda))
-			{
-				/* convention */
-				latency = -1.0;
-			}
-			else if (i == j)
-			{
-				latency = 0.0;
-			}
-			else {
-				latency = ((i && j)?2000.0:500.0);
-			}
-	
-			fprintf(stderr, "%.2f%s", latency, ((j != (MAXNODES -1)?", ":"")));
-		}
-
-		fprintf(stderr, "}%s\n", ((i != (MAXNODES - 1))?",":""));
-	}
-	fprintf(stderr, "};\n");
-
-	fprintf(stderr, "\n\nBandwith Matrix\n\n");
-
-	fprintf(stderr, "{\n");
-	for (i = 0; i < MAXNODES; i++)
-	{
-		fprintf(stderr, "\t{");
-		for (j = 0; j < MAXNODES; j++)
-		{
-			double bandwith;
-
-			if ((i > ncuda) || (j > ncuda))
-			{
-				bandwith = -1.0;
-			}
-			else if (i != j)
-			{
-				/* Bandwith = (SIZE)/(time i -> ram + time ram -> j)*/
-				double time_i_to_ram = (i==0)?0.0:cudadev_timing_dtoh[i-1];
-				double time_ram_to_j = (j==0)?0.0:cudadev_timing_htod[j-1];
-	
-				double timing = time_i_to_ram + time_ram_to_j;
-	
-				bandwith = 1.0*SIZE/timing;
-			}
-			else {
-				/* convention */
-				bandwith = 0.0;
-			}
-	
-			fprintf(stderr, "%.2f%s", bandwith, ((j != (MAXNODES -1)?", ":"")));
-		}
-
-		fprintf(stderr, "}%s\n", ((i != (MAXNODES - 1))?",":""));
-	}
-
-	fprintf(stderr, "};\n");
-
-	return 0;
-}