123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317 |
- import sys
- import os
- import subprocess
- import string
- import random
def malloc_host_data(cudaoutfile, coutfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Emit the host-side size variables and array allocations.

    The same C text is written first to ``cudaoutfile`` and then to
    ``coutfile``: the ``size`` / ``intBytes`` / ``floatBytes`` variables,
    followed by a declaration and ``malloc`` for each int array ``A<k>`` and
    each float array ``B<k>``.

    Args:
        cudaoutfile: writable text stream receiving the CUDA program.
        coutfile: writable text stream receiving the plain C program.
        num_int_arrays: number of ``int *A<k>`` arrays to declare.
        num_float_arrays: number of ``float *B<k>`` arrays to declare.
        size_of_arrays: element count emitted as the value of ``size``.
    """
    statements = [
        "int size=" + str(size_of_arrays) + ";\n",
        "int intBytes = size*sizeof(int);\n",
        "int floatBytes = size*sizeof(float);\n",
    ]
    array_kinds = (
        ("int", "A", num_int_arrays, "intBytes"),
        ("float", "B", num_float_arrays, "floatBytes"),
    )
    for ctype, prefix, count, byte_var in array_kinds:
        for index in range(count):
            name = prefix + str(index)
            statements.append(ctype + " *" + name + ";\n")
            statements.append(name + " = (" + ctype + " *)malloc(" + byte_var + ");\n")

    # Both generated programs get exactly the same allocation code.
    for target in (cudaoutfile, coutfile):
        for statement in statements:
            target.write(statement)
def init_arrays(cudaoutfile, coutfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Emit a C loop that fills every host array with a random formula.

    Each array element is set to ``<constant>+i+1`` or ``<constant>*i+1``,
    with the operator and the constant drawn from ``random``.  Int arrays
    ``A<k>`` use an integer constant from ``randrange(100)``; float arrays
    ``B<k>`` use a constant from ``uniform(0, 100)``.  Every line is written
    to both streams, so the two generated programs initialise identically.

    Args:
        cudaoutfile: writable text stream receiving the CUDA program.
        coutfile: writable text stream receiving the plain C program.
        num_int_arrays: number of ``A<k>`` arrays to initialise.
        num_float_arrays: number of ``B<k>`` arrays to initialise.
        size_of_arrays: upper bound of the emitted ``for`` loop.
    """
    def emit(text):
        cudaoutfile.write(text)
        coutfile.write(text)

    array_kinds = (
        ("A", num_int_arrays, lambda: random.randrange(100)),
        ("B", num_float_arrays, lambda: random.uniform(0, 100)),
    )

    emit("for(int i=0;i<" + str(size_of_arrays) + ";i++){\n")
    for prefix, count, draw_constant in array_kinds:
        for index in range(count):
            # Draw the operator before the constant so the RNG sequence is
            # consumed in the same order for a given seed.
            combiner = "+" if random.randrange(2) == 0 else "*"
            constant = str(draw_constant())
            emit(prefix + str(index) + "[i] = " + constant + combiner + "i+1;\n")
    emit("}\n")
def const_init_arrays_for_static_dataset(outfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Emit fixed-length array declarations for the static (.cl) variant.

    Writes ``int A<k>[10];`` for each int array followed by
    ``float B<k>[10];`` for each float array.

    Args:
        outfile: writable text stream receiving the declarations.
        num_int_arrays: number of ``A<k>`` declarations.
        num_float_arrays: number of ``B<k>`` declarations.
        size_of_arrays: accepted for signature parity with the other
            emitters but not used; every array is declared with length 10.
    """
    declarations = ["int A" + str(index) + "[10];\n" for index in range(num_int_arrays)]
    declarations.extend("float B" + str(index) + "[10];\n" for index in range(num_float_arrays))
    for declaration in declarations:
        outfile.write(declaration)
def copy_data_to_device(outfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Emit CUDA code that mirrors every host array on the device.

    For each host array ``A<k>`` / ``B<k>`` this writes a declaration of the
    device pointer ``d_A<k>`` / ``d_B<k>``, a ``cudaMalloc`` of
    ``size_of_arrays`` elements, and a host-to-device ``cudaMemcpy``.

    Args:
        outfile: writable text stream receiving the CUDA program.
        num_int_arrays: number of int arrays to mirror.
        num_float_arrays: number of float arrays to mirror.
        size_of_arrays: element count used in the emitted byte sizes.
    """
    array_kinds = (("int", "A", num_int_arrays), ("float", "B", num_float_arrays))
    for ctype, prefix, count in array_kinds:
        byte_count = str(size_of_arrays) + "*sizeof(" + ctype + ")"
        for index in range(count):
            host_name = prefix + str(index)
            device_name = "d_" + host_name
            outfile.write(ctype + " *" + device_name + ";\n")
            outfile.write("cudaMalloc((void **)&" + device_name + "," + byte_count + ");\n")
            outfile.write(
                "cudaMemcpy(" + device_name + "," + host_name + "," + byte_count
                + ",cudaMemcpyHostToDevice);\n"
            )
def make_kernel(cudaoutfile, coutfile, cloutfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Emit one randomly generated element-wise kernel in three flavours.

    A random body of assignments is built once and reused everywhere.  The
    first few arrays of each type are outputs; each output is assigned an
    expression of 1 to 4 array terms joined by random ``+ - * /`` operators.
    When a type has more than one array, the terms come only from the
    non-output arrays.  When it has exactly one array, that array is both
    the output and the only term.

    Written output:
      * ``cudaoutfile``: ``kernel_gpu`` (``__global__``, one element per
        thread, guarded by ``i < N``), a blank line, then ``kernel_cpu``
        (a sequential loop over ``N``).
      * ``coutfile``: ``kernel_cpu`` only.
      * ``cloutfile``: just the bare ``for`` loop around the body.

    Args:
        cudaoutfile: writable text stream receiving the CUDA program.
        coutfile: writable text stream receiving the plain C program.
        cloutfile: writable text stream receiving the .cl variant.
        num_int_arrays: number of ``int *A<k>`` kernel parameters.
        num_float_arrays: number of ``float *B<k>`` kernel parameters.
        size_of_arrays: accepted for signature parity; not used here.
    """
    operators = "+-*/"

    def pick_output_count(total):
        # With several arrays, leave at least one as a pure input.
        if total > 1:
            return random.randrange(total - 1) + 1
        return 1 if total == 1 else 0

    def build_assignments(prefix, total, outputs):
        statements = []
        for target in range(outputs):
            pieces = []
            term_count = random.randrange(4) + 1
            for _ in range(term_count):
                if total > 1:
                    offset = random.randrange(total - outputs)
                else:
                    # Single array: outputs is 1, so this selects index 0.
                    offset = -1
                pieces.append(prefix + str(outputs + offset) + "[i]")
                pieces.append(operators[random.randrange(4)])
            # The last piece is a dangling operator; drop it.
            expression = "".join(pieces[:-1])
            statements.append(prefix + str(target) + "[i] = " + expression + ";\n")
        return "".join(statements)

    parameters = "("
    parameters += "".join("int *A" + str(k) + "," for k in range(num_int_arrays))
    parameters += "".join("float *B" + str(k) + "," for k in range(num_float_arrays))
    parameters += "int N){\n"

    # RNG draw order: both output counts first, then int statements, then
    # float statements.
    int_outputs = pick_output_count(num_int_arrays)
    float_outputs = pick_output_count(num_float_arrays)
    kernel_body = build_assignments("A", num_int_arrays, int_outputs)
    kernel_body += build_assignments("B", num_float_arrays, float_outputs)

    loop_header = "for(int i=0;i<N;i++){\n"
    cpu_kernel = ["void kernel_cpu" + parameters, loop_header, kernel_body, "}\n", "}\n"]
    gpu_kernel = [
        "__global__ void kernel_gpu" + parameters,
        "int i = blockIdx.x * blockDim.x + threadIdx.x;\n",
        "if (i < N){\n",
        kernel_body,
        "}\n",
        "}\n",
        "\n",
    ]

    for chunk in gpu_kernel + cpu_kernel:
        cudaoutfile.write(chunk)
    for chunk in cpu_kernel:
        coutfile.write(chunk)
    for chunk in (loop_header, kernel_body, "}\n"):
        cloutfile.write(chunk)
def call_kernels(cudaoutfile, coutfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Emit the timed kernel invocations for both generated programs.

    CUDA program: computes a launch configuration of 256-thread blocks, times
    a ``kernel_gpu`` launch over the device arrays (with a
    ``cudaThreadSynchronize`` before the stop timestamp), prints the GPU time,
    then times ``kernel_cpu`` over the host arrays and prints the CPU time.

    C program: times ``kernel_cpu`` and prints the CPU time.  Around the call
    it rewrites ``./profile_in_block.txt`` with ``1`` before and ``0`` after
    (presumably a marker for an external profiler - not verifiable from this
    file).

    Args:
        cudaoutfile: writable text stream receiving the CUDA program.
        coutfile: writable text stream receiving the plain C program.
        num_int_arrays: number of ``A<k>`` / ``d_A<k>`` arguments.
        num_float_arrays: number of ``B<k>`` / ``d_B<k>`` arguments.
        size_of_arrays: element count passed as the kernels' ``N``.
    """
    host_arrays = ["A" + str(k) for k in range(num_int_arrays)]
    host_arrays += ["B" + str(k) for k in range(num_float_arrays)]
    size_text = str(size_of_arrays)

    gpu_call = (
        "kernel_gpu<<<block_no,block_size>>>("
        + "".join("d_" + name + "," for name in host_arrays)
        + size_text + ");\n"
    )
    cpu_call = "kernel_cpu(" + "".join(name + "," for name in host_arrays) + size_text + ");\n"

    declare_timers = "struct timeval time0,time1;\n"
    start_timer = "gettimeofday(&time0,NULL);\n"
    stop_timer = "gettimeofday(&time1,NULL);\n"
    elapsed_usecs = (
        "(time1.tv_sec*1000000.0 + time1.tv_usec)"
        " - (time0.tv_sec*1000000.0 + time0.tv_usec);\n"
    )

    def profile_marker(assignment_target, flag):
        # C snippet that overwrites the marker file with a single character.
        return [
            assignment_target + " = fopen(\"./profile_in_block.txt\",\"w\");\n",
            "if(file_for_block_of_interest) {\n",
            "char Buf[2] = \"" + flag + "\";\n",
            "fwrite(Buf, 1, 1, file_for_block_of_interest);\n",
            "fclose(file_for_block_of_interest);}\n",
        ]

    cuda_lines = [
        # Round the grid up so that every element is covered by a thread.
        "int n = " + str(int(size_of_arrays / 256) + 1) + "*256;\n",
        "int block_size = 256;\n",
        "int block_no = n/block_size;\n",
        declare_timers,
        start_timer,
        gpu_call,
        "cudaThreadSynchronize();\n",
        stop_timer,
        "double totaltime10 = " + elapsed_usecs,
        "fprintf(stderr, \"GPU time: %lf msecs \", (totaltime10)/1000.0F);\n",
        start_timer,
        cpu_call,
        stop_timer,
        "totaltime10 = " + elapsed_usecs,
        "fprintf(stderr, \"CPU time: %lf msecs \", (totaltime10)/1000.0F);\n",
    ]

    c_lines = [declare_timers, start_timer]
    c_lines += profile_marker("FILE *file_for_block_of_interest", "1")
    c_lines.append(cpu_call)
    c_lines += profile_marker("file_for_block_of_interest", "0")
    c_lines += [
        stop_timer,
        "double totaltime10 = " + elapsed_usecs,
        "fprintf(stderr, \"CPU time: %lf msecs \", (totaltime10)/1000.0F); \n",
    ]

    for text in cuda_lines:
        cudaoutfile.write(text)
    for text in c_lines:
        coutfile.write(text)
def make_tests(outfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Emit CUDA host code that reads device results back and checks them.

    For every array a host buffer ``testA<k>`` / ``testB<k>`` is declared,
    allocated with the ``intBytes`` / ``floatBytes`` variables (the emitted
    program must already define them), and filled with a device-to-host
    ``cudaMemcpy`` from ``d_A<k>`` / ``d_B<k>``.  A loop then compares each
    ``A<k>[i]`` against ``testA<k>[i]``; the first mismatch prints
    ``Invalid kernel`` and breaks out of the loop.

    NOTE: the float buffers are copied back but no comparison is emitted for
    them; only the int arrays are verified.

    Args:
        outfile: writable text stream receiving the CUDA program.
        num_int_arrays: number of int arrays to read back and compare.
        num_float_arrays: number of float arrays to read back.
        size_of_arrays: element count used for copy sizes and the loop bound.
    """
    array_kinds = (
        ("int", "A", num_int_arrays, "intBytes"),
        ("float", "B", num_float_arrays, "floatBytes"),
    )
    for ctype, prefix, count, byte_var in array_kinds:
        # Size the copy with the array's own element type.  The float copy
        # previously used sizeof(int), which disagreed with the float buffer
        # and with the host-to-device copy.
        copy_bytes = str(size_of_arrays) + "*sizeof(" + ctype + ")"
        for index in range(count):
            test_name = "test" + prefix + str(index)
            device_name = "d_" + prefix + str(index)
            outfile.write(ctype + " *" + test_name + ";\n")
            outfile.write(test_name + " = (" + ctype + " *)malloc(" + byte_var + ");\n")
            outfile.write(
                "cudaMemcpy(" + test_name + "," + device_name + "," + copy_bytes
                + ",cudaMemcpyDeviceToHost);\n"
            )

    outfile.write("for(int i=0;i<" + str(size_of_arrays) + ";i++){\n")
    for index in range(num_int_arrays):
        suffix = str(index)
        outfile.write("if (A" + suffix + "[i] != testA" + suffix + "[i]) {\n")
        outfile.write("printf(\"Invalid kernel \");\n")
        outfile.write("break;}\n")
    outfile.write("}\n")
def free_pointers(cudaoutfile, coutfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Emit the clean-up calls for every array in both programs.

    The C program gets ``free(<host>)`` for each host array.  The CUDA
    program gets ``free(<host>)``, ``free(test<host>)`` and
    ``cudaFree(d_<host>)`` for each array.

    Args:
        cudaoutfile: writable text stream receiving the CUDA program.
        coutfile: writable text stream receiving the plain C program.
        num_int_arrays: number of ``A<k>`` arrays to release.
        num_float_arrays: number of ``B<k>`` arrays to release.
        size_of_arrays: accepted for signature parity; not used here.
    """
    for prefix, count in (("A", num_int_arrays), ("B", num_float_arrays)):
        for index in range(count):
            host_name = prefix + str(index)
            release_host = "free(" + host_name + ");\n"
            coutfile.write(release_host)
            cudaoutfile.write(release_host)
            cudaoutfile.write("free(test" + host_name + ");\n")
            cudaoutfile.write("cudaFree(d_" + host_name + ");\n")
def main():
    """Generate random CUDA / C / OpenCL test program triples.

    Command line: ``<num_of_programs> <output_directory>``.

    For each index ``i`` three files are written into the output directory:
    ``cudatest<i>.cu`` (GPU kernel, CPU reference and comparison),
    ``ctest<i>.c`` (CPU kernel only) and ``test<i>.cl`` (kernel body wrapped
    in ``__kernel void memset_kernel``).  The number of int arrays, number of
    float arrays and the array size are drawn at random per program.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " <num_of_programs> <output_directory>")
    num_of_programs = sys.argv[1]
    output_directory = sys.argv[2]

    for i in range(int(num_of_programs)):
        cudafile = os.path.join(output_directory, "cudatest" + str(i) + ".cu")
        cfile = os.path.join(output_directory, "ctest" + str(i) + ".c")
        clfile = os.path.join(output_directory, "test" + str(i) + ".cl")
        # Open with "w", not "a": in append mode a rerun into the same
        # directory adds a second program (and a second main) to each
        # existing file, which no longer compiles.
        with open(cudafile, "w") as cudaoutfile, \
                open(cfile, "w") as coutfile, \
                open(clfile, "w") as cloutfile:
            for header in (
                "#include <iostream>\n",
                "#include <cuda.h>\n",
                "#include <stdio.h>\n",
                "#include <stdlib.h>\n",
                "#include <sys/time.h>\n",
                "using namespace std;\n",
                "\n",
            ):
                cudaoutfile.write(header)
            for header in (
                "#include <stdio.h>\n",
                "#include <stdlib.h>\n",
                "#include <sys/time.h>\n",
                "\n",
            ):
                coutfile.write(header)

            # Random shape of this program (draw order kept stable).
            num_int_arrays = random.randrange(10)
            num_float_arrays = random.randrange(10)
            size_of_arrays = random.randrange(1000000)

            cloutfile.write("__kernel void memset_kernel(){\n")
            const_init_arrays_for_static_dataset(
                cloutfile, num_int_arrays, num_float_arrays, size_of_arrays)

            make_kernel(cudaoutfile, coutfile, cloutfile,
                        num_int_arrays, num_float_arrays, size_of_arrays)

            cudaoutfile.write("\n")
            cudaoutfile.write("int main(int argc,char **argv) {\n")
            coutfile.write("\n")
            coutfile.write("int main(int argc,char **argv) {\n")
            # The CUDA program prints its own index to stderr first.
            cudaoutfile.write("fprintf(stderr, \"" + str(i) + " \");\n")

            malloc_host_data(cudaoutfile, coutfile,
                             num_int_arrays, num_float_arrays, size_of_arrays)
            init_arrays(cudaoutfile, coutfile,
                        num_int_arrays, num_float_arrays, size_of_arrays)
            copy_data_to_device(cudaoutfile,
                                num_int_arrays, num_float_arrays, size_of_arrays)

            call_kernels(cudaoutfile, coutfile,
                         num_int_arrays, num_float_arrays, size_of_arrays)
            make_tests(cudaoutfile,
                       num_int_arrays, num_float_arrays, size_of_arrays)
            free_pointers(cudaoutfile, coutfile,
                          num_int_arrays, num_float_arrays, size_of_arrays)

            cudaoutfile.write("printf(\"\\n\");")
            coutfile.write("printf(\"\\n\");")
            cudaoutfile.write("return 0; }")
            coutfile.write("return 0; }")
            cloutfile.write("}")


if __name__ == "__main__":
    main()
|