import sys
import os
import subprocess
import string
import random

# It generates random CPU programs for use with PinTools (.c files),
# CUDA programs (READY) (.cu files), OPENCL (NOT READY) programs (.cl files),
# CPU+CUDA (.cu files) with kernel time measurement and evaluation of proper operation


def malloc_host_data(cudaoutfile, coutfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Write the host-side array allocations into both generated programs.

    The same C text is written first to ``cudaoutfile`` and then to
    ``coutfile``: declarations of ``size``, ``intBytes`` and ``floatBytes``,
    followed by one ``malloc``-ed ``int *A<i>`` per int array and one
    ``malloc``-ed ``float *B<i>`` per float array.

    Args:
        cudaoutfile: object with a ``write(str)`` method receiving the CUDA program text.
        coutfile: object with a ``write(str)`` method receiving the C program text.
        num_int_arrays: number of ``A<i>`` arrays to declare.
        num_float_arrays: number of ``B<i>`` arrays to declare.
        size_of_arrays: element count, emitted verbatim as the value of ``size``.
    """
    cudaoutfile.write("int size="+str(size_of_arrays)+";\n")
    cudaoutfile.write("int intBytes = size*sizeof(int);\n")
    cudaoutfile.write("int floatBytes = size*sizeof(float);\n")
    for i in range(num_int_arrays):
        cudaoutfile.write("int *A"+str(i)+";\n")
        cudaoutfile.write("A"+str(i)+" = (int *)malloc(intBytes);\n")
    for i in range(num_float_arrays):
        cudaoutfile.write("float *B"+str(i)+";\n")
        cudaoutfile.write("B"+str(i)+" = (float *)malloc(floatBytes);\n")
    # Identical text again, this time for the C-only program.
    coutfile.write("int size="+str(size_of_arrays)+";\n")
    coutfile.write("int intBytes = size*sizeof(int);\n")
    coutfile.write("int floatBytes = size*sizeof(float);\n")
    for i in range(num_int_arrays):
        coutfile.write("int *A"+str(i)+";\n")
        coutfile.write("A"+str(i)+" = (int *)malloc(intBytes);\n")
    for i in range(num_float_arrays):
        coutfile.write("float *B"+str(i)+";\n")
        coutfile.write("B"+str(i)+" = (float *)malloc(floatBytes);\n")


def init_arrays(cudaoutfile, coutfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Write one C ``for`` loop that initialises every array element.

    Each array gets a single assignment inside the loop, of the form
    ``X[i] = <const>+i+1`` or ``X[i] = <const>*i+1``; the form and the
    constant are chosen at random per array.  Every generated line is written
    to both files, so the two programs start from the same data.

    Args:
        cudaoutfile: object with a ``write(str)`` method receiving the CUDA program text.
        coutfile: object with a ``write(str)`` method receiving the C program text.
        num_int_arrays: number of ``A<i>`` arrays to initialise.
        num_float_arrays: number of ``B<i>`` arrays to initialise.
        size_of_arrays: loop bound, emitted verbatim into the ``for`` header.
    """
    cudaoutfile.write("for(int i=0;i<"+str(size_of_arrays)+";i++){\n")
    coutfile.write("for(int i=0;i<"+str(size_of_arrays)+";i++){\n")
    for i in range(num_int_arrays):
        # 0 -> additive form, 1 -> multiplicative form.
        operation = random.randrange(2)
        if operation == 0:
            line = "A"+str(i)+"[i] = "+str(random.randrange(100))+"+i+1;\n"
            cudaoutfile.write(line)
            coutfile.write(line)
        if operation == 1:
            line = "A"+str(i)+"[i] = "+str(random.randrange(100))+"*i+1;\n"
            cudaoutfile.write(line)
            coutfile.write(line)
    for i in range(num_float_arrays):
        operation = random.randrange(2)
        if operation == 0:
            line = "B"+str(i)+"[i] = "+str(random.uniform(0,100))+"+i+1;\n"
            cudaoutfile.write(line)
            coutfile.write(line)
        if operation == 1:
            line = "B"+str(i)+"[i] = "+str(random.uniform(0,100))+"*i+1;\n"
            cudaoutfile.write(line)
            coutfile.write(line)
    # Close the generated C loop in both programs.
    cudaoutfile.write("}\n")
    coutfile.write("}\n")


def const_init_arrays_for_static_dataset(outfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Write fixed-size array declarations ``int A<i>[10];`` / ``float B<i>[10];``.

    Args:
        outfile: object with a ``write(str)`` method receiving the declarations.
        num_int_arrays: number of ``A<i>`` declarations to write.
        num_float_arrays: number of ``B<i>`` declarations to write.
        size_of_arrays: not used by this function; the emitted length is
            always the literal 10.
    """
    for i in range(num_int_arrays):
        outfile.write("int A"+str(i)+"[10];\n")
    for i in range(num_float_arrays):
        outfile.write("float B"+str(i)+"[10];\n")


def copy_data_to_device(outfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Write the device allocation and host-to-device copy for every array.

    For each host array ``A<i>`` / ``B<i>`` the generated code declares a
    pointer ``d_A<i>`` / ``d_B<i>``, calls ``cudaMalloc`` on it and then
    ``cudaMemcpy`` with ``cudaMemcpyHostToDevice``.  The byte count is emitted
    as ``<size_of_arrays>*sizeof(<type>)`` with the size written as a literal.

    Args:
        outfile: object with a ``write(str)`` method receiving the CUDA program text.
        num_int_arrays: number of int arrays to copy.
        num_float_arrays: number of float arrays to copy.
        size_of_arrays: element count used in the emitted byte-count expressions.
    """
    #outfile.write("int size="+str(size_of_arrays)+";\n")
    #outfile.write("int intBytes = size*sizeof(int);\n")
    #outfile.write("int floatBytes = size*sizeof(float);\n")
    for i in range(num_int_arrays):
        outfile.write("int *d_A"+str(i)+";\n")
        outfile.write("cudaMalloc((void **)&d_A"+str(i)+","+str(size_of_arrays)+"*sizeof(int));\n")
        outfile.write("cudaMemcpy(d_A"+str(i)+",A"+str(i)+","+str(size_of_arrays)+"*sizeof(int),cudaMemcpyHostToDevice);\n")
    for i in range(num_float_arrays):
        outfile.write("float *d_B"+str(i)+";\n")
        outfile.write("cudaMalloc((void **)&d_B"+str(i)+","+str(size_of_arrays)+"*sizeof(float));\n")
        outfile.write("cudaMemcpy(d_B"+str(i)+",B"+str(i)+","+str(size_of_arrays)+"*sizeof(float),cudaMemcpyHostToDevice);\n")


def make_kernel(cudaoutfile, coutfile, cloutfile, num_int_arrays, num_float_arrays, size_of_arrays):
    """Build a random element-wise kernel body and write the kernel functions.

    The body consists of one assignment per output array.  Output arrays are
    the lowest-numbered ones (``A0 .. A<k-1>``, ``B0 .. B<k-1>``); each is
    assigned an expression chaining one to four randomly chosen input arrays
    with random ``+ - * /`` operators.

    Args:
        cudaoutfile: object with a ``write(str)`` method receiving the kernels.
        coutfile: not referenced in the visible part of this function.
        cloutfile: not referenced in the visible part of this function.
        num_int_arrays: total number of int arrays (kernel parameters ``A<i>``).
        num_float_arrays: total number of float arrays (kernel parameters ``B<i>``).
        size_of_arrays: not referenced in the visible part of this function.

    NOTE(review): the visible text of this function is truncated inside the
    CPU-kernel section (see the note at the end of the body) — the remainder
    must be restored from the original file.
    """
    line_gpu = "__global__ void kernel_gpu"
    line_cpu = "void kernel_cpu"
    # Shared parameter list; left open with a trailing comma so that
    # "int N){" can be appended when the signature is written.
    line = "("
    for i in range(num_int_arrays):
        line = line + "int *A"+str(i)+","
    for i in range(num_float_arrays):
        line = line + "float *B"+str(i)+","
    #generate the computation part
    kernel_body = ""
    #select output arrays
    # With more than one array the output count lies in [1, n-1], which
    # leaves at least one array to serve as input only.
    if num_int_arrays > 1:
        output_int_arrays = random.randrange(num_int_arrays-1)+1
    elif num_int_arrays == 1:
        output_int_arrays = 1
    else:
        output_int_arrays = 0
    if num_float_arrays > 1:
        output_float_arrays = random.randrange(num_float_arrays-1)+1
    elif num_float_arrays == 1:
        output_float_arrays = 1
    else:
        output_float_arrays = 0
    for i in range(output_int_arrays):
        kernel_body = kernel_body + "A"+str(i)+"[i] = "
        input_int_arrays = random.randrange(4)+1
        for j in range(input_int_arrays):
            if num_int_arrays > 1:
                # Offset into the input-only arrays (indices >= output_int_arrays).
                out_array = random.randrange(num_int_arrays-output_int_arrays)
            else:
                # Single array: 1 + (-1) == 0, so A0 is both input and output.
                out_array = -1;
            kernel_body = kernel_body + "A"+str(output_int_arrays+out_array)+"[i]"
            operation = random.randrange(4)
            if operation == 0:
                kernel_body = kernel_body+"+"
            if operation == 1:
                kernel_body = kernel_body+"-"
            if operation == 2:
                kernel_body = kernel_body+"*"
            if operation == 3:
                kernel_body = kernel_body+"/"
        # Drop the dangling operator after the last operand and end the statement.
        kernel_body = kernel_body[:-1]+";\n"
    for i in range(output_float_arrays):
        kernel_body = kernel_body + "B"+str(i)+"[i] = "
        input_float_arrays = random.randrange(4)+1
        for j in range(input_float_arrays):
            if num_float_arrays > 1:
                out_array = random.randrange(num_float_arrays-output_float_arrays)
            else:
                out_array = -1;
            kernel_body = kernel_body + "B"+str(output_float_arrays+out_array)+"[i]"
            operation = random.randrange(4)
            if operation == 0:
                kernel_body = kernel_body+"+"
            if operation == 1:
                kernel_body = kernel_body+"-"
            if operation == 2:
                kernel_body = kernel_body+"*"
            if operation == 3:
                kernel_body = kernel_body+"/"
        kernel_body = kernel_body[:-1]+";\n"
    # gpu kernel
    cudaoutfile.write(line_gpu+line+"int N){\n")
    cudaoutfile.write("int i = blockIdx.x * blockDim.x + threadIdx.x;\n")
    cudaoutfile.write("if (i < N){\n")
    cudaoutfile.write(kernel_body)
    cudaoutfile.write("}\n")
    cudaoutfile.write("}\n")
    cudaoutfile.write("\n")
    # cpu kernel
    cudaoutfile.write(line_cpu+line+"int N){\n")
    # NOTE(review): the source text is damaged from this statement onward.
    # The string below stops in the middle of a C `for` header, and nothing
    # that closes the CPU kernel is visible.  It looks as if text between a
    # '<' and a later '>' was stripped during extraction — TODO restore from
    # the original file.
    cudaoutfile.write("for(int i=0;i\n")


# NOTE(review): everything below appears to be the tail of the driver code
# whose beginning was lost in the damaged span above.  It is kept verbatim.
# As shown it cannot run: `cudaoutfile`, `coutfile`, `cloutfile`, `i`,
# `call_kernels`, `make_tests` and `free_pointers` are not defined anywhere in
# the visible text, and its original indentation (for example an enclosing
# loop that binds `i`) is unknown.
# NOTE(review): the "#include" strings below carry no header names —
# presumably the angle-bracketed names were stripped as well; TODO restore.
cudaoutfile.write("#include \n")
cudaoutfile.write("#include \n")
cudaoutfile.write("#include \n")
cudaoutfile.write("#include \n")
cudaoutfile.write("using namespace std;\n")
cudaoutfile.write("\n")
coutfile.write("#include \n")
coutfile.write("#include \n")
coutfile.write("#include \n")
#coutfile.write("using namespace std;\n")
coutfile.write("\n")
# Random shape of the generated program: 0-9 arrays of each type and
# 0-999999 elements per array.
num_int_arrays = random.randrange(10)
num_float_arrays = random.randrange(10)
size_of_arrays = random.randrange(1000000)
cloutfile.write("__kernel void memset_kernel(){\n")
const_init_arrays_for_static_dataset(cloutfile, num_int_arrays,num_float_arrays,size_of_arrays)
# 1 for only c, 2 for cuda
make_kernel(cudaoutfile, coutfile, cloutfile, num_int_arrays,num_float_arrays,size_of_arrays)
cudaoutfile.write("\n")
cudaoutfile.write("int main(int argc,char **argv) {\n")
coutfile.write("\n")
coutfile.write("int main(int argc,char **argv) {\n")
# The generated CUDA program prints the value of `i` to stderr when it runs.
cudaoutfile.write("fprintf(stderr, \""+str(i)+" \");\n")
malloc_host_data(cudaoutfile, coutfile, num_int_arrays,num_float_arrays,size_of_arrays)
init_arrays(cudaoutfile, coutfile, num_int_arrays,num_float_arrays,size_of_arrays)
copy_data_to_device(cudaoutfile, num_int_arrays,num_float_arrays,size_of_arrays)
# 1 for only c, 2 for cuda
call_kernels(cudaoutfile, coutfile, num_int_arrays,num_float_arrays,size_of_arrays)
make_tests(cudaoutfile, num_int_arrays,num_float_arrays,size_of_arrays)
free_pointers(cudaoutfile, coutfile, num_int_arrays,num_float_arrays,size_of_arrays)
cudaoutfile.write("printf(\"\\n\");")
coutfile.write("printf(\"\\n\");")
cudaoutfile.write("return 0; }")
coutfile.write("return 0; }")
cloutfile.write("}")