|
@@ -144,7 +144,145 @@ function add_device_to_interval_call(expr :: StarpuExpr)
|
|
|
return apply(func_to_apply, expr)
|
|
|
end
|
|
|
|
|
|
+function translate_cublas(expr :: StarpuExpr)
|
|
|
+ function func_to_run(x :: StarpuExpr)
|
|
|
+ # STARPU_BLAS => (CUBLAS, TRANS, FILLMODE, ALPHA, SIDE, DIAG)
|
|
|
+ blas_to_cublas = Dict(:STARPU_SGEMM => (:cublasSgemm, [1, 2], [], [6, 11], [], []),
|
|
|
+ :STARPU_DGEMM => (:cublasDgemm, [1, 2], [], [6, 11], [], []),
|
|
|
+ :STARPU_SGEMV => (:cublasSgemv, [1], [], [4,9], [], []),
|
|
|
+ :STARPU_DGEMV => (:cublasDgemv, [1], [], [4,9], [], []),
|
|
|
+ :STARPU_SSCAL => (:cublasSscal, [], [], [2], [], []),
|
|
|
+ :STARPU_DSCAL => (:cublasDscal, [], [], [2], [], []),
|
|
|
+ :STARPU_STRSM => (:cublasStrsm, [3], [2], [7,10], [1], [4]),
|
|
|
+ :STARPU_DTRSM => (:cublasDtrsm, [3], [2], [7,10], [1], [4]),
|
|
|
+ :STARPU_SSYR => (:cublasSsyr, [], [1], [3], [], []),
|
|
|
+ :STARPU_SSYRK => (:cublasSsyrk, [2], [1], [5,8], [], []),
|
|
|
+ :STARPU_SGER => (:cublasSger, [], [], [3], [], []),
|
|
|
+ :STARPU_DGER => (:cublasDger, [], [], [3], [], []),
|
|
|
+ :STARPU_STRSV => (:cublasStrsv, [2], [1], [], [], [3]),
|
|
|
+ :STARPU_STRMM => (:cublasStrmm, [3], [2], [7], [1], [4]),
|
|
|
+ :STARPU_DTRMM => (:cublasDtrmm, [3], [2], [7], [1], [4]),
|
|
|
+ :STARPU_STRMV => (:cublasStrmv, [2], [1], [], [], [3]),
|
|
|
+ :STARPU_SAXPY => (:cublasSaxpy, [], [], [2], [], []),
|
|
|
+ :STARPU_DAXPY => (:cublasDaxpy, [], [], [2], [], []),
|
|
|
+ :STARPU_SSWAP => (:cublasSswap, [], [], [], [], []),
|
|
|
+ :STARPU_DSWAP => (:cublasDswap, [], [], [], [], []))
|
|
|
+
|
|
|
+ if !(isa(x, StarpuExprCall) && x.func in keys(blas_to_cublas))
|
|
|
+ return x
|
|
|
+ end
|
|
|
+
|
|
|
+ new_args = x.args
|
|
|
+
|
|
|
+ # cublasOperation_t parameters (e.g. StarpuExprValue("N")
|
|
|
+ for i in blas_to_cublas[x.func][2]
|
|
|
+ if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String)
|
|
|
+ error("Argument $i of ", x.func, " must be a string")
|
|
|
+ end
|
|
|
+
|
|
|
+ value = new_args[i].value
|
|
|
+
|
|
|
+ if value == "N" || value == "n"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_OP_N)
|
|
|
+ elseif value == "T" || value == "t"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_OP_T)
|
|
|
+ elseif value == "C" || value == "c"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_OP_C)
|
|
|
+ else
|
|
|
+ error("Unhandled value for rgument $i of ", x.func, ": ", value,
|
|
|
+ "expecting (\"N\", \"T\", or \"C\")")
|
|
|
+ end
|
|
|
+ end
|
|
|
+
|
|
|
+ # cublasFillMode_t parameters (e.g. StarpuExprValue("L")
|
|
|
+ for i in blas_to_cublas[x.func][3]
|
|
|
+ if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String)
|
|
|
+ error("Argument $i of ", x.func, " must be a string")
|
|
|
+ end
|
|
|
|
|
|
+ value = new_args[i].value
|
|
|
+
|
|
|
+ if value == "L" || value == "l"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_FILL_MODE_LOWER)
|
|
|
+ elseif value == "U" || value == "u"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_FILL_MODE_UPPER)
|
|
|
+ else
|
|
|
+ error("Unhandled value for rgument $i of ", x.func, ": ", value,
|
|
|
+ "expecting (\"L\" or \"U\")")
|
|
|
+ end
|
|
|
+ end
|
|
|
+
|
|
|
+ # scalar parameters (alpha, beta, ...): alpha -> &alpha
|
|
|
+ for i in blas_to_cublas[x.func][4]
|
|
|
+ if !isa(new_args[i], StarpuExprVar)
|
|
|
+ error("Argument $i of ", x.func, " must be a variable")
|
|
|
+ end
|
|
|
+ var_name = new_args[i].name
|
|
|
+ new_args[i] = StarpuExprVar(Symbol("&$var_name"))
|
|
|
+ end
|
|
|
+
|
|
|
+ # cublasSideMode_t parameters (e.g. StarpuExprValue("L")
|
|
|
+ for i in blas_to_cublas[x.func][5]
|
|
|
+ if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String)
|
|
|
+ error("Argument $i of ", x.func, " must be a string")
|
|
|
+ end
|
|
|
+
|
|
|
+ value = new_args[i].value
|
|
|
+
|
|
|
+ if value == "L" || value == "l"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_SIDE_LEFT)
|
|
|
+ elseif value == "R" || value == "r"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_SIDE_RIGHT)
|
|
|
+ else
|
|
|
+ error("Unhandled value for rgument $i of ", x.func, ": ", value,
|
|
|
+ "expecting (\"L\" or \"R\")")
|
|
|
+ end
|
|
|
+ end
|
|
|
+
|
|
|
+ # cublasSideMode_t parameters (e.g. StarpuExprValue("L")
|
|
|
+ for i in blas_to_cublas[x.func][5]
|
|
|
+ if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String)
|
|
|
+ error("Argument $i of ", x.func, " must be a string")
|
|
|
+ end
|
|
|
+
|
|
|
+ value = new_args[i].value
|
|
|
+
|
|
|
+ if value == "L" || value == "l"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_SIDE_LEFT)
|
|
|
+ elseif value == "R" || value == "r"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_SIDE_RIGHT)
|
|
|
+ else
|
|
|
+ error("Unhandled value for rgument $i of ", x.func, ": ", value,
|
|
|
+ "expecting (\"L\" or \"R\")")
|
|
|
+ end
|
|
|
+ end
|
|
|
+
|
|
|
+ # cublasDiag_Typet parameters (e.g. StarpuExprValue("N")
|
|
|
+ for i in blas_to_cublas[x.func][6]
|
|
|
+ if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String)
|
|
|
+ error("Argument $i of ", x.func, " must be a string")
|
|
|
+ end
|
|
|
+
|
|
|
+ value = new_args[i].value
|
|
|
+
|
|
|
+ if value == "N" || value == "n"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_DIAG_NON_UNIT)
|
|
|
+ elseif value == "U" || value == "u"
|
|
|
+ new_args[i] = StarpuExprVar(:CUBLAS_DIAG_UNIT)
|
|
|
+ else
|
|
|
+ error("Unhandled value for rgument $i of ", x.func, ": ", value,
|
|
|
+ "expecting (\"N\" or \"U\")")
|
|
|
+ end
|
|
|
+ end
|
|
|
+
|
|
|
+ new_args = [@parse(starpu_cublas_get_local_handle()), x.args...]
|
|
|
+
|
|
|
+ return StarpuExprBlock([StarpuExprCall(blas_to_cublas[x.func][1], new_args),
|
|
|
+ @parse cudaStreamSynchronize(starpu_cuda_get_local_stream())])
|
|
|
+ end
|
|
|
+
|
|
|
+ return apply(func_to_run, expr)
|
|
|
+end
|
|
|
|
|
|
function transform_to_cuda_kernel(func :: StarpuExprFunction)
|
|
|
|
|
@@ -152,45 +290,48 @@ function transform_to_cuda_kernel(func :: StarpuExprFunction)
|
|
|
|
|
|
init, indep, finish = extract_init_indep_finish(cpu_func.body)
|
|
|
|
|
|
- if indep == nothing
|
|
|
- error("No independant for loop has been found") # TODO can fail because extraction is not correct yet
|
|
|
- end
|
|
|
+ cpu_instr = init
|
|
|
+ kernel = nothing
|
|
|
|
|
|
- prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
|
|
|
+ # Generate a CUDA kernel only if there is an independent loop (@parallel macro).
|
|
|
+ if (indep != nothing)
|
|
|
+ prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
|
|
|
|
|
|
- kernel_call = StarpuExprCudaCall(:cudaKernel, (@parse nblocks), (@parse THREADS_PER_BLOCK), StarpuExpr[])
|
|
|
- prekernel_instr = vcat(init, prekernel_instr)
|
|
|
- kernel_instr = vcat(kernel_instr, indep.body)
|
|
|
+ kernel_call = StarpuExprCudaCall(:cudaKernel, (@parse nblocks), (@parse THREADS_PER_BLOCK), StarpuExpr[])
|
|
|
+ cpu_instr = vcat(cpu_instr, prekernel_instr)
|
|
|
+ kernel_instr = vcat(kernel_instr, indep.body)
|
|
|
|
|
|
- indep_for_def, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
|
|
|
- prekernel_def, prekernel_undef = analyse_variable_declarations(StarpuExprBlock(prekernel_instr), cpu_func.args)
|
|
|
+ indep_for_def, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
|
|
|
+ prekernel_def, prekernel_undef = analyse_variable_declarations(StarpuExprBlock(cpu_instr), cpu_func.args)
|
|
|
|
|
|
- for undef_var in indep_for_undef
|
|
|
+ for undef_var in indep_for_undef
|
|
|
|
|
|
- found_var = find_variable(undef_var, prekernel_def)
|
|
|
+ found_var = find_variable(undef_var, prekernel_def)
|
|
|
|
|
|
- if found_var == nothing # TODO : error then ?
|
|
|
- continue
|
|
|
+ if found_var == nothing # TODO : error then ?
|
|
|
+ continue
|
|
|
+ end
|
|
|
+
|
|
|
+ push!(kernel_args, found_var)
|
|
|
end
|
|
|
|
|
|
- push!(kernel_args, found_var)
|
|
|
+ call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
|
|
|
+ kernelname=Symbol("KERNEL_",func.func);
|
|
|
+ cuda_call = StarpuExprCudaCall(kernelname, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
|
|
|
+ push!(cpu_instr, cuda_call)
|
|
|
+ push!(cpu_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
|
|
|
+ kernel = StarpuExprFunction(Nothing, kernelname, kernel_args, StarpuExprBlock(kernel_instr))
|
|
|
+ kernel = add_device_to_interval_call(kernel)
|
|
|
+ kernel = flatten_blocks(kernel)
|
|
|
end
|
|
|
|
|
|
- call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
|
|
|
- kernelname=Symbol("KERNEL_",func.func);
|
|
|
- cuda_call = StarpuExprCudaCall(kernelname, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
|
|
|
- push!(prekernel_instr, cuda_call)
|
|
|
- push!(prekernel_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
|
|
|
- prekernel_instr = vcat(prekernel_instr, finish)
|
|
|
+ cpu_instr = vcat(cpu_instr, finish)
|
|
|
|
|
|
prekernel_name = Symbol("CUDA_", func.func)
|
|
|
- prekernel = StarpuExprFunction(Nothing, prekernel_name, cpu_func.args, StarpuExprBlock(prekernel_instr))
|
|
|
+ prekernel = StarpuExprFunction(Nothing, prekernel_name, cpu_func.args, StarpuExprBlock(cpu_instr))
|
|
|
+ prekernel = translate_cublas(prekernel)
|
|
|
prekernel = flatten_blocks(prekernel)
|
|
|
|
|
|
- kernel = StarpuExprFunction(Nothing, kernelname, kernel_args, StarpuExprBlock(kernel_instr))
|
|
|
- kernel = add_device_to_interval_call(kernel)
|
|
|
- kernel = flatten_blocks(kernel)
|
|
|
-
|
|
|
return prekernel, kernel
|
|
|
end
|
|
|
|