# cpu_cuda_mult.jl

  # Pull in the kernel-compiler definitions used by the rest of this script
  # (the macros and helper functions below are not defined in this file).
  include("../../src/Compiler/include.jl")

  # Register the output files for the generated kernel sources.
  # NOTE(review): presumably these must be called before any kernel macro is
  # expanded so the generated C / CUDA code has a destination; confirm against
  # the compiler sources.
  starpu_new_cpu_kernel_file("../build/generated_cpu_mult.c")
  starpu_new_cuda_kernel_file("../build/generated_cuda_mult.cu")
  # Naive dense matrix product kernel: writes m1 * m2 into m3.
  #
  # Arguments
  #   m1 : left operand, indexed as m1[i, k]
  #   m2 : right operand, indexed as m2[k, j]
  #   m3 : output, every entry m3[i, j] with i in 1:height(m1) and
  #        j in 1:width(m2) is overwritten
  #
  # Preconditions (not checked here): m2 must have at least width(m1) entries
  # along its first index, and m3 must be at least height(m1) x width(m2).
  #
  # NOTE(review): `width` / `height` come from the included compiler file; the
  # indexing below treats height as the extent of the first index and width as
  # the extent of the second. `@indep` presumably marks loops whose iterations
  # are independent (each (i, j) pair writes a distinct m3 entry) - confirm
  # against the macro definition.
  @cpu_cuda_kernel function matrix_mult(m1 :: Matrix{Float32}, m2 :: Matrix{Float32}, m3 :: Matrix{Float32}) :: Void

      width_m2 :: Int64 = width(m2)
      height_m1 :: Int64 = height(m1)
      width_m1 :: Int64 = width(m1)

      @indep for j in (1 : width_m2)
          @indep for i in (1 : height_m1)

              # Per-entry accumulator, local to one (i, j) iteration.
              sum :: Float32 = 0.

              # Dot product of row i of m1 with column j of m2.
              for k in (1 : width_m1)
                  sum = sum + m1[i, k] * m2[k, j]
              end

              m3[i, j] = sum
          end
      end
  end
  # Build one shared library per backend, then merge them into a single
  # library.
  # NOTE(review): presumably these compile the sources registered earlier via
  # starpu_new_cpu_kernel_file / starpu_new_cuda_kernel_file; confirm against
  # the compiler sources.
  compile_cpu_kernels("../build/generated_cpu_mult.so")
  compile_cuda_kernels("../build/generated_cuda_mult.so")

  # First argument is the combined output, second the list of libraries to merge.
  combine_kernel_files(
      "../build/generated_tasks.so",
      ["../build/generated_cpu_mult.so", "../build/generated_cuda_mult.so"],
  )