cpu_cuda_mult.jl 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. # StarPU --- Runtime system for heterogeneous multicore architectures.
  2. #
  3. # Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. #
  5. # StarPU is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU Lesser General Public License as published by
  7. # the Free Software Foundation; either version 2.1 of the License, or (at
  8. # your option) any later version.
  9. #
  10. # StarPU is distributed in the hope that it will be useful, but
  11. # WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. #
  14. # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. #
  # Load the kernel-compiler sources that define the macros and helper
  # functions used throughout the rest of this script.
  include("../../src/Compiler/include.jl")

  # Hand the CPU (.c) and CUDA (.cu) output paths to the compiler helpers.
  # NOTE(review): presumably these calls create/reset the files that the
  # kernel definition below is written into -- confirm against the
  # Compiler sources.
  let cpu_source = "../build/generated_cpu_mult.c",
      cuda_source = "../build/generated_cuda_mult.cu"

      starpu_new_cpu_kernel_file(cpu_source)
      starpu_new_cuda_kernel_file(cuda_source)
  end
  # matrix_mult(m1, m2, m3)
  #
  # Kernel written in the restricted Julia subset consumed by the
  # `@cpu_cuda_kernel` macro. For every (i, j) with i in 1:height(m1) and
  # j in 1:width(m2) it stores in m3[i, j] the sum over k in 1:width(m1) of
  # m1[i, k] * m2[k, j].
  #
  # No dimension checks are performed here: m2 is indexed with k up to
  # width(m1) and m3 with (i, j) up to (height(m1), width(m2)), so the caller
  # must supply compatible matrices.
  #
  # NOTE(review): `width`, `height` and `@indep` come from the included
  # compiler sources; `@indep` presumably marks loops whose iterations are
  # independent of each other -- confirm there.
  @cpu_cuda_kernel function matrix_mult(m1 :: Matrix{Float32}, m2 :: Matrix{Float32}, m3 :: Matrix{Float32}) :: Void

      # Read the loop bounds once, before entering the loop nest.
      width_m2 :: Int64 = width(m2)
      height_m1 :: Int64 = height(m1)
      width_m1 :: Int64 = width(m1)

      @indep for j in (1 : width_m2)
          @indep for i in (1 : height_m1)

              # Each (i, j) iteration owns its accumulator; the k loop is a
              # plain sequential reduction into it.
              sum :: Float32 = 0.

              for k in (1 : width_m1)
                  sum = sum + m1[i, k] * m2[k, j]
              end

              m3[i, j] = sum
          end
      end
  end
  # Build one shared object per backend, then pass both to
  # combine_kernel_files together with the path of the combined library.
  # The per-backend paths are bound once so the compile step and the combine
  # step cannot drift apart.
  let cpu_library = "../build/generated_cpu_mult.so",
      cuda_library = "../build/generated_cuda_mult.so"

      compile_cpu_kernels(cpu_library)
      compile_cuda_kernels(cuda_library)
      combine_kernel_files("../build/generated_tasks.so", [cpu_library, cuda_library])
  end