# File: cpu_cuda_nbody.jl

  # StarPU --- Runtime system for heterogeneous multicore architectures.
  #
  # Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  #
  # StarPU is free software; you can redistribute it and/or modify
  # it under the terms of the GNU Lesser General Public License as published by
  # the Free Software Foundation; either version 2.1 of the License, or (at
  # your option) any later version.
  #
  # StarPU is distributed in the hope that it will be useful, but
  # WITHOUT ANY WARRANTY; without even the implied warranty of
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  #
  # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  #
  # Bring the kernel compiler (macros and helper functions used below) into scope.
  include("../../src/Compiler/include.jl")

  # Register the output files for the generated kernel sources. These calls
  # come before the `@cpu_cuda_kernel` definitions that follow; presumably
  # the macros write their generated C / CUDA code into these files
  # (confirm against the compiler sources).
  starpu_new_cpu_kernel_file("../build/generated_cpu_nbody.c")
  starpu_new_cuda_kernel_file("../build/generated_cuda_nbody.cu")
  # nbody_acc: accumulate, for each column `plan` of `accelerations`, the 2-D
  # acceleration exerted on one body by every other body in `positions`.
  #
  # Arguments (as used below):
  #   positions     - rows 1 and 2 are read as the x / y coordinates; the second
  #                   index selects the body (1:width(positions)).
  #   accelerations - output; rows 1 and 2 of column `plan` receive the summed
  #                   x / y acceleration.
  #   masses        - masses[oplan] weights the contribution of body `oplan`.
  #   parameters    - parameters[1] is used as G and parameters[3] as the
  #                   softening term eps added to the distance.
  #   sliceID       - sliceID[1] * width(accelerations) is the offset of this
  #                   output slice inside `positions` (presumably a zero-based
  #                   slice index -- confirm against the caller).
  @cpu_cuda_kernel function nbody_acc(positions ::Matrix{Float64}, accelerations ::Matrix{Float64}, masses ::Vector{Float64}, parameters ::Vector{Float64}, sliceID ::Vector{Int64}) ::Void
      widthp ::Int64 = width(positions)
      widtha ::Int64 = width(accelerations)

      @indep for plan = 1:widtha
          sumaccx ::Float64 = 0
          sumaccy ::Float64 = 0

          # These values do not depend on `oplan`, so they are read once per
          # output column instead of once per interacting pair.
          G ::Float64 = parameters[1]
          eps ::Float64 = parameters[3]
          Id ::Int64 = sliceID[1]*widtha
          # Index in `positions` of the body whose acceleration is computed.
          target ::Int64 = plan + Id

          for oplan = 1:widthp
              # b counts how many of `>=` and `<=` hold, so it equals 2 only
              # when target == oplan; `b < 2` therefore skips the interaction
              # of a body with itself.
              # NOTE(review): kept in this form rather than `!=` because it is
              # unclear which operators the kernel translator accepts.
              b ::Int64 = (target >= oplan) + (target <= oplan)
              if (b < 2)
                  dx ::Float64 = positions[1, oplan] - positions[1, target]
                  dy ::Float64 = positions[2, oplan] - positions[2, target]
                  modul ::Float64 = sqrt(dx * dx + dy * dy)

                  # Softened distance cubed, computed once and shared by both
                  # components: a += G * m * d / (|d| + eps)^3.
                  soft ::Float64 = modul + eps
                  denom ::Float64 = soft * soft * soft

                  sumaccx = sumaccx + (G * masses[oplan] * dx) / denom
                  sumaccy = sumaccy + (G * masses[oplan] * dy) / denom
              end
          end

          accelerations[1, plan] = sumaccx
          accelerations[2, plan] = sumaccy
      end
  end
  # nbody_updt: advance every body by one step.
  #
  # For each column of `positions`, the velocity is first incremented by
  # acceleration * parameters[2]; the position is then incremented by the
  # *updated* velocity * parameters[2]. parameters[2] is presumably the time
  # step (confirm against the caller). `velocities` and `positions` are
  # modified in place; `accelerations` and `parameters` are only read.
  @cpu_cuda_kernel function nbody_updt(positions ::Matrix{Float64}, velocities ::Matrix{Float64}, accelerations ::Matrix{Float64}, parameters ::Vector{Float64}) ::Void
      nbodies ::Int64 = width(positions)

      @indep for body = 1:nbodies
          dt ::Float64 = parameters[2]

          # Velocities must be updated before positions: the position update
          # below reads the freshly written velocity.
          velocities[1, body] = velocities[1, body] + accelerations[1, body] * dt
          velocities[2, body] = velocities[2, body] + accelerations[2, body] * dt

          positions[1, body] = positions[1, body] + velocities[1, body] * dt
          positions[2, body] = positions[2, body] + velocities[2, body] * dt
      end
  end
  # Build one shared object per target from the kernel files registered above
  # (presumably compiling the generated C / CUDA sources -- confirm against
  # the compiler sources).
  compile_cpu_kernels("../build/generated_cpu_nbody.so")
  compile_cuda_kernels("../build/generated_cuda_nbody.so")

  # Merge the CPU and CUDA objects into the single library named by the
  # first argument.
  combine_kernel_files(
      "../build/generated_tasks_nbody.so",
      [
          "../build/generated_cpu_nbody.so",
          "../build/generated_cuda_nbody.so",
      ],
  )