# vector_scal.jl
  1. # StarPU --- Runtime system for heterogeneous multicore architectures.
  2. #
  3. # Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. #
  5. # StarPU is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU Lesser General Public License as published by
  7. # the Free Software Foundation; either version 2.1 of the License, or (at
  8. # your option) any later version.
  9. #
  10. # StarPU is distributed in the hope that it will be useful, but
  11. # WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. #
  14. # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. #
  16. import Libdl
  17. using StarPU
  18. using LinearAlgebra
  # Targets requested for code generation (CPU and CUDA); presumably applies to
  # the codelet defined right below -- confirm against the StarPU Julia docs.
  @target STARPU_CPU+STARPU_CUDA
  # Codelet kernel: updates every element of `v` in place as
  #     v[i] = v[i] * m + l + k
  # `m` is the integer scale factor, `k` and `l` are added as offsets.
  @codelet function vector_scal(m::Int32, v :: Vector{Float32}, k :: Float32, l :: Float32) :: Float32
      len :: Int32 = length(v)
      # Plain element-wise loop; each iteration touches only its own element.
      @parallel for idx in (1 : len)
          v[idx] = v[idx] * m + l + k
      end
  end
  # Start the StarPU runtime before any data registration or task submission
  # performed further down in this script.
  starpu_init()
  # Run the `vector_scal` codelet once on `v` through StarPU and time it.
  #
  # Arguments:
  #   v       -- vector handed to StarPU via `starpu_data_register`; the task
  #              accesses it in STARPU_RW mode.
  #   m, k, l -- scalars forwarded to the codelet through `cl_arg`.
  #
  # Returns the smallest wall-clock duration (in nanoseconds, from `time_ns()`)
  # measured around the submit-and-sync section, as a `UInt64`.
  function vector_scal_with_starpu(v :: Vector{Float32}, m :: Int32, k :: Float32, l :: Float32)
      # Start from the largest UInt64 so the accumulator keeps a single concrete
      # type and no "0 means unset" sentinel is needed.
      best = typemax(UInt64)

      @starpu_block let
          hV = starpu_data_register(v)

          perfmodel = starpu_perfmodel(
              perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
              symbol = "history_perf"
          )

          # Only the CPU implementation is wired in; no cuda_func / opencl_func
          # is given to the codelet here.
          cl = starpu_codelet(
              cpu_func = "vector_scal",
              modes = [STARPU_RW],
              perfmodel = perfmodel
          )

          # Single measurement run; widen the range to take the best of several.
          for _ in (1 : 1)
              started = time_ns()
              # NOTE(review): @starpu_sync_tasks presumably waits for the
              # submitted task to finish before continuing -- confirm.
              @starpu_sync_tasks begin
                  handles = [hV]
                  task = starpu_task(cl = cl, handles = handles, cl_arg=(m, k, l))
                  starpu_task_submit(task)
              end
              elapsed = time_ns() - started
              best = min(best, elapsed)
          end
      end

      return best
  end
  # Verify that `res` matches the reference computation `ref .* m .+ (k + l)`.
  #
  # Each element is compared with a relative tolerance of 1e-4. The relative
  # error is taken against the magnitude of the expected value, so negative
  # expected values are checked correctly, and a NaN result is reported as a
  # mismatch instead of slipping through the comparison.
  #
  # Throws an `ErrorException` (via `error`) on the first mismatching element;
  # returns `nothing` when every element is within tolerance.
  function check(ref, res, m, k, l)
      expected = ref .* m .+ (k+l)
      for i in eachindex(expected)
          got = res[i]
          want = expected[i]
          diff = abs(want - got)
          # An exact match has zero error even when `want` is 0 (avoids 0/0 = NaN).
          err = diff == 0 ? zero(diff) : diff / abs(want)
          # Written as a negated `<=` so that a NaN error also counts as a failure.
          if !(err <= 0.0001)
              error("[$i] -> $got != $want (err $err)")
          end
      end
  end
  # Benchmark `vector_scal_with_starpu` for every vector length in
  # `start_dim : step_dim : stop_dim`.
  #
  # For each length a random Float32 vector is generated, processed through
  # StarPU while pinned, and validated with `check` against an untouched copy.
  # A line "<length> <time>" is written both to `io` and to standard output,
  # where <time> is the value returned by `vector_scal_with_starpu`.
  function compute_times(io,start_dim, step_dim, stop_dim)
      # Scalars handed to the codelet; the same for every vector length.
      m = Int32(10)
      k = 2.0f0
      l = 3.0f0

      for dim in (start_dim : step_dim : stop_dim)
          V = rand(Cfloat, dim)
          V_ref = copy(V)

          # Preview at most the first 10 elements so short vectors do not
          # raise a BoundsError.
          println("INPUT ", V[1:min(10, end)])

          starpu_memory_pin(V)
          mt = try
              vector_scal_with_starpu(V, m, k, l)
          finally
              # Always release the pin, even if the StarPU run throws.
              starpu_memory_unpin(V)
          end

          println("OUTPUT ", V[1:min(10, end)])
          println(io,"$dim $mt")
          println("$dim $mt")
          check(V_ref, V, m, k, l)
      end
  end
  # Script entry point: write the timing table to the file named by the first
  # command-line argument, or to "x.dat" when no argument is given.
  filename = isempty(ARGS) ? "x.dat" : ARGS[1]
  try
      # The do-block form closes the file even if the benchmark throws.
      open(filename, "w") do io
          compute_times(io, 1024, 1024, 4096)
      end
  finally
      # Shut StarPU down whether or not the benchmark succeeded.
      starpu_shutdown()
  end