axpy.jl 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # StarPU --- Runtime system for heterogeneous multicore architectures.
  2. #
  3. # Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. #
  5. # StarPU is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU Lesser General Public License as published by
  7. # the Free Software Foundation; either version 2.1 of the License, or (at
  8. # your option) any later version.
  9. #
  10. # StarPU is distributed in the hope that it will be useful, but
  11. # WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. #
  14. # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. #
  16. using StarPU
  # Relative tolerance used by `check` when comparing computed values against
  # the expected ones.
  const EPSILON = 1e-6

  """
      check(alpha, X, Y)

  Verify that every `Y[i]` equals `alpha * X[i] + 4.0` within a relative
  tolerance of `EPSILON`.

  The `4.0` term is hard-coded here; it matches the value `main` stores in `Y`
  before the update is run.

  # Returns
  `nothing` when every element is within tolerance.

  # Throws
  `ErrorException` describing the first index whose value is out of tolerance.
  """
  function check(alpha, X, Y)
      for i in eachindex(X)
          expected_value = alpha * X[i] + 4.0
          # Scale the tolerance by the magnitude of the expected value. A signed
          # tolerance becomes negative for negative expected values, which makes
          # the comparison report a mismatch even for an exact result.
          if abs(Y[i] - expected_value) > abs(expected_value) * EPSILON
              error(
                  "at ", i, ", ", alpha, "*", X[i], "+4.0=", Y[i],
                  ", expected ", expected_value,
              )
          end
      end
      return nothing
  end
  """
      main()

  Run the SAXPY example with StarPU.

  Two `Float32` vectors of `N` elements are created (`X` filled with `1.0`, `Y`
  with `4.0`), registered with StarPU and partitioned into `NBLOCKS` blocks. One
  `STARPU_SAXPY` task is submitted per block with `alpha` as codelet argument,
  the elapsed time is printed, and the result is verified with `check`.

  # Throws
  Whatever `check` or the StarPU calls throw; `starpu_shutdown()` is still
  called in that case.
  """
  function main()
      N = 16 * 1024 * 1024
      NBLOCKS = 8
      alpha = 3.41

      starpu_init()
      # Everything after a successful init runs under try/finally so the runtime
      # is shut down even when task submission or the final check throws.
      try
          starpu_cublas_init()

          # `fill` already returns a fresh Vector{Float32}; no extra copy needed.
          X = fill(1.0f0, N)
          Y = fill(4.0f0, N)

          starpu_memory_pin(X)
          starpu_memory_pin(Y)

          println("BEFORE x[0] = ", X[1])
          println("BEFORE y[0] = ", Y[1])

          block_filter = starpu_data_filter(STARPU_VECTOR_FILTER_BLOCK, NBLOCKS)

          perfmodel = starpu_perfmodel(
              perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
              symbol = "history_perf"
          )

          cl = starpu_codelet(
              cpu_func = STARPU_SAXPY,
              cuda_func = STARPU_SAXPY,
              modes = [STARPU_R, STARPU_RW],
              perfmodel = perfmodel
          )

          @starpu_block let
              hX, hY = starpu_data_register(X, Y)

              starpu_data_partition(hX, block_filter)
              starpu_data_partition(hY, block_filter)

              t_start = time_ns()

              for b in 1:NBLOCKS
                  task = starpu_task(
                      cl = cl,
                      handles = [hX[b], hY[b]],
                      cl_arg = (Float32(alpha),),
                      tag = starpu_tag_t(b)
                  )
                  starpu_task_submit(task)
              end

              starpu_task_wait_for_all()

              t_end = time_ns()

              # time_ns() is in nanoseconds; convert to microseconds.
              timing = (t_end - t_start) / 1000
              # Three Float32 accesses per element (presumably X read, Y read,
              # Y written, matching the STARPU_R / STARPU_RW modes above).
              # Bytes per microsecond is numerically MB/s.
              bytes_moved = 3 * N * sizeof(Float32)
              println("timing -> ", timing, " us ", bytes_moved / timing, "MB/s")
          end

          println("AFTER y[0] = ", Y[1], " (ALPHA=", alpha, ")")

          check(alpha, X, Y)
      finally
          starpu_shutdown()
      end
      return nothing
  end

  # Script entry point.
  main()