axpy.jl 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. # StarPU --- Runtime system for heterogeneous multicore architectures.
  2. #
  3. # Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. #
  5. # StarPU is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU Lesser General Public License as published by
  7. # the Free Software Foundation; either version 2.1 of the License, or (at
  8. # your option) any later version.
  9. #
  10. # StarPU is distributed in the hope that it will be useful, but
  11. # WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. #
  14. # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. #
  16. using StarPU
  17. using Printf
  # Relative tolerance applied by `check` when comparing a result to its reference.
  const EPSILON = 1e-6

  """
      check(alpha, X, Y)

  Verify that every `Y[i]` matches `alpha * X[i] + 4.0` to within a relative
  tolerance of `EPSILON`.

  The constant `4.0` mirrors the value `axpy` fills `Y` with before the kernel runs.

  # Arguments
  - `alpha`: scalar multiplier applied to `X`.
  - `X`: input vector.
  - `Y`: result vector; must be indexable at every index of `X`.

  # Returns
  `nothing` when all elements are within tolerance.

  # Throws
  - `ErrorException` at the first index whose value is out of tolerance.
  """
  function check(alpha, X, Y)
      for i in eachindex(X)
          expected_value = alpha * X[i] + 4.0
          # The bound must scale with the *magnitude* of the reference: a signed
          # bound goes negative for negative references and rejects exact results.
          if abs(Y[i] - expected_value) > abs(expected_value) * EPSILON
              error(
                  "at ", i, ", ", alpha, "*", X[i], "+4.0=", Y[i],
                  ", expected ", expected_value,
              )
          end
      end
      return nothing
  end
  """
      axpy(N, NBLOCKS, alpha, display = true)

  Run the `STARPU_SAXPY` codelet over two `Float32` vectors of length `N`, split
  into `NBLOCKS` blocks with one StarPU task per block, then validate the result
  with `check`.

  `X` is filled with `1` and `Y` with `4`; `check` then expects
  `Y[i] == alpha * X[i] + 4` within tolerance.

  # Arguments
  - `N`: number of elements in each vector.
  - `NBLOCKS`: number of blocks the vectors are partitioned into (one task each).
  - `alpha`: scalar passed to the codelet (converted to `Float32`).
  - `display`: when `true`, print the first elements and the measured timing.

  # Returns
  `nothing`.

  # Throws
  - Whatever `check` throws when the result is out of tolerance. The pinned
    buffers are unpinned before the exception propagates.
  """
  function axpy(N, NBLOCKS, alpha, display = true)
      # `fill` already returns a fresh `Vector{Float32}`; no extra copy is needed.
      X = fill(1.0f0, N)
      Y = fill(4.0f0, N)

      starpu_memory_pin(X)
      starpu_memory_pin(Y)

      # Everything after pinning runs under `try` so the buffers are always
      # unpinned, even when task submission or the final check fails.
      try
          block_filter = starpu_data_filter(STARPU_VECTOR_FILTER_BLOCK, NBLOCKS)

          perfmodel = starpu_perfmodel(
              perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
              symbol = "history_perf"
          )

          cl = starpu_codelet(
              cpu_func = STARPU_SAXPY,
              cuda_func = STARPU_SAXPY,
              modes = [STARPU_R, STARPU_RW],
              perfmodel = perfmodel
          )

          if display
              println("BEFORE x[0] = ", X[1])
              println("BEFORE y[0] = ", Y[1])
          end

          t_start = time_ns()

          @starpu_block let
              hX, hY = starpu_data_register(X, Y)

              starpu_data_partition(hX, block_filter)
              starpu_data_partition(hY, block_filter)

              # One task per block: X is read, Y is updated in place.
              for b in 1:NBLOCKS
                  task = starpu_task(
                      cl = cl,
                      handles = [hX[b], hY[b]],
                      cl_arg = (Float32(alpha),),
                      tag = starpu_tag_t(b)
                  )
                  starpu_task_submit(task)
              end

              starpu_task_wait_for_all()
          end

          t_end = time_ns()
          timing = (t_end - t_start) / 1000   # nanoseconds -> microseconds

          if display
              # Three vector accesses (read X, read Y, write Y) of N Float32 values;
              # bytes per microsecond is numerically MB/s.
              @printf(
                  "timing -> %d us %.2f MB/s\n",
                  timing, 3 * N * sizeof(Float32) / timing
              )
              println("AFTER y[0] = ", Y[1], " (ALPHA=", alpha, ")")
          end

          check(alpha, X, Y)
      finally
          starpu_memory_unpin(X)
          starpu_memory_unpin(Y)
      end

      return nothing
  end
  """
      main()

  Initialise StarPU, run a small warm-up `axpy` with output suppressed, run the
  full-size `axpy`, and shut StarPU down.

  `starpu_shutdown` is called even when one of the runs throws (for example
  when `check` rejects the result), so the runtime is not left initialised.

  # Returns
  `nothing`.
  """
  function main()
      N = 16 * 1024 * 1024
      NBLOCKS = 8
      alpha = 3.41

      starpu_init()
      try
          starpu_cublas_init()

          # warmup: tiny single-block run with display disabled
          axpy(10, 1, alpha, false)

          axpy(N, NBLOCKS, alpha)
      finally
          starpu_shutdown()
      end

      return nothing
  end

  main()