# StarPU --- Runtime system for heterogeneous multicore architectures.
#
# Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
#
# StarPU is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at
# your option) any later version.
#
# StarPU is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the GNU Lesser General Public License in COPYING.LGPL for more details.
#
  using StarPU
  using Printf

  # Relative tolerance for result verification: `check` multiplies it by the
  # expected value to obtain the largest accepted absolute deviation.
  const EPSILON = 1e-6
  """
      check(alpha, X, Y; rtol = EPSILON)

  Verify that `Y[i]` equals `alpha * X[i] + 4.0` for every index `i` of `X`, up to
  the relative tolerance `rtol`.

  # Arguments
  - `alpha`: scaling factor applied to `X`.
  - `X`: input vector; its indices drive the comparison.
  - `Y`: result vector, read at the same indices as `X`.

  # Keywords
  - `rtol`: relative tolerance; defaults to the file-level constant `EPSILON`.

  # Returns
  `nothing` when every element is within tolerance.

  # Throws
  An `ErrorException` (raised through `error`) describing the first mismatch.
  """
  function check(alpha, X, Y; rtol = EPSILON)
      for i in eachindex(X)
          expected_value = alpha * X[i] + 4.0
          # The bound must use the magnitude of the expected value: a signed
          # product turns negative for negative expected values, and no absolute
          # difference can ever be below a negative bound.
          if abs(Y[i] - expected_value) > abs(expected_value) * rtol
              error("at ", i, ", ", alpha, "*", X[i], "+4.0=", Y[i],
                    ", expected ", expected_value)
          end
      end
      return nothing
  end
  # StarPU codelet `axpy`: hands one pair of Float32 vectors and the scalar `alpha`
  # to `STARPU_SAXPY`. The driver in this file starts from Y = 4.0 and verifies
  # Y[i] == alpha * X[i] + 4.0, i.e. the expected effect is Y <- alpha * X + Y.
  #
  # `@target` is given both STARPU_CPU and STARPU_CUDA; presumably this makes the
  # following codelet available to both kinds of workers -- confirm against the
  # StarPU Julia documentation.
  #
  # The code tokens below are kept exactly as written because the body is
  # processed by the `@codelet` macro rather than being ordinary Julia code.
  @target STARPU_CPU+STARPU_CUDA
  @codelet function axpy(X :: Vector{Float32}, Y :: Vector{Float32}, alpha ::Float32) :: Nothing
      # Arguments: element count, scalar, X, 1, Y, 1. The two `1`s are presumably
      # the BLAS-style strides of X and Y -- TODO confirm.
      STARPU_SAXPY(length(X), alpha, X, 1, Y, 1)
      return
  end
  """
      axpy(N, NBLOCKS, alpha, display = true)

  Run the `axpy` codelet on two freshly allocated `Float32` vectors of length `N`
  and verify the result with `check`.

  `X` is filled with `1.0f0` and `Y` with `4.0f0`. Both vectors are pinned,
  registered with StarPU, partitioned with a `STARPU_VECTOR_FILTER_BLOCK` filter
  into `NBLOCKS` parts, and one task is inserted per part with `X` in `STARPU_R`
  mode and `Y` in `STARPU_RW` mode.

  # Arguments
  - `N`: length of both vectors.
  - `NBLOCKS`: number of parts, and therefore of inserted tasks.
  - `alpha`: scalar factor; converted to `Float32` before being passed to the task.
  - `display`: when `true`, print the first elements before and after the run
    together with the elapsed time and the derived throughput.

  # Returns
  `nothing`.

  # Throws
  Whatever `check` raises when the result is wrong. The vectors are unpinned
  before the exception propagates.
  """
  function axpy(N, NBLOCKS, alpha, display = true)
      X = fill(1.0f0, N)
      Y = fill(4.0f0, N)

      starpu_memory_pin(X)
      starpu_memory_pin(Y)

      # Everything between pin and unpin sits in `try` so that a failing run
      # (for instance `check` raising on a mismatch) cannot leave memory pinned.
      try
          block_filter = starpu_data_filter(STARPU_VECTOR_FILTER_BLOCK, NBLOCKS)

          if display
              # The "[0]" labels are kept verbatim; the value shown is the first
              # element (index 1 in Julia).
              println("BEFORE x[0] = ", X[1])
              println("BEFORE y[0] = ", Y[1])
          end

          t_start = time_ns()

          @starpu_block let
              hX,hY = starpu_data_register(X, Y)

              starpu_data_partition(hX, block_filter)
              starpu_data_partition(hY, block_filter)

              for b in 1:NBLOCKS
                  starpu_task_insert(codelet_name = "axpy",
                                     handles = [hX[b], hY[b]],
                                     cl_arg = (Float32(alpha),),
                                     tag = starpu_tag_t(b),
                                     modes = [STARPU_R, STARPU_RW])
              end

              starpu_task_wait_for_all()
          end

          t_end = time_ns()
          # time_ns() is in nanoseconds; dividing by 1000 yields microseconds.
          timing = (t_end - t_start) / 1000

          if display
              # 3*N*4 bytes per microsecond; presumably three vector passes
              # (read X, read Y, write Y) of 4-byte elements -- TODO confirm.
              @printf("timing -> %d us %.2f MB/s\n", timing, 3*N*4/timing)
              println("AFTER y[0] = ", Y[1], " (ALPHA=", alpha, ")")
          end

          check(alpha, X, Y)
      finally
          starpu_memory_unpin(X)
          starpu_memory_unpin(Y)
      end

      return nothing
  end
  """
      main()

  Entry point of the example: initialise StarPU and its cuBLAS support, run a
  small warm-up `axpy` (10 elements, 1 block, no output), then the full-size
  `axpy` (16 * 1024 * 1024 elements in 8 blocks) with `alpha = 3.41`.

  `starpu_shutdown()` is called even when one of the runs raises, so a failed
  verification does not skip the shutdown. Returns `nothing`.
  """
  function main()
      N = 16 * 1024 * 1024
      NBLOCKS = 8
      alpha = 3.41

      starpu_init()
      try
          starpu_cublas_init()

          # warmup
          axpy(10, 1, alpha, false)

          axpy(N, NBLOCKS, alpha)
      finally
          starpu_shutdown()
      end

      return nothing
  end

  main()