# cholesky_implicit.jl (2.1 KB)
  # StarPU --- Runtime system for heterogeneous multicore architectures.
  #
  # Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  #
  # StarPU is free software; you can redistribute it and/or modify
  # it under the terms of the GNU Lesser General Public License as published by
  # the Free Software Foundation; either version 2.1 of the License, or (at
  # your option) any later version.
  #
  # StarPU is distributed in the hope that it will be useful, but
  # WITHOUT ANY WARRANTY; without even the implied warranty of
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  #
  # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  #
  16. using StarPU
  17. using LinearAlgebra.BLAS
  18. include("cholesky_common.jl")
  # Submit the tasks of a blocked Cholesky factorization of `mat` to StarPU and
  # wait until all of them have completed.
  #
  # Arguments:
  #   mat     - Float32 matrix; it is registered with StarPU inside the
  #             `@starpu_block` section below.
  #   size    - not read by this function; kept so existing callers keep working.
  #   nblocks - piece count passed to both data filters, and the number of outer
  #             iterations of the task-submission loop.
  #
  # The value of the `@starpu_block` expression is the function's result.
  # `tag11`, `tag21` and `tag22` are defined outside this function.
  function cholesky(mat :: Matrix{Float32}, size, nblocks)
      # Evaluated on every call. Presumably this is what defines the codelets
      # cl_11, cl_21 and cl_22 used below -- TODO confirm against
      # cholesky_codelets.jl.
      include("cholesky_codelets.jl")

      block_filter = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nblocks)
      vblock_filter = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nblocks)

      @starpu_block let
          tiles = starpu_data_register(mat)
          starpu_data_map_filters(tiles, block_filter, vblock_filter)

          for k in 1:nblocks
              # Indices strictly after k; empty on the last iteration.
              trailing = (k + 1):nblocks

              starpu_iteration_push(k)

              # cl_11 on the diagonal piece (k, k).
              starpu_task_insert(cl = cl_11,
                                 handles = [tiles[k, k]],
                                 tag_only = tag11(k))

              # cl_21 on every piece (i, k) with i > k, together with (k, k).
              for i in trailing
                  starpu_task_insert(cl = cl_21,
                                     handles = [tiles[k, k], tiles[i, k]],
                                     tag_only = tag21(i, k))
              end
              # No further task of this function references piece (k, k).
              starpu_data_wont_use(tiles[k, k])

              # cl_22 on every piece (i, j) with k < j <= i, together with
              # (i, k) and (j, k).
              for i in trailing
                  for j in (k + 1):i
                      starpu_task_insert(cl = cl_22,
                                         handles = [tiles[i, k], tiles[j, k], tiles[i, j]],
                                         tag_only = tag22(k, i, j))
                  end
                  starpu_data_wont_use(tiles[i, k])
              end

              starpu_iteration_pop()
          end

          starpu_task_wait_for_all()
      end
  end
  # Script entry point: initialise StarPU, run the benchmark, shut StarPU down.
  #
  # With "-quickcheck" as the first command-line argument a single run
  # main(1024, 8, verify = true) is performed; otherwise main(size, 16) is called
  # for size = 1024, 2048, ..., 15360. `main` is defined outside this block
  # (presumably in cholesky_common.jl -- confirm).
  starpu_init()
  try
      starpu_cublas_init()

      # Column header for the results; the rows themselves are presumably printed
      # by `main` -- confirm.
      println("# size\tms\tGFlops")

      if !isempty(ARGS) && ARGS[1] == "-quickcheck"
          main(1024, 8, verify = true)
      else
          for size in 1024:1024:15360
              main(size, 16)
          end
      end
  finally
      # Runs even when a step above throws, so the runtime started by
      # starpu_init() is always shut down; the exception still propagates.
      starpu_shutdown()
  end