nf_redux_test.f90 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. program main
  2. use iso_c_binding
  3. use fstarpu_mod
  4. use fstarpu_mpi_mod
  5. implicit none
  6. integer, target :: ret, np, i, j
  7. type(c_ptr) :: task_cl, task_rw_cl, task_red_cl, task_ini_cl
  8. character(kind=c_char,len=*), parameter :: name=C_CHAR_"task"//C_NULL_CHAR
  9. character(kind=c_char,len=*), parameter :: namered=C_CHAR_"task_red"//C_NULL_CHAR
  10. character(kind=c_char,len=*), parameter :: nameini=C_CHAR_"task_ini"//C_NULL_CHAR
  11. real(kind(1.d0)), target :: a1, a2, b1, b2
  12. integer(kind=8) :: tag, err
  13. type(c_ptr) :: a1hdl, a2hdl, b1hdl, b2hdl
  14. integer, target :: comm, comm_world, comm_w_rank, comm_size
  15. integer(c_int), target :: w_node
  16. call fstarpu_fxt_autostart_profiling(0)
  17. ret = fstarpu_init(c_null_ptr)
  18. ret = fstarpu_mpi_init(1)
  19. comm_world = fstarpu_mpi_world_comm()
  20. comm_w_rank = fstarpu_mpi_world_rank()
  21. comm_size = fstarpu_mpi_world_size()
  22. if (comm_size.ne.4) then
  23. write(*,'(" ")')
  24. write(*,'("This application is meant to run with 4 MPI")')
  25. stop 1
  26. end if
  27. err = fstarpu_mpi_barrier(comm_world)
  28. if(comm_w_rank.eq.0) then
  29. write(*,'(" ")')
  30. a1 = 1.0
  31. write(*,*) "init_a1", a1
  32. b1 = 0.5
  33. write(*,*) "init b1", b1
  34. end if
  35. if(comm_w_rank.eq.1) then
  36. write(*,'(" ")')
  37. a2 = 2.0
  38. write(*,*) "init_a2", a2
  39. b2 = 0.8
  40. write(*,*) "init b2", b2
  41. end if
  42. ! allocate and fill codelet structs
  43. task_cl = fstarpu_codelet_allocate()
  44. call fstarpu_codelet_set_name(task_cl, name)
  45. call fstarpu_codelet_add_cpu_func(task_cl, C_FUNLOC(cl_cpu_task))
  46. call fstarpu_codelet_add_buffer(task_cl, FSTARPU_REDUX)
  47. call fstarpu_codelet_add_buffer(task_cl, FSTARPU_R)
  48. ! allocate and reduction codelets
  49. task_red_cl = fstarpu_codelet_allocate()
  50. call fstarpu_codelet_set_name(task_red_cl, namered)
  51. call fstarpu_codelet_add_cpu_func(task_red_cl,C_FUNLOC(cl_cpu_task_red))
  52. call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_RW)
  53. call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_R)
  54. task_ini_cl = fstarpu_codelet_allocate()
  55. call fstarpu_codelet_set_name(task_ini_cl, nameini)
  56. call fstarpu_codelet_add_cpu_func(task_ini_cl,C_FUNLOC(cl_cpu_task_ini))
  57. call fstarpu_codelet_add_buffer(task_ini_cl, FSTARPU_W)
  58. err = fstarpu_mpi_barrier(comm_world)
  59. tag = 0
  60. if(comm_w_rank.eq.0) then
  61. call fstarpu_variable_data_register(a1hdl, 0, c_loc(a1),c_sizeof(a1))
  62. call fstarpu_variable_data_register(b1hdl, 0, c_loc(b1),c_sizeof(b1))
  63. else
  64. call fstarpu_variable_data_register(a1hdl, -1, c_null_ptr,c_sizeof(a1))
  65. call fstarpu_variable_data_register(b1hdl, -1, c_null_ptr,c_sizeof(b1))
  66. end if
  67. call fstarpu_mpi_data_register(a1hdl,tag,0)
  68. call fstarpu_mpi_data_register(b1hdl, tag+1,0)
  69. tag = tag + 2
  70. if(comm_w_rank.eq.1) then
  71. call fstarpu_variable_data_register(a2hdl, 0, c_loc(a2),c_sizeof(a2))
  72. call fstarpu_variable_data_register(b2hdl, 0, c_loc(b2),c_sizeof(b2))
  73. else
  74. call fstarpu_variable_data_register(a2hdl, -1, c_null_ptr,c_sizeof(a2))
  75. call fstarpu_variable_data_register(b2hdl, -1, c_null_ptr,c_sizeof(b2))
  76. end if
  77. call fstarpu_mpi_data_register(a2hdl,tag,1)
  78. call fstarpu_mpi_data_register(b2hdl, tag+1, 1)
  79. tag = tag + 2
  80. call fstarpu_data_set_reduction_methods(a1hdl, task_red_cl,task_ini_cl)
  81. call fstarpu_data_set_reduction_methods(a2hdl, task_red_cl,task_ini_cl)
  82. err = fstarpu_mpi_barrier(comm_world)
  83. call fstarpu_fxt_start_profiling()
  84. w_node = 3
  85. comm = comm_world
  86. call fstarpu_mpi_task_insert( (/ c_loc(comm), &
  87. task_cl, &
  88. FSTARPU_REDUX, a1hdl, &
  89. FSTARPU_R, b1hdl, &
  90. FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), &
  91. C_NULL_PTR /))
  92. w_node = 2
  93. comm = comm_world
  94. call fstarpu_mpi_task_insert( (/ c_loc(comm), &
  95. task_cl, &
  96. FSTARPU_REDUX, a2hdl, &
  97. FSTARPU_R, b2hdl, &
  98. FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), &
  99. C_NULL_PTR /))
  100. call fstarpu_mpi_redux_data(comm_world, a1hdl)
  101. call fstarpu_mpi_redux_data(comm_world, a2hdl)
  102. ! write(*,*) "waiting all tasks ..."
  103. err = fstarpu_mpi_wait_for_all(comm_world)
  104. if(comm_w_rank.eq.0) then
  105. write(*,*) 'computed result ---> ',a1, "expected =",4.5
  106. end if
  107. if(comm_w_rank.eq.1) then
  108. write(*,*) 'computed result ---> ',a2, "expected=",5.8
  109. end if
  110. call fstarpu_data_unregister(a1hdl)
  111. call fstarpu_data_unregister(a2hdl)
  112. call fstarpu_data_unregister(b1hdl)
  113. call fstarpu_data_unregister(b2hdl)
  114. call fstarpu_fxt_stop_profiling()
  115. call fstarpu_codelet_free(task_cl)
  116. call fstarpu_codelet_free(task_red_cl)
  117. call fstarpu_codelet_free(task_ini_cl)
  118. err = fstarpu_mpi_shutdown()
  119. call fstarpu_shutdown()
  120. stop
  121. contains
  122. recursive subroutine cl_cpu_task (buffers, cl_args) bind(C)
  123. use iso_c_binding ! C interfacing module
  124. use fstarpu_mod ! StarPU interfacing module
  125. implicit none
  126. type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused
  127. integer(c_int) :: ret, worker_id
  128. integer :: comm_rank
  129. integer, target :: i
  130. real(kind(1.d0)), pointer :: a, b
  131. real(kind(1.d0)) :: old_a
  132. worker_id = fstarpu_worker_get_id()
  133. comm_rank = fstarpu_mpi_world_rank()
  134. call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a)
  135. call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), b)
  136. call sleep(1.d0)
  137. old_a = a
  138. a = 3.0 + b
  139. write(*,*) "task (c_w_rank:",comm_rank,") from ",old_a,"to",a
  140. return
  141. end subroutine cl_cpu_task
  142. recursive subroutine cl_cpu_task_red (buffers, cl_args) bind(C)
  143. use iso_c_binding ! C interfacing module
  144. use fstarpu_mod ! StarPU interfacing module
  145. implicit none
  146. type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused
  147. integer(c_int) :: ret
  148. integer, target :: comm_rank
  149. real(kind(1.d0)), pointer :: as, ad
  150. real(kind(1.d0)) :: old_ad
  151. comm_rank = fstarpu_mpi_world_rank()
  152. call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), ad)
  153. call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), as)
  154. old_ad = ad
  155. ad = ad + as
  156. call sleep(1.d0)
  157. write(*,*) "red_cl (c_w_rank:",comm_rank,")",as, old_ad, ' ---> ',ad
  158. return
  159. end subroutine cl_cpu_task_red
  160. recursive subroutine cl_cpu_task_ini (buffers, cl_args) bind(C)
  161. use iso_c_binding ! C interfacing module
  162. use fstarpu_mod ! StarPU interfacing module
  163. implicit none
  164. type(c_ptr), value, intent(in) :: buffers, cl_args
  165. ! cl_args is unused
  166. integer(c_int) :: ret
  167. integer, target :: comm_rank
  168. real(kind(1.d0)), pointer :: a
  169. comm_rank = fstarpu_mpi_world_rank()
  170. call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a)
  171. call sleep(0.5d0)
  172. a = 0.0
  173. write(*,*) "ini_cl (c_w_rank:",comm_rank,")"
  174. return
  175. end subroutine cl_cpu_task_ini
  176. subroutine sleep(t)
  177. implicit none
  178. integer :: t_start, t_end, t_rate
  179. real(kind(1.d0)) :: ta, t
  180. call system_clock(t_start)
  181. do
  182. call system_clock(t_end, t_rate)
  183. ta = real(t_end-t_start)/real(t_rate)
  184. if(ta.gt.t) return
  185. end do
  186. end subroutine sleep
  187. end program main