nf_example.f90 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. ! StarPU --- Runtime system for heterogeneous multicore architectures.
  2. !
  3. ! Copyright (C) 2017 CNRS
  4. ! Copyright (C) 2015,2016 Inria
  5. ! Copyright (C) 2016,2019 Université de Bordeaux
  6. ! Copyright (C) 2015 ONERA
  7. !
  8. ! StarPU is free software; you can redistribute it and/or modify
  9. ! it under the terms of the GNU Lesser General Public License as published by
  10. ! the Free Software Foundation; either version 2.1 of the License, or (at
  11. ! your option) any later version.
  12. !
  13. ! StarPU is distributed in the hope that it will be useful, but
  14. ! WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. !
  17. ! See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. !
  19. ! This is an example of Fortran90 program making use of StarPU.
  20. ! It registers a few matrices for each element of a domain, performs
  21. ! update computations on them, and checks the result.
  22. PROGRAM f90_example
  23. USE nf_types
  24. USE fstarpu_mod
  25. USE nf_compute
  26. USE iso_c_binding
  27. IMPLICIT NONE
  28. TYPE(type_mesh) :: mesh
  29. TYPE(type_numpar),TARGET :: numpar
  30. TYPE(type_mesh_elt),POINTER :: elt => NULL()
  31. INTEGER(KIND=C_INT) :: i,Nelt,res,cpus
  32. INTEGER(KIND=C_INT) :: starpu_maj,starpu_min,starpu_rev
  33. INTEGER(KIND=C_INT) :: neq,ng,nb,it,it_tot
  34. REAL(KIND=C_DOUBLE) :: r, coeff2
  35. REAL(KIND=C_DOUBLE),TARGET :: flops
  36. TYPE(C_PTR) :: cl_loop_element = C_NULL_PTR ! loop codelet
  37. TYPE(C_PTR) :: cl_copy_element = C_NULL_PTR ! copy codelet
  38. !Initialization with arbitrary data
  39. Nelt = 2
  40. it_tot = 2
  41. numpar%Neq_max = 5
  42. numpar%coeff = 1.0
  43. ALLOCATE(mesh%elt(Nelt))
  44. DO i = 1,Nelt
  45. elt => mesh%elt(i)
  46. elt%Ng = 4
  47. elt%Np = 2
  48. ALLOCATE(elt%ro(numpar%Neq_max,elt%Np))
  49. ALLOCATE(elt%dro(numpar%Neq_max,elt%Np))
  50. ALLOCATE(elt%basis(elt%Np,elt%Ng))
  51. CALL init_element(elt%ro,elt%dro,elt%basis,numpar%Neq_max,elt%Np,elt%Ng,i)
  52. ENDDO
  53. !Initialization of StarPU
  54. res = fstarpu_init(C_NULL_PTR)
  55. IF (res == -19) THEN
  56. STOP 77
  57. END IF
  58. CALL fstarpu_get_version(starpu_maj,starpu_min,starpu_rev)
  59. WRITE(6,'(a,i4,a,i4,a,i4)') "StarPU version: ", starpu_maj , "." , starpu_min , "." , starpu_rev
  60. cpus = fstarpu_cpu_worker_get_count()
  61. IF (cpus == 0) THEN
  62. CALL fstarpu_shutdown()
  63. STOP 77
  64. END IF
  65. cl_loop_element = fstarpu_codelet_allocate()
  66. CALL fstarpu_codelet_add_cpu_func(cl_loop_element, C_FUNLOC(loop_element_cpu_fortran))
  67. CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_R)
  68. CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_RW)
  69. CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_R)
  70. CALL fstarpu_codelet_set_name(cl_loop_element, C_CHAR_"LOOP_ELEMENT"//C_NULL_CHAR)
  71. cl_copy_element = fstarpu_codelet_allocate()
  72. CALL fstarpu_codelet_add_cpu_func(cl_copy_element, C_FUNLOC(copy_element_cpu_fortran))
  73. CALL fstarpu_codelet_add_buffer(cl_copy_element, FSTARPU_RW)
  74. CALL fstarpu_codelet_add_buffer(cl_copy_element, FSTARPU_R)
  75. CALL fstarpu_codelet_set_name(cl_copy_element, C_CHAR_"COPY_ELEMENT"//C_NULL_CHAR)
  76. !Registration of elements
  77. DO i = 1,Nelt
  78. elt => mesh%elt(i)
  79. call fstarpu_matrix_data_register(elt%ro_h, 0, c_loc(elt%ro), numpar%Neq_max, numpar%Neq_max, elt%Np, c_sizeof(elt%ro(1,1)))
  80. call fstarpu_matrix_data_register(elt%dro_h, 0, c_loc(elt%dro), numpar%Neq_max, numpar%Neq_max, elt%Np, c_sizeof(elt%dro(1,1)))
  81. call fstarpu_matrix_data_register(elt%basis_h, 0, c_loc(elt%basis), elt%Np, elt%Np, elt%Ng, c_sizeof(elt%basis(1,1)))
  82. ENDDO
  83. !Compute
  84. DO it = 1,it_tot
  85. ! compute new dro for each element
  86. DO i = 1,Nelt
  87. elt => mesh%elt(i)
  88. flops = elt%Ng * ( (elt%Np * numpar%Neq_max * 2) + 1 + elt%Np * numpar%Neq_max)
  89. CALL fstarpu_insert_task((/ cl_loop_element, &
  90. FSTARPU_VALUE, c_loc(numpar%coeff), FSTARPU_SZ_C_DOUBLE, &
  91. FSTARPU_R, elt%ro_h, &
  92. FSTARPU_RW, elt%dro_h, &
  93. FSTARPU_R, elt%basis_h, &
  94. FSTARPU_FLOPS, c_loc(flops), &
  95. C_NULL_PTR /))
  96. ENDDO
  97. ! sync (if needed by the algorithm)
  98. CALL fstarpu_task_wait_for_all()
  99. ! - - - - -
  100. ! copy dro to ro for each element
  101. DO i = 1,Nelt
  102. elt => mesh%elt(i)
  103. CALL fstarpu_insert_task((/ cl_copy_element, &
  104. FSTARPU_RW, elt%ro_h, &
  105. FSTARPU_R, elt%dro_h, &
  106. C_NULL_PTR /))
  107. ENDDO
  108. ! sync (if needed by the algorithm)
  109. CALL fstarpu_task_wait_for_all()
  110. ENDDO
  111. !Unregistration of elements
  112. DO i = 1,Nelt
  113. elt => mesh%elt(i)
  114. CALL fstarpu_data_unregister(elt%ro_h)
  115. CALL fstarpu_data_unregister(elt%dro_h)
  116. CALL fstarpu_data_unregister(elt%basis_h)
  117. ENDDO
  118. !Terminate StarPU, no task can be submitted after
  119. CALL fstarpu_shutdown()
  120. !Check data with StarPU
  121. WRITE(6,'(a)') " "
  122. WRITE(6,'(a)') " %%%% RESULTS STARPU %%%% "
  123. WRITE(6,'(a)') " "
  124. DO i = 1,Nelt
  125. WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = "
  126. WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro
  127. WRITE(6,'(a)') " ------------------------ "
  128. ENDDO
  129. !Same compute without StarPU
  130. DO i = 1,Nelt
  131. elt => mesh%elt(i)
  132. CALL init_element(elt%ro,elt%dro,elt%basis,numpar%Neq_max,elt%Np,elt%Ng,i)
  133. ENDDO
  134. DO it = 1, it_tot
  135. DO i = 1,Nelt
  136. elt => mesh%elt(i)
  137. CALL loop_element_cpu(elt%ro,elt%dro,elt%basis,numpar%coeff,numpar%Neq_max,elt%Ng,elt%Np)
  138. elt%ro = elt%ro + elt%dro
  139. ENDDO
  140. ENDDO
  141. WRITE(6,'(a)') " "
  142. WRITE(6,'(a)') " %%%% RESULTS VERIFICATION %%%% "
  143. WRITE(6,'(a)') " "
  144. DO i = 1,Nelt
  145. WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = "
  146. WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro
  147. WRITE(6,'(a)') " ------------------------ "
  148. ENDDO
  149. WRITE(6,'(a)') " "
  150. !Deallocation
  151. CALL fstarpu_codelet_free(cl_loop_element)
  152. CALL fstarpu_codelet_free(cl_copy_element)
  153. DO i = 1,Nelt
  154. elt => mesh%elt(i)
  155. DEALLOCATE(elt%ro)
  156. DEALLOCATE(elt%dro)
  157. DEALLOCATE(elt%basis)
  158. ENDDO
  159. DEALLOCATE(mesh%elt)
  160. END PROGRAM f90_example