Bladeren bron

Merge branch 'master' into fpga

Samuel Thibault 5 jaren geleden
bovenliggende
commit
99abc1661e
100 gewijzigde bestanden met toevoegingen van 4437 en 2849 verwijderingen
  1. 2 0
      .gitignore
  2. 1 0
      ChangeLog
  3. 161 251
      configure.ac
  4. 1 1
      doc/doxygen/chapters/310_data_management.doxy
  5. 4 3
      doc/doxygen/chapters/320_scheduling.doxy
  6. 2 1
      doc/doxygen/chapters/350_scheduling_policy_definition.doxy
  7. 5 0
      doc/doxygen/chapters/370_online_performance_tools.doxy
  8. 2 2
      examples/mult/sgemm.sh
  9. 9 0
      include/fstarpu_mod.f90
  10. 8 1
      include/starpu_perfmodel.h
  11. 14 0
      include/starpu_scheduler.h
  12. 4 0
      julia/StarPU.jl/Manifest.toml
  13. 7 0
      julia/StarPU.jl/Project.toml
  14. 2 0
      julia/StarPU.jl/REQUIRE
  15. 1230 0
      julia/StarPU.jl/src/StarPU.jl
  16. 261 0
      julia/StarPU.jl/src/compiler/c.jl
  17. 349 0
      julia/StarPU.jl/src/compiler/cuda.jl
  18. 0 0
      julia/StarPU.jl/src/compiler/expression_manipulation.jl
  19. 928 0
      julia/StarPU.jl/src/compiler/expressions.jl
  20. 132 0
      julia/StarPU.jl/src/compiler/file_generation.jl
  21. 13 0
      julia/StarPU.jl/src/compiler/include.jl
  22. 5 8
      julia/src/Compiler/parsing.jl
  23. 0 12
      julia/src/Compiler/utils.jl
  24. 1 7
      julia/src/Wrapper/C/jlstarpu.h
  25. 21 66
      julia/src/Wrapper/C/jlstarpu_data_handles.c
  26. 1 7
      julia/src/Wrapper/C/jlstarpu_simple_functions.c
  27. 2 7
      julia/src/Wrapper/C/jlstarpu_task.h
  28. 9 9
      julia/src/Wrapper/C/jlstarpu_task_submit.c
  29. 1 2
      julia/src/Wrapper/C/jlstarpu_utils.h
  30. 23 26
      julia/src/Wrapper/Julia/linked_list.jl
  31. 0 0
      julia/black_scholes/black_scholes.c
  32. 83 13
      julia/tst/black_scholes/cpu_cuda_black_scholes.jl
  33. 263 0
      julia/mandelbrot/mandelbrot.c
  34. 30 0
      julia/mandelbrot/mandelbrot.jl
  35. 20 0
      julia/mult/README
  36. 55 0
      julia/mult/makefile
  37. 237 0
      julia/mult/mult.c
  38. 146 0
      julia/mult/mult.jl
  39. 4 0
      julia/mult/mult.plot
  40. 44 0
      julia/mult/mult_native.jl
  41. 11 0
      julia/mult/res/mult_cstarpu_gcc9_s72_2x2_b4x2.dat
  42. 29 0
      julia/mult/res/mult_gen_gcc9_1x4.dat
  43. 29 0
      julia/mult/res/mult_gen_gcc9_4x1.dat
  44. 7 0
      julia/mult/res/mult_gen_gcc9_s100_4x1.dat
  45. 17 0
      julia/mult/res/mult_gen_gcc9_s50_4x1.dat
  46. 4 0
      julia/mult/res/mult_gen_gcc9_s64_16x16_b4x2.dat
  47. 13 0
      julia/mult/res/mult_gen_gcc9_s64_4x4_b4x2.dat
  48. 7 0
      julia/mult/res/mult_gen_gcc9_s64_8x1_b4x2.dat
  49. 7 0
      julia/mult/res/mult_gen_gcc9_s64_8x8_b4x2.dat
  50. 11 0
      julia/mult/res/mult_gen_gcc9_s72_16x18_b4x2.dat
  51. 11 0
      julia/mult/res/mult_gen_gcc9_s72_16x8_b4x2.dat
  52. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2.dat
  53. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b4x2.dat
  54. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b4x4.dat
  55. 5 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b8x2.dat
  56. 11 0
      julia/mult/res/mult_gen_gcc9_s72_4x1.dat
  57. 11 0
      julia/mult/res/mult_gen_gcc9_s72_4x4_b4x2.dat
  58. 11 0
      julia/mult/res/mult_gen_gcc9_s72_8x8_b4x2.dat
  59. 9 0
      julia/mult/res/mult_gen_gcc9_s80_4x1.dat
  60. 3 0
      julia/mult/res/mult_gen_icc_s72_2x1_b4x2.dat
  61. 11 0
      julia/mult/res/mult_gen_icc_s72_4x4_b4x2.dat
  62. 62 0
      julia/mult/res/mult_native.dat
  63. 11 0
      julia/mult/res/mult_nogen_gcc9_s72_2x2_b2x2.dat
  64. 11 0
      julia/mult/res/mult_nogen_gcc9_s72_2x2_b4x2.dat
  65. 11 0
      julia/mult/res/mult_nogen_icc_s72-36_2x2_b4x2.dat
  66. 11 0
      julia/mult/res/mult_nogen_icc_s72_2x2_b4x2.dat
  67. 11 0
      julia/mult/res/mult_nogen_icc_s72x2_2x2_b4x2.dat
  68. 0 57
      julia/src/Compiler/C/add_for_loop_declarations.jl
  69. 0 15
      julia/src/Compiler/C/create_cpu_kernel.jl
  70. 0 27
      julia/src/Compiler/C/flatten_blocks.jl
  71. 0 76
      julia/src/Compiler/C/substitute_args.jl
  72. 0 25
      julia/src/Compiler/C/substitute_func_calls.jl
  73. 0 52
      julia/src/Compiler/C/substitute_indexing.jl
  74. 0 179
      julia/src/Compiler/Cuda/create_cuda_kernel.jl
  75. 0 49
      julia/src/Compiler/Cuda/indep_for.jl
  76. 0 121
      julia/src/Compiler/Cuda/indep_for_kernel_ids.jl
  77. 0 60
      julia/src/Compiler/Expressions/affect.jl
  78. 0 68
      julia/src/Compiler/Expressions/block.jl
  79. 0 75
      julia/src/Compiler/Expressions/call.jl
  80. 0 60
      julia/src/Compiler/Expressions/cuda_call.jl
  81. 0 44
      julia/src/Compiler/Expressions/field.jl
  82. 0 100
      julia/src/Compiler/Expressions/for.jl
  83. 0 85
      julia/src/Compiler/Expressions/function.jl
  84. 0 94
      julia/src/Compiler/Expressions/if.jl
  85. 0 48
      julia/src/Compiler/Expressions/interval.jl
  86. 0 70
      julia/src/Compiler/Expressions/ref.jl
  87. 0 33
      julia/src/Compiler/Expressions/return.jl
  88. 0 63
      julia/src/Compiler/Expressions/simple_expressions.jl
  89. 0 109
      julia/src/Compiler/Expressions/typed.jl
  90. 0 53
      julia/src/Compiler/Expressions/while.jl
  91. 0 69
      julia/src/Compiler/Generate_files/c_files.jl
  92. 0 134
      julia/src/Compiler/Generate_files/cuda_files.jl
  93. 0 54
      julia/src/Compiler/Generate_files/so_files.jl
  94. 0 39
      julia/src/Compiler/include.jl
  95. 0 146
      julia/src/Wrapper/Julia/starpu_codelet.jl
  96. 0 234
      julia/src/Wrapper/Julia/starpu_data_handle.jl
  97. 0 49
      julia/src/Wrapper/Julia/starpu_define.jl
  98. 0 125
      julia/src/Wrapper/Julia/starpu_destructible.jl
  99. 0 20
      julia/src/Wrapper/Julia/starpu_include.jl
  100. 0 0
      julia/src/Wrapper/Julia/starpu_init_shutdown.jl

+ 2 - 0
.gitignore

@@ -7,6 +7,8 @@
 /build
 /build2
 /build-aux
+/build_starpu
+/install
 /GPATH
 /GRTAGS
 /GTAGS

+ 1 - 0
ChangeLog

@@ -29,6 +29,7 @@ New features:
   * New number_events.data trace file which monitors number of events in trace
     files. This file can be parsed by the new script
     starpu_fxt_number_events_to_names.py to convert event keys to event names.
+  * New STARPU_PER_WORKER perfmodel.
 
 Small changes:
   * Use the S4U interface of Simgrid instead of xbt and MSG.

+ 161 - 251
configure.ac

@@ -92,6 +92,7 @@ if test x$enable_perf_debug = xyes; then
     enable_shared=no
 fi
 default_enable_mpi_check=maybe
+default_enable_mpi=yes
 
 ###############################################################################
 #                                                                             #
@@ -306,6 +307,9 @@ if test x$enable_simgrid = xyes ; then
         # want that by default
 	default_enable_mpi_check=no
 
+	# disable MPI support by default
+	default_enable_mpi=no
+
 	# Simgrid 3.12 & 3.13 need -std=c++11 to be able to build anything in C++...
 	AC_LANG_PUSH([C++])
 	if test x$enable_shared = xno ; then
@@ -370,145 +374,138 @@ fi
 
 ###############################################################################
 #                                                                             #
-#                                    MPI                                      #
+#                                LIBTOOLS                                     #
 #                                                                             #
 ###############################################################################
 
-AC_ARG_ENABLE(mpi, [AS_HELP_STRING([--disable-mpi],
-                              [Disable StarPU MPI library generation])],
-            [enable_mpi=$enableval],
-            [enable_mpi=yes])
+#c++11 detection
+AX_CXX_COMPILE_STDCXX(11,noext,optional)
 
-AC_ARG_ENABLE(mpi-master-slave, [AS_HELP_STRING([--enable-mpi-master-slave],
-                              [Enable StarPU to run with the master-slave mode])],
-            use_mpi_master_slave=$enableval,
-            use_mpi_master_slave=no)
+AC_SUBST([STARPU_HAVE_CXX11], $HAVE_CXX11)
+AM_CONDITIONAL([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1])
+if test $HAVE_CXX11 -eq 1; then
+  AC_DEFINE(STARPU_HAVE_CXX11, [1], [compiler supports cxx11])
+fi
 
-#Check MPICC
-AC_ARG_WITH(mpicc, [AS_HELP_STRING([--with-mpicc[=<path to mpicc>]],
-           [Path of the mpicc compiler])],
-   [
-       if test x$withval = xyes; then
-           AC_MSG_ERROR(--with-mpicc must be given a pathname)
-       elif test x$withval = xno ; then
-           mpi_requested=no
-	   mpicc_path=""
-	   use_mpi=no
-       else
-	   mpi_requested=yes
-           mpicc_path=$withval
-       fi
-   ],
-   [
-       mpi_requested=maybe
-       if test x$enable_simgrid = xyes ; then
-           DEFAULT_MPICC=smpicc
-       else
-           DEFAULT_MPICC=mpicc
-       fi
-       # nothing was specified: default value is used
-       AC_PATH_PROG(mpicc_path, $DEFAULT_MPICC, [no], [$simgrid_dir/bin:$PATH])
-   ])
+LT_PREREQ([2.2])
+LT_INIT([win32-dll])
 
-# in case MPI was explicitely required, but is not available, this is an error
-if test x$mpi_requested = xyes -a ! -x "$mpicc_path"; then
-   AC_MSG_ERROR([Compiler MPI not valid])
-fi
+AC_HEADER_STDC
 
-if test x$mpi_requested != xno ; then
-   # We test if the MPICC compiler exists
-     if test ! -x $mpicc_path; then
-         #MPICC does not exists or is not executable
-	 AC_MSG_RESULT(The mpicc compiler '$mpicc_path' does not have the execute permission)
-	 use_mpi=no
-     else
-	 use_mpi=yes
-	 if test x$enable_simgrid = xyes ; then
-             AC_ARG_WITH(smpirun, [AS_HELP_STRING([--with-smpirun[=<path to smpirun>]],
-						  [Path of the smpirun helper])],
-			 [
-			     if test x$withval = xyes; then
-				 AC_MSG_ERROR(--with-smpirun must be given a pathname)
-			     else
-				 smpirun_path=$withval
-			     fi
-			 ],
-			 [
-			     # nothing was specified: default value is used
-			     AC_PATH_PROG(smpirun_path, smpirun, [no], [$simgrid_dir/bin:$PATH])
-			 ])
-	 fi
-     fi
+AC_C_RESTRICT
+
+# Check if bash is available
+AC_PATH_PROG([REALBASH], [bash], , [/bin:$PATH])
+
+# Record git version
+AC_PATH_PROG(gitcommand, git)
+if test "$gitcommand" = "" ; then
+   if test -f $srcdir/STARPU-REVISION ; then
+      cp $srcdir/STARPU-REVISION .
+   else
+      echo "unknown" > ./STARPU-REVISION
+   fi
+else
+   git log -n 1 --pretty="%H" $srcdir > ./STARPU-REVISION
 fi
 
-AC_MSG_CHECKING(mpicc path)
+AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"])
+
+###############################################################################
+#                                                                             #
+#                           MPI compilers                                     #
+#                                                                             #
+###############################################################################
+
+#Check MPICC
+if test x$enable_simgrid = xyes ; then
+    DEFAULT_MPICC=smpicc
+else
+    DEFAULT_MPICC=mpicc
+fi
+AC_ARG_WITH(mpicc, [AS_HELP_STRING([--with-mpicc=<path to mpicc>], [Path of the mpicc compiler])], [DEFAULT_MPICC=$withval])
+AC_PATH_PROG(mpicc_path, $DEFAULT_MPICC, [no], [$simgrid_dir/bin:$PATH])
+AC_MSG_CHECKING(whether mpicc is available)
 AC_MSG_RESULT($mpicc_path)
 AC_SUBST(MPICC, $mpicc_path)
 
-
 #Check MPICXX/MPIC++
-AC_ARG_WITH(mpicxx, [AS_HELP_STRING([--with-mpicxx[=<path to mpicxx>]],
-           [Path of the mpicxx/mpic++ compiler])],
-   [
-       if test x$withval = xyes; then
-           AC_MSG_ERROR(--with-mpicxx must be given a pathname)
-       else
-           mpicxx_path=$withval
-       fi
-   ],
-   [
-       if test x$enable_simgrid = xyes ; then
-           DEFAULT_MPICXX=smpicxx
-       else
-           DEFAULT_MPICXX=mpicxx
-       fi
-       # nothing was specified: default value is used
-       AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$simgrid_dir/bin:$PATH])
+if test x$enable_simgrid = xyes ; then
+    DEFAULT_MPICXX=smpicxx
+else
+    DEFAULT_MPICXX=mpicxx
+fi
+AC_ARG_WITH(mpicxx, [AS_HELP_STRING([--with-mpicxx=<path to mpicxx>], [Path of the mpicxx/mpic++ compiler])], [DEFAULT_MPICXX=$withval])
+AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$simgrid_dir/bin:$PATH])
 
-       # try with mpic++ if mpicxx was not found
-       if test x$mpicxx_path = xno ; then
-            DEFAULT_MPICXX=mpic++
-            AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$simgrid_dir/bin:$PATH])
-       fi
-   ])
+# try with mpic++ if mpicxx was not found
+if test x$mpicxx_path = xno ; then
+    DEFAULT_MPICXX=mpic++
+    AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$simgrid_dir/bin:$PATH])
+fi
 
 # We test if the MPICXX/MPIC++ compiler exists
 if test ! -x $mpicxx_path; then
-    #MPICXX/MPIC++ does not exists or is not executable
     AC_MSG_RESULT(The mpicxx compiler '$mpicxx_path' does not have the execute permission)
-    use_mpicxx=no
-else
-    use_mpicxx=yes
+    mpicxx_path=no
 fi
 
-AC_MSG_CHECKING(mpicxx/mpic++ path)
+AC_MSG_CHECKING(whether mpicxx is available)
 AC_MSG_RESULT($mpicxx_path)
 AC_SUBST(MPICXX, $mpicxx_path)
 
+###############################################################################
+#                                                                             #
+#                                    MPI                                      #
+#                                                                             #
+###############################################################################
 
-if test x$use_mpi = xyes -a \( x$enable_mpi = xyes -o x$use_mpi_master_slave = xyes \) ; then
-    cc_or_mpicc=$mpicc_path
-        # For some reason, libtool uses gcc instead of mpicc when linking
-        # libstarpumpi.
-        # On Darwin (and maybe other systems ?) the linker will fail (undefined
-        # references to MPI_*). We manually add the required flags to fix this
-        # issue.
-        AC_SUBST(MPICC_LDFLAGS, `$mpicc_path --showme:link`)
-else
-    cc_or_mpicc=$CC
+AC_ARG_ENABLE(mpi, [AS_HELP_STRING([--disable-mpi],
+                              [Disable StarPU MPI library generation])],
+            [enable_mpi=$enableval],
+            [enable_mpi=$default_enable_mpi])
+
+# in case MPI was explicitly required, but mpicc is not available, this is an error
+if test x$enable_mpi = xyes -a ! -x "$mpicc_path"; then
+   AC_MSG_ERROR([Compiler MPI '$mpicc_path' not valid])
 fi
 
-AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
+build_mpi_lib=$enable_mpi
 
-AC_ARG_ENABLE(mpi-pedantic-isend, [AS_HELP_STRING([--enable-mpi-pedantic-isend],
-				   [Prevent StarPU MPI from reading buffers while being sent over MPI])],
-				   enable_mpi_pedantic_isend=$enableval, enable_mpi_pedantic_isend=no)
-if test x$enable_mpi_pedantic_isend = xyes; then
-	AC_DEFINE(STARPU_MPI_PEDANTIC_ISEND, [1], [enable StarPU MPI pedantic isend])
+###############################################################################
+#                                                                             #
+#                                NEW MADELEINE                                #
+#                                                                             #
+###############################################################################
+
+AC_ARG_ENABLE(nmad, [AS_HELP_STRING([--enable-nmad],
+		                    [Enable StarPU MPI library generation using the new madeleine backend])],
+            [enable_nmad=$enableval],
+            [enable_nmad=no])
+
+build_nmad_lib=no
+AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
+#We can only build StarPU MPI Library if User wants it and MPI is available
+if test x$enable_mpi = xyes -a x$enable_nmad = xyes ; then
+    build_nmad_lib=yes
+    build_mpi_lib=no
+    PKG_CHECK_MODULES([NMAD],[nmad])
+else
+    build_nmad_lib=no
 fi
 
-#We can only build MPI Master Slave if User wants it and MPI is available
-if test x$use_mpi_master_slave = xyes -a x$use_mpi = xyes -a x$use_mpicxx = xyes; then
+###############################################################################
+#                                                                             #
+#                             MPI Master Slave                                #
+#                                                                             #
+###############################################################################
+
+AC_ARG_ENABLE(mpi-master-slave, [AS_HELP_STRING([--enable-mpi-master-slave],
+                              [Enable StarPU to run with the master-slave mode])],
+              use_mpi_master_slave=$enableval,
+              use_mpi_master_slave=no)
+#We can only build MPI Master Slave if User wants it and MPI compilers are available
+if test x$use_mpi_master_slave = xyes -a x$mpicc_path != xno -a x${mpicxx_path} != xno ; then
     build_mpi_master_slave=yes
 else
     build_mpi_master_slave=no
@@ -517,7 +514,9 @@ fi
 #users cannot use both at the same time
 if test x$build_mpi_master_slave = xyes -a x$enable_mpi = xyes; then
     AC_MSG_WARN(StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI...)
-	enable_mpi=no
+    build_mpi_lib=no
+    build_nmad_lib=no
+    enable_mpi=no
 fi
 
 if test x$build_mpi_master_slave = xyes; then
@@ -549,95 +548,19 @@ AC_ARG_ENABLE(maxmpidev, [AS_HELP_STRING([--enable-maxmpidev=<number>],
 AC_MSG_RESULT($nmaxmpidev)
 AC_DEFINE_UNQUOTED(STARPU_MAXMPIDEVS, [$nmaxmpidev], [maximum number of MPI devices])
 
-
-###############################################################################
-#                                                                             #
-#                                NEW MADELEINE                                #
-#                                                                             #
-###############################################################################
-
-AC_ARG_ENABLE(nmad, [AS_HELP_STRING([--enable-nmad],
-                              [Enable StarPU MPI library generation using new madeleine instead of mpi])],
-            [enable_nmad=$enableval],
-            [enable_nmad=no])
-
-if test x$use_mpi = xyes -a \( x$enable_nmad \) ; then
-    cc_or_mpicc=$mpicc_path
-        # For some reason, libtool uses gcc instead of mpicc when linking
-        # libstarpumpi.
-        # On Darwin (and maybe other systems ?) the linker will fail (undefined
-        # references to MPI_*). We manually add the required flags to fix this
-        # issue.
-        AC_SUBST(MPICC_LDFLAGS, `$mpicc_path --showme:link`)
-else
-    cc_or_mpicc=$CC
-fi
-
-build_nmad_lib=no
-AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
-#We can only build StarPU MPI Library if User wants it and MPI is available
-if test x$use_mpi = xyes -a x$enable_nmad = xyes ; then
-    build_nmad_lib=yes
-    enable_mpi=no
-    PKG_CHECK_MODULES([NMAD],[nmad])
-else
-    build_nmad_lib=no
-fi
-
-# in case NMAD was explicitely required, but the compiler MPI, this is an error
-if test x$enable_nmad = xyes -a ! -x "$mpicc_path"; then
-   AC_MSG_ERROR([Compiler MPI not valid])
-fi
-
-
-AC_MSG_CHECKING(whether the StarPU MPI nmad library should be generated)
-AC_MSG_RESULT($build_nmad_lib)
-
 ###############################################################################
 #                                                                             #
-#                                LIBTOOLS                                     #
+#                       Miscellaneous things for MPI                          #
 #                                                                             #
 ###############################################################################
 
-#c++11 detection
-AX_CXX_COMPILE_STDCXX(11,noext,optional)
-
-AC_SUBST([STARPU_HAVE_CXX11], $HAVE_CXX11)
-AM_CONDITIONAL([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1])
-if test $HAVE_CXX11 -eq 1; then
-  AC_DEFINE(STARPU_HAVE_CXX11, [1], [compiler supports cxx11])
-fi
-
-LT_PREREQ([2.2])
-LT_INIT([win32-dll])
-
-AC_HEADER_STDC
-
-AC_C_RESTRICT
-
-# Check if bash is available
-AC_PATH_PROG([REALBASH], [bash], , [/bin:$PATH])
-
-# Record git version
-AC_PATH_PROG(gitcommand, git)
-if test "$gitcommand" = "" ; then
-   if test -f $srcdir/STARPU-REVISION ; then
-      cp $srcdir/STARPU-REVISION .
-   else
-      echo "unknown" > ./STARPU-REVISION
-   fi
-else
-   git log -n 1 --pretty="%H" $srcdir > ./STARPU-REVISION
+AC_ARG_ENABLE(mpi-pedantic-isend, [AS_HELP_STRING([--enable-mpi-pedantic-isend],
+				   [Prevent StarPU MPI from reading buffers while being sent over MPI])],
+				   enable_mpi_pedantic_isend=$enableval, enable_mpi_pedantic_isend=no)
+if test x$enable_mpi_pedantic_isend = xyes; then
+	AC_DEFINE(STARPU_MPI_PEDANTIC_ISEND, [1], [enable StarPU MPI pedantic isend])
 fi
 
-AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"])
-
-###############################################################################
-#                                                                             #
-#                       Miscellaneous things for MPI                          #
-#                                                                             #
-###############################################################################
-
 # If the user specifically asks for it, or if we are in a developer checkout, we enable mpi check
 AC_ARG_ENABLE(mpi-check, AC_HELP_STRING([--enable-mpi-check], [Enable execution of MPI testcases]),
 	      [enable_mpi_check=$enableval], [enable_mpi_check=$default_enable_mpi_check])
@@ -651,68 +574,45 @@ fi
 if test x$enable_mpi_check = xno ; then
     running_mpi_check=no
 fi
+if test x$enable_mpi = xno ; then
+    running_mpi_check=no
+fi
 
-
-if test x$enable_simgrid = xno ; then
+if test x$enable_mpi = xyes -a x$running_mpi_check = xyes ; then
     # Check if mpiexec is available
-    AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec[=<path to mpiexec>]],
-                [Path of mpiexec])],
-        [
-            if test x$withval = xyes; then
-                AC_MSG_ERROR(--with-mpiexec must be given a pathname)
-            else
-                mpiexec_path=$withval
-            fi
-        ],
-        [
-            # nothing was specified: look in the path
-	    if test x$mpicc_path = x ; then
-		AC_PATH_PROG(mpiexec_path, mpiexec, [no], [$PATH])
-	    else
-		AC_PATH_PROG(mpiexec_path, mpiexec, [no], [$(dirname $mpicc_path):$PATH])
-	    fi
-        ])
-
+    if test x$enable_simgrid = xyes ; then
+	DEFAULT_MPIEXEC=smpirun
+        AC_ARG_WITH(smpirun, [AS_HELP_STRING([--with-smpirun[=<path to smpirun>]], [Path of the smpirun helper])], [DEFAULT_MPIEXEC=$withval])
+	AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$simgrid_dir/bin:$PATH])
+    else
+	DEFAULT_MPIEXEC=mpiexec
+	AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec=<path to mpiexec>], [Path of mpiexec])], [DEFAULT_MPIEXEC=$withval])
+	if test x$mpicc_path = x ; then
+	    AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$PATH])
+	else
+	    AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$(dirname $mpicc_path):$PATH])
+	fi
+    fi
     AC_MSG_CHECKING(whether mpiexec is available)
     AC_MSG_RESULT($mpiexec_path)
 
     # We test if MPIEXEC exists
     if test ! -x $mpiexec_path; then
-        # if it's not valid, it could be the parameter given to configure.ac was not a full path, let's look for it
-	if test x$mpicc_path = x ; then
-            AC_PATH_PROG(mpiexec_path_bis, $mpiexec_path, [no], [$PATH])
-	else
-            AC_PATH_PROG(mpiexec_path_bis, $mpiexec_path, [no], [$(dirname $mpicc_path):$PATH])
-	fi
-        AC_MSG_CHECKING(whether mpiexec is available (2nd try))
-        AC_MSG_RESULT($mpiexec_path_bis)
-	if test -x $mpiexec_path_bis; then
-	   mpiexec_path=$mpiexec_path_bis
-	else
-           #MPIEXEC does not exists or is not executable
-           AC_MSG_RESULT(The mpiexec script is not valid)
-           running_mpi_check=no
-           mpiexec_path=""
-	fi
+        AC_MSG_RESULT(The mpiexec script '$mpiexec_path' is not valid)
+        running_mpi_check=no
+        mpiexec_path=""
     fi
     AC_SUBST(MPIEXEC,$mpiexec_path)
 fi
 
 AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes)
-if test x$use_mpi = xyes ; then
-    AC_MSG_CHECKING(whether MPI tests should be run)
-    AC_MSG_RESULT($running_mpi_check)
-fi
-
-#We can only build StarPU MPI Library if User wants it and MPI is available
-if test x$use_mpi = xyes -a x$enable_mpi = xyes ; then
-    build_mpi_lib=yes
-else
-    build_mpi_lib=no
-fi
+AC_MSG_CHECKING(whether MPI tests should be run)
+AC_MSG_RESULT($running_mpi_check)
 
 AC_MSG_CHECKING(whether the StarPU MPI library should be generated)
 AC_MSG_RESULT($build_mpi_lib)
+AC_MSG_CHECKING(whether the StarPU MPI nmad library should be generated)
+AC_MSG_RESULT($build_nmad_lib)
 
 AM_CONDITIONAL(USE_MPI, test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes)
 if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
@@ -722,14 +622,12 @@ if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
 	else
 		AC_DEFINE(STARPU_USE_MPI_NMAD,[1],[whether the StarPU MPI library (with a NewMadeleine implementation) is available])
 	fi
-else
-	running_mpi_check=no
 fi
 
-if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
+if test x$enable_mpi = xyes ; then
     if test x$enable_simgrid = xyes ; then
         if test x$enable_shared = xyes ; then
-	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this, or disable MPI with --disable-mpi])
+	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, if you need the MPI support, then use --disable-shared to fix this, else disable MPI with --disable-mpi])
         else
 	    CFLAGS="$CFLAGS -fPIC"
 	    CXXFLAGS="$CXXFLAGS -fPIC"
@@ -744,17 +642,16 @@ AM_CONDITIONAL(STARPU_USE_MPI_NMAD, test x$build_nmad_lib = xyes)
 AM_CONDITIONAL(STARPU_USE_MPI, test x$build_nmad_lib = xyes -o x$build_mpi_lib = xyes)
 
 AC_ARG_WITH(mpiexec-args, [AS_HELP_STRING([--with-mpiexec-args[=<arguments to give when running mpiexec>]],
-			[Arguments for mpiexec])],
-	[
+					  [Arguments for mpiexec])],
+	    [
 		mpiexec_args=$withval
-	])
+	    ])
 AC_SUBST(MPIEXEC_ARGS,$mpiexec_args)
 
-
 AC_MSG_CHECKING(whether MPI debug messages should be displayed)
 AC_ARG_ENABLE(mpi-verbose, [AS_HELP_STRING([--enable-mpi-verbose],
-			[display MPI verbose debug messages (--enable-mpi-verbose=extra increase the verbosity)])],
-			enable_mpi_verbose=$enableval, enable_mpi_verbose=no)
+					   [display MPI verbose debug messages (--enable-mpi-verbose=extra increase the verbosity)])],
+	      enable_mpi_verbose=$enableval, enable_mpi_verbose=no)
 AC_MSG_RESULT($enable_mpi_verbose)
 if test x$enable_mpi_verbose = xyes; then
 	AC_DEFINE(STARPU_MPI_VERBOSE, [1], [display MPI verbose debug messages])
@@ -764,6 +661,19 @@ if test x$enable_mpi_verbose = xextra; then
 	AC_DEFINE(STARPU_MPI_EXTRA_VERBOSE, [1], [display MPI verbose debug messages])
 fi
 
+if test x$enable_mpi = xyes -o x$build_mpi_master_slave = xyes ; then
+    cc_or_mpicc=$mpicc_path
+    # For some reason, libtool uses gcc instead of mpicc when linking
+    # libstarpumpi.
+    # On Darwin (and maybe other systems ?) the linker will fail (undefined
+    # references to MPI_*). We manually add the required flags to fix this
+    # issue.
+    AC_SUBST(MPICC_LDFLAGS, `$mpicc_path --showme:link`)
+else
+    cc_or_mpicc=$CC
+fi
+AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
+
 ###############################################################################
 #                                                                             #
 #                           MIC device compilation                            #
@@ -3676,7 +3586,7 @@ AC_OUTPUT([
 	Makefile
 	src/Makefile
 	tools/Makefile
-	tools/replay/Makefile
+	tools/replay-mpi/Makefile
 	tools/starpu_env
 	tools/starpu_codelet_profile
 	tools/starpu_codelet_histo_profile

+ 1 - 1
doc/doxygen/chapters/310_data_management.doxy

@@ -602,7 +602,7 @@ whole machine, it would not be efficient to accumulate them in only one place,
 incurring data transmission each and access concurrency.
 
 StarPU provides a mode ::STARPU_REDUX, which permits to optimize
-this case: it will allocate a buffer on each memory node, and accumulate
+this case: it will allocate a buffer on each worker (lazily), and accumulate
 intermediate results there. When the data is eventually accessed in the normal
 mode ::STARPU_R, StarPU will collect the intermediate results in just one
 buffer.

+ 4 - 3
doc/doxygen/chapters/320_scheduling.doxy

@@ -190,9 +190,10 @@ single task gives the consumption of the task in Joules, which can be given to
 starpu_perfmodel_update_history().
 
 Another way to provide the energy performance is to define a
-perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH, and set the
-starpu_perfmodel::arch_cost_function field to a function which shall return the
-estimated consumption of the task in Joules. Such a function can for instance
+perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH or
+::STARPU_PER_WORKER , and set the starpu_perfmodel::arch_cost_function or
+starpu_perfmodel::worker_cost_function field to a function which shall return
+the estimated consumption of the task in Joules. Such a function can for instance
 use starpu_task_expected_length() on the task (in µs), multiplied by the
 typical power consumption of the device, e.g. in W, and divided by 1000000. to
 get Joules.

+ 2 - 1
doc/doxygen/chapters/350_scheduling_policy_definition.doxy

@@ -45,7 +45,8 @@ provides a complete list of the functions available for writing advanced schedul
 This includes getting an estimation for a task computation completion with
 starpu_task_expected_length(), for the required data transfers with
 starpu_task_expected_data_transfer_time_for(), for the required energy with
-starpu_task_expected_energy(), etc. Other
+starpu_task_expected_energy(), etc. Per-worker variants are also available with
+starpu_task_worker_expected_length(), etc. Other
 useful functions include starpu_transfer_bandwidth(), starpu_transfer_latency(),
 starpu_transfer_predict(), ...
 One can also directly test the presence of a data handle with starpu_data_is_on_node().

+ 5 - 0
doc/doxygen/chapters/370_online_performance_tools.doxy

@@ -401,6 +401,11 @@ filled with pointers to functions which return the expected duration
 of the task in micro-seconds, one per architecture, see for instance
 <c>tests/datawizard/locality.c</c>
 </li>
+
+<li>
+Provided explicitly by the application (model type ::STARPU_PER_WORKER)
+similarly with the starpu_perfmodel::worker_cost_function field.
+</li>
 </ul>
 
 For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and

+ 2 - 2
examples/mult/sgemm.sh

@@ -67,8 +67,8 @@ then
 	$PREFIX/../../tools/starpu_codelet_histo_profile distrib.data || true
 	[ -f distrib.data.starpu_sgemm_gemm.0.492beed5.33177600.pdf ] || true
 
-	if [ -x $PREFIX/../../tools/replay/starpu_replay ]; then
-		$STARPU_LAUNCH $PREFIX/../../tools/replay/starpu_replay tasks.rec
+	if [ -x $PREFIX/../../tools/starpu_replay ]; then
+		$STARPU_LAUNCH $PREFIX/../../tools/starpu_replay tasks.rec
 	fi
 
 	[ ! -x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump tasks.rec -o perfs2.rec

+ 9 - 0
include/fstarpu_mod.f90

@@ -1626,6 +1626,15 @@ module fstarpu_mod
                         integer(c_int), value, intent(in) :: prio
                 end subroutine fstarpu_data_idle_prefetch_on_node_prio
 
+                !unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node);
+                function fstarpu_data_is_on_node(dh, node) &
+                                bind(C,name="starpu_data_is_on_node")
+                        use iso_c_binding, only: c_ptr, c_int
+                        integer(c_int)                 :: fstarpu_data_is_on_node
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: node
+                end function fstarpu_data_is_on_node
+
                 ! void starpu_data_wont_use(starpu_data_handle_t handle);
                 subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use")
                         use iso_c_binding, only: c_ptr

+ 8 - 1
include/starpu_perfmodel.h

@@ -165,6 +165,7 @@ struct starpu_perfmodel_per_arch
 enum starpu_perfmodel_type
 {
         STARPU_PERFMODEL_INVALID=0,
+	STARPU_PER_WORKER,                /**< Application-provided per-worker cost model function */
 	STARPU_PER_ARCH,                  /**< Application-provided per-arch cost model function */
 	STARPU_COMMON,                    /**< Application-provided common cost model function, with per-arch factor */
 	STARPU_HISTORY_BASED,             /**< Automatic history-based cost model */
@@ -226,11 +227,17 @@ struct starpu_perfmodel
 	*/
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 	/**
-	   Used by ::STARPU_COMMON. Take a task, an arch and implementation
+	   Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
 	   number, and must return a task duration estimation in
 	   micro-seconds on that arch.
 	*/
 	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
+	/**
+	   Used by ::STARPU_PER_WORKER. Take a task, a worker id and implementation
+	   number, and must return a task duration estimation in
+	   micro-seconds on that worker.
+	*/
+	double (*worker_cost_function)(struct starpu_task *, unsigned workerid, unsigned nimpl);
 
 	/**
 	   Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and

+ 14 - 0
include/starpu_scheduler.h

@@ -110,6 +110,10 @@ struct starpu_sched_policy
 	   to be executed by the worker. This method therefore permits
 	   to keep the state of the scheduler coherent even when
 	   StarPU bypasses the scheduling strategy.
+
+	   Note: to get an estimation of the task duration, \p perf_workerid
+	   needs to be used rather than \p workerid, for the case of parallel
+	   tasks.
 	*/
 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
 
@@ -366,6 +370,11 @@ uint32_t starpu_task_data_footprint(struct starpu_task *task);
 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 
 /**
+   Same as starpu_task_expected_length() but for a precise worker.
+*/
+double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
+
+/**
    Return an estimated speedup factor relative to CPU speed
 */
 double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
@@ -395,6 +404,11 @@ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned
 double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 
 /**
+   Same as starpu_task_expected_energy() but for a precise worker.
+*/
+double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
+
+/**
    Return expected conversion time in ms (multiformat interface only)
 */
 double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);

+ 4 - 0
julia/StarPU.jl/Manifest.toml

@@ -0,0 +1,4 @@
+# This file is machine-generated - editing it directly is not advised
+
+[[Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

+ 7 - 0
julia/StarPU.jl/Project.toml

@@ -0,0 +1,7 @@
+name = "StarPU"
+uuid = "3e36cc6e-3f67-11e9-3531-2137bfe575e8"
+authors = ["barthou "]
+version = "0.1.0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

+ 2 - 0
julia/StarPU.jl/REQUIRE

@@ -0,0 +1,2 @@
+julia 1.0
+Libdl

File diff suppressed because it is too large
+ 1230 - 0
julia/StarPU.jl/src/StarPU.jl


+ 261 - 0
julia/StarPU.jl/src/compiler/c.jl

@@ -0,0 +1,261 @@
+
+
+"""
+    Returns the list of instructions that will be added before a for loop of shape
+        "for for_index_var in set ..."
+"""
+function interval_evaluation_declarations(set :: StarpuExprInterval, for_index_var :: Symbol)
+
+    bare_decl = @parse € :: Int64
+    init_decl = @parse € :: Int64 = €
+    size_decl = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
+
+    # Every generated variable name is suffixed with the interval identifier.
+    suffixed(prefix :: Symbol) = starpu_parse(Symbol(prefix, set.id))
+
+    first_var = suffixed(:start_)
+    first_decl = replace_pattern(init_decl, first_var, set.start)
+    loop_index_decl = replace_pattern(bare_decl, starpu_parse(for_index_var))
+
+    # Literal step: only the start and stop bounds need to be evaluated.
+    if set.step isa StarpuExprValue
+        last_decl = replace_pattern(init_decl, suffixed(:stop_), set.stop)
+        return StarpuExpr[first_decl, last_decl, loop_index_decl]
+    end
+
+    # Otherwise the step is evaluated too, the number of iterations is computed
+    # with jlstarpu_interval_size and an iteration counter is declared.
+    stride_var = suffixed(:step_)
+    stride_decl = replace_pattern(init_decl, stride_var, set.step)
+    extent_decl = replace_pattern(size_decl, suffixed(:dim_), first_var, stride_var, set.stop)
+    counter_decl = replace_pattern(bare_decl, suffixed(:iter_))
+
+    return StarpuExpr[first_decl, stride_decl, extent_decl, counter_decl, loop_index_decl]
+end
+
+
+"""
+    add_for_loop_declarations(expr)
+
+Return `expr` where every `StarpuExprFor` has been rebuilt with the
+declarations computed by `interval_evaluation_declarations` for its set and
+iterator. Other expressions are left untouched.
+"""
+function add_for_loop_declarations(expr :: StarpuExpr)
+    return apply(expr) do node :: StarpuExpr
+        node isa StarpuExprFor || return node
+        declarations = interval_evaluation_declarations(node.set, node.iter)
+        return StarpuExprFor(node.iter, node.set, node.body, node.is_independant, declarations)
+    end
+end
+
+
+
+
+
+"""
+    transform_to_cpu_kernel(expr)
+
+Run, in this order, the passes `add_for_loop_declarations`, `substitute_args`,
+`substitute_func_calls`, `substitute_indexing` and `flatten_blocks` on the
+function expression `expr`, and return the result of the last pass.
+"""
+function transform_to_cpu_kernel(expr :: StarpuExprFunction)
+    passes = (add_for_loop_declarations, substitute_args, substitute_func_calls,
+              substitute_indexing, flatten_blocks)
+    return foldl((current, pass) -> pass(current), passes; init = expr)
+end
+
+
+
+"""
+    flatten_blocks(expr)
+
+Return `expr` where the instructions of every block directly nested in another
+block have been spliced into the parent block.
+"""
+function flatten_blocks(expr :: StarpuExpr)
+
+    function func_to_run(x :: StarpuExpr)
+
+        if !isa(x, StarpuExprBlock)
+            return x
+        end
+
+        instrs = StarpuExpr[]
+
+        for sub_expr in x.exprs
+
+            if isa(sub_expr, StarpuExprBlock)
+                # append! takes the collection itself: no splatting of an
+                # arbitrarily long instruction list, and an empty nested block
+                # is simply dropped.
+                append!(instrs, sub_expr.exprs)
+            else
+                push!(instrs, sub_expr)
+            end
+        end
+
+        return StarpuExprBlock(instrs)
+    end
+
+    return apply(func_to_run, expr)
+end
+
+
+"""
+    substitute_argument_usage(expr, arg_index, buffer_name, arg_name, ptr_name)
+
+Return `expr` where every use of the variable `arg_name` is replaced by the
+variable `ptr_name`, and where every call to a function listed in
+`func_substitution` whose single argument is that variable is replaced by the
+substituted function applied to `buffer_name[arg_index]`.
+
+Raises an error when a function listed in `func_substitution` is not called
+with exactly one argument.
+"""
+function substitute_argument_usage(expr :: StarpuExpr, arg_index, buffer_name :: Symbol, arg_name :: Symbol, ptr_name :: Symbol)
+
+    arg_var = StarpuExprVar(arg_name)
+    ptr_var = StarpuExprVar(ptr_name)
+
+    return apply(expr) do node :: StarpuExpr
+
+        node == arg_var && return ptr_var
+
+        if !isa(node, StarpuExprCall) || !haskey(func_substitution, node.func)
+            return node
+        end
+
+        length(node.args) == 1 || error("Invalid arity for function $(node.func)")
+
+        # Calls on other variables are handled when their own argument is processed.
+        node.args[1] == ptr_var || return node
+
+        buffer_ref = starpu_parse(:($buffer_name[$arg_index]))
+        return StarpuExprCall(func_substitution[node.func], [buffer_ref])
+    end
+end
+
+
+
+"""
+    substitute_args(expr)
+
+Replace the arguments of the function `expr` by the two generic parameters
+`buffers_<id>` and `cl_arg_<id>`, and prepend to its body the declarations
+that fetch each original argument from the buffers:
+
+- `Vector` arguments get a `ptr_<id>` pointer set from `STARPU_VECTOR_GET_PTR`;
+- `Matrix` arguments get a `ptr_<id>` pointer set from `STARPU_MATRIX_GET_PTR`
+  and an `ld_<id>` variable set from `STARPU_MATRIX_GET_LD`;
+- `Float32` arguments get a `ptr_<id>` pointer set from
+  `STARPU_VARIABLE_GET_PTR` and a `scal_<id>` variable holding the pointed value.
+
+Uses of the original arguments in the body are rewritten with
+`substitute_argument_usage`. Any other argument type raises an error.
+"""
+function substitute_args(expr :: StarpuExprFunction)
+
+    new_body = expr.body
+    func_id = rand_string()
+    buffer_arg_name = Symbol("buffers_", func_id)
+    cl_arg_name = Symbol("cl_arg_", func_id)
+    function_start_affectations = StarpuExpr[]
+
+    for i in (1 : length(expr.args))
+
+        arg_type = expr.args[i].typ
+        var_id = rand_string()
+        ptr = Symbol(:ptr_, var_id)
+        var_name = ptr
+
+        # Extra declaration emitted right after the pointer one. It is reset
+        # for every argument, so that a Vector argument following a Matrix or
+        # Float32 one does not try to emit a declaration it does not have.
+        post_affect = nothing
+
+        if (arg_type <: Vector)
+            func_interface = :STARPU_VECTOR_GET_PTR
+        elseif (arg_type <: Matrix)
+            func_interface = :STARPU_MATRIX_GET_PTR
+            ld_name = Symbol("ld_", var_id)
+            post_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$i])) )
+        elseif (arg_type <: Float32)
+            func_interface = :STARPU_VARIABLE_GET_PTR
+            var_name = Symbol("scal_", var_id)
+            # Indexes are 1-based at this stage: substitute_indexing subtracts
+            # 1 from every single-index reference later in the pipeline (this
+            # is also what turns buffers[i] into a 0-based access), so the
+            # pointed value is element 1 here.
+            post_affect = starpu_parse( :($var_name :: Float32 = ($ptr[1])) )
+        else
+            error("Task arguments must be either vector, matrix or Float32 (got $(arg_type))") #TODO : cl_args, variable ?
+        end
+
+        type_in_arg = eltype(arg_type)
+        new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$i])) )
+        push!(function_start_affectations, new_affect)
+        if post_affect !== nothing
+            push!(function_start_affectations, post_affect)
+        end
+        new_body = substitute_argument_usage(new_body, i, buffer_arg_name, expr.args[i].name, var_name)
+
+    end
+
+
+    new_args = [
+                    starpu_parse(:($buffer_arg_name :: Matrix{Nothing})),
+                    starpu_parse(:($cl_arg_name :: Vector{Nothing}))
+                ]
+    new_body = StarpuExprBlock([function_start_affectations..., new_body.exprs...])
+
+    return StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
+end
+
+
+
+# Maps the size accessors usable in kernel code to the STARPU_* symbol that
+# replaces them (see substitute_func_calls and substitute_argument_usage).
+func_substitution = Dict(
+    :width => :STARPU_MATRIX_GET_NY,
+    :height => :STARPU_MATRIX_GET_NX,
+
+    :length => :STARPU_VECTOR_GET_NX
+)
+
+
+
+"""
+    substitute_func_calls(expr)
+
+Return `expr` where every call to a function listed in `func_substitution` is
+replaced by a call to its substitute, with the same arguments.
+"""
+function substitute_func_calls(expr :: StarpuExpr)
+    return apply(expr) do node :: StarpuExpr
+        if node isa StarpuExprCall && haskey(func_substitution, node.func)
+            return StarpuExprCall(func_substitution[node.func], node.args)
+        end
+        return node
+    end
+end
+
+
+"""
+    substitute_indexing(expr)
+
+Return `expr` where every indexed reference is rewritten with a single index
+shifted by one:
+
+- `a[i]` becomes `a[i - 1]`;
+- `ptr_<id>[i, j]` becomes `ptr_<id>[(i + (j - 1) * ld_<id>) - 1]`.
+
+Raises an error for references with three indexes or more, and for
+two-index references whose target is not a variable named `ptr_<id>`.
+"""
+function substitute_indexing(expr :: StarpuExpr)
+
+    function func_to_run(x :: StarpuExpr)
+
+        if !isa(x, StarpuExprRef)
+            return x
+        end
+
+        nb_indexes = length(x.indexes)
+
+        if (nb_indexes >= 3)
+            error("Indexing with more than 2 indexes is not allowed") # TODO : blocks
+        end
+
+        if (nb_indexes == 0)
+            return x
+        end
+
+        if nb_indexes == 1
+            new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)])  #TODO : add field "offset" from STARPU_VECTOR_GET interface
+                                                                            #TODO : detect when it is a matrix used with one index only
+            return StarpuExprRef(x.ref, [new_index])
+        end
+
+        # nb_indexes == 2 from here on.
+
+        # The ld_<id> variable is derived from the name of the indexed
+        # variable, so anything else than a variable cannot be handled.
+        if !isa(x.ref, StarpuExprVar)
+            error("Only variable indexing is allowed for multiple index dereferencing")
+        end
+
+        var_name = String(x.ref.name)
+
+        # The identifier is whatever follows the "ptr_" prefix: the name has
+        # to start with it, merely containing it is not enough.
+        if !startswith(var_name, "ptr_") || isempty(var_name[5:end])
+            error("Invalid variable ($var_name) for multiple index dereferencing")
+        end
+
+        var_id = var_name[5:end]
+        ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix)
+
+        new_index = x.indexes[2]
+        new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
+        new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)])
+        new_index = StarpuExprCall(:(+), [x.indexes[1], new_index])
+        new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
+
+        return StarpuExprRef(x.ref, [new_index])
+    end
+
+    return apply(func_to_run, expr)
+end

+ 349 - 0
julia/StarPU.jl/src/compiler/cuda.jl

@@ -0,0 +1,349 @@
+
+
+"""
+    is_indep_for_expr(x)
+
+Tell whether `x` is a `for` loop flagged as independant.
+"""
+is_indep_for_expr(x :: StarpuExpr) = x isa StarpuExprFor && x.is_independant
+
+
+"""
+    extract_init_indep_finish(expr)
+
+Split `expr` around its first top-level independant `for` loop and return the
+tuple `(init, indep, finish)`: the instructions before the loop, the loop as a
+`StarpuIndepFor` (or `nothing` when there is none) and the instructions after.
+
+Raises an error when another top-level independant loop follows the first one.
+"""
+function extract_init_indep_finish(expr :: StarpuExpr) # TODO : it is not a correct extraction (example : if (cond) {@indep for ...} else {return} would not work)
+                                                            # better use apply() (NOTE :assert_no_indep_for already exists) to find recursively every for loops
+
+    # The whole expression is the independant loop.
+    if is_indep_for_expr(expr)
+        return StarpuExpr[], StarpuIndepFor(expr), StarpuExpr[]
+    end
+
+    # A single instruction which is not a loop: everything is "init".
+    if !(expr isa StarpuExprBlock)
+        return [expr], nothing, StarpuExpr[]
+    end
+
+    instrs = expr.exprs
+    position = findfirst(is_indep_for_expr, instrs)
+
+    if position === nothing
+        return instrs, nothing, StarpuExpr[]
+    end
+
+    before = instrs[1 : position-1]
+    loop = StarpuIndepFor(instrs[position])
+    after = instrs[position+1 : end]
+
+    if any(is_indep_for_expr, after)
+        error("Sequence of several independant loops is not allowed") #same it may be tricked by a Block(Indep_for(...))
+    end
+
+    return before, loop, after
+end
+
+
+
+
+"""
+    analyse_variable_declarations(expr, already_defined = StarpuExprTypedVar[])
+
+Walk `expr` and return the tuple `(defined, undefined)`:
+
+- `defined` holds, as typed variables, `already_defined` followed by every
+  variable declared in `expr` (typed affectations, and `for` iterators which
+  are recorded as `Int64`);
+- `undefined` holds the names that were encountered without being declared by
+  the end of the walk.
+
+Raises an error on function declarations and on variables defined twice.
+"""
+function analyse_variable_declarations(expr :: StarpuExpr, already_defined :: Vector{StarpuExprTypedVar} = StarpuExprTypedVar[])
+
+    undefined_variables = Symbol[]
+
+    # Element types are spelled out: the walk pushes arbitrary names and types
+    # into these vectors, which must not depend on what `map` would infer from
+    # the (possibly empty) initial content.
+    defined_variable_names = Symbol[x.name for x in already_defined]
+    defined_variable_types = Type[x.typ for x in already_defined]
+
+    function func_to_apply(x :: StarpuExpr)
+
+        if isa(x, StarpuExprFunction)
+            error("No function declaration allowed in this section")
+        end
+
+        if isa(x, StarpuExprVar) || isa(x, StarpuExprTypedVar)
+
+            if !(x.name in defined_variable_names) && !(x.name in undefined_variables)
+                push!(undefined_variables, x.name)
+            end
+
+            return x
+        end
+
+        if isa(x, StarpuExprAffect)
+
+            var = x.var
+
+            # Only typed affectations declare a variable.
+            if !isa(var, StarpuExprTypedVar)
+                return x
+            end
+
+            name = var.name
+            typ = var.typ
+
+        elseif isa(x, StarpuExprFor)
+            name = x.iter
+            typ = Int64
+
+        else
+            return x
+        end
+
+        if name in defined_variable_names
+            error("Multiple definition of variable $name")
+        end
+
+        # The variable may have been seen (hence flagged as undefined) before
+        # its declaration was reached by the walk.
+        filter!((sym -> sym != name), undefined_variables)
+        push!(defined_variable_names, name)
+        push!(defined_variable_types, typ)
+
+        return x
+    end
+
+    apply(func_to_apply, expr)
+    defined_variable = StarpuExprTypedVar[StarpuExprTypedVar(n, t) for (n, t) in zip(defined_variable_names, defined_variable_types)]
+
+    return defined_variable, undefined_variables
+end
+
+
+
+"""
+    find_variable(name, vars)
+
+Return the first element of `vars` whose name is `name`, or `nothing` when
+there is none.
+"""
+function find_variable(name :: Symbol, vars :: Vector{StarpuExprTypedVar})
+    position = findfirst(candidate -> candidate.name == name, vars)
+    return position === nothing ? nothing : vars[position]
+end
+
+
+
+"""
+    add_device_to_interval_call(expr)
+
+Return `expr` where every call to `jlstarpu_interval_size` is replaced by a
+call to `jlstarpu_interval_size__device` with the same arguments.
+"""
+function add_device_to_interval_call(expr :: StarpuExpr)
+    return apply(expr) do node :: StarpuExpr
+        if !(node isa StarpuExprCall) || node.func != :jlstarpu_interval_size
+            return node
+        end
+        return StarpuExprCall(:jlstarpu_interval_size__device, node.args)
+    end
+end
+
+
+
+"""
+    transform_to_cuda_kernel(func)
+
+Split the function `func` into the pair `(prekernel, kernel)`:
+
+- `prekernel`, named `CUDA_<func>`, holds the instructions preceding the
+  independant loop, the computation of the launch parameters, the launch of
+  the kernel, a `cudaStreamSynchronize` call and the instructions following
+  the loop;
+- `kernel`, named `KERNEL_<func>`, holds the body of the independant loop.
+
+Variables used by the kernel and declared by the prekernel are passed as
+kernel arguments. Raises an error when no independant loop is found.
+"""
+function transform_to_cuda_kernel(func :: StarpuExprFunction)
+
+    cpu_func = transform_to_cpu_kernel(func)
+
+    init, indep, finish = extract_init_indep_finish(cpu_func.body)
+
+    if indep === nothing
+        error("No independant for loop has been found") # TODO can fail because extraction is not correct yet
+    end
+
+    prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
+
+    prekernel_instr = vcat(init, prekernel_instr)
+    kernel_instr = vcat(kernel_instr, indep.body)
+
+    _, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
+    prekernel_def, _ = analyse_variable_declarations(StarpuExprBlock(prekernel_instr), cpu_func.args)
+
+    # Whatever the kernel uses without declaring it has to come from the
+    # prekernel, as an additional kernel argument.
+    for undef_var in indep_for_undef
+
+        found_var = find_variable(undef_var, prekernel_def)
+
+        if found_var === nothing # TODO : error then ?
+            continue
+        end
+
+        push!(kernel_args, found_var)
+    end
+
+    call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
+    kernelname = Symbol("KERNEL_", func.func)
+    cuda_call = StarpuExprCudaCall(kernelname, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
+    push!(prekernel_instr, cuda_call)
+    push!(prekernel_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
+    prekernel_instr = vcat(prekernel_instr, finish)
+
+    prekernel_name = Symbol("CUDA_", func.func)
+    prekernel = StarpuExprFunction(Nothing, prekernel_name, cpu_func.args, StarpuExprBlock(prekernel_instr))
+    prekernel = flatten_blocks(prekernel)
+
+    kernel = StarpuExprFunction(Nothing, kernelname, kernel_args, StarpuExprBlock(kernel_instr))
+    kernel = add_device_to_interval_call(kernel)
+    kernel = flatten_blocks(kernel)
+
+    return prekernel, kernel
+end
+
+
+# Flattened view of a nest of independant for loops: one iterator symbol and
+# one interval per nesting level, plus the body found under the last level.
+struct StarpuIndepFor
+
+    iters :: Vector{Symbol}
+    sets :: Vector{StarpuExprInterval}
+
+    body :: StarpuExpr
+end
+
+
+"""
+    assert_no_indep_for(expr)
+
+Return `expr` as rebuilt by `apply`, raising an error if it contains any
+independant `for` loop.
+"""
+function assert_no_indep_for(expr :: StarpuExpr)
+    return apply(expr) do node :: StarpuExpr
+        if node isa StarpuExprFor && node.is_independant
+            error("Invalid usage of intricated @indep for loops")
+        end
+        return node
+    end
+end
+
+
+"""
+    StarpuIndepFor(expr)
+
+Build a `StarpuIndepFor` from the independant loop `expr`: directly nested
+independant loops (possibly wrapped in single-instruction blocks) are
+collapsed, their iterators and sets being collected from the outermost to the
+innermost one.
+
+Raises an error when `expr` is not independant, or when the remaining body
+still contains an independant loop.
+"""
+function StarpuIndepFor(expr :: StarpuExprFor)
+
+    if !expr.is_independant
+        error("For expression must be prefixed by @indep")
+    end
+
+    # Typed containers, matching the fields of the structure.
+    iters = Symbol[]
+    sets = StarpuExprInterval[]
+    for_loop = expr
+
+    while isa(for_loop, StarpuExprFor) && for_loop.is_independant
+
+        push!(iters, for_loop.iter)
+        push!(sets, for_loop.set)
+        for_loop = for_loop.body
+
+        # Look through blocks which only wrap a single instruction.
+        while (isa(for_loop, StarpuExprBlock) && length(for_loop.exprs) == 1)
+            for_loop = for_loop.exprs[1]
+        end
+    end
+
+    return StarpuIndepFor(iters, sets, assert_no_indep_for(for_loop))
+end
+
+
+# Builds the kernel instructions which compute THREAD_ID, leave the kernel
+# when it is out of bounds, and decompose it into one kernel_ids__index_<k>
+# variable per dimension of `dims`.
+# Raises an error when `dims` is empty.
+function translate_index_code(dims :: Vector{StarpuExprVar})
+
+    ndims = length(dims)
+
+    if ndims == 0
+        error("No dimension specified")
+    end
+
+    # `prod` accumulates the product of the dimensions already processed.
+    prod = StarpuExprValue(1)
+    output = StarpuExpr[]
+    reversed_dim = reverse(dims)
+    thread_index_patern = @parse € :: Int64 = (€ / €) % €
+    thread_id = @parse THREAD_ID
+
+    # Dimensions are processed from the last one to the first one:
+    # index = (THREAD_ID / product of the later dimensions) % dimension.
+    for i in (1 : ndims)
+        index_lvalue = StarpuExprVar(Symbol(:kernel_ids__index_, ndims - i + 1))
+        expr = replace_pattern(thread_index_patern, index_lvalue, thread_id, prod, reversed_dim[i])
+        push!(output, expr)
+
+        prod = StarpuExprCall(:(*), [prod, reversed_dim[i]])
+    end
+
+    thread_id_pattern = @parse begin
+
+        € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x
+
+        if (€ >= €)
+            return
+        end
+    end
+
+    # At this point `prod` is the product of all the dimensions.
+    bound_verif = replace_pattern(thread_id_pattern, thread_id, thread_id, prod)
+    push!(output, bound_verif)
+
+    # The bound check was pushed last: reversing puts it first, followed by
+    # the indexes from the first dimension to the last one.
+    return reverse(output)
+end
+
+
+
+
+
+
+
+# For each set of `ind_for`, numbered k, generates:
+#   - the prekernel declarations of kernel_ids__start_k, kernel_ids__step_k
+#     and kernel_ids__dim_k (the latter through jlstarpu_interval_size);
+#   - the three corresponding Int64 kernel arguments;
+#   - the kernel instruction computing the iterator as
+#     start + kernel_ids__index_k * step.
+# Returns the tuple (dims, iterator instructions, prekernel instructions,
+# kernel arguments, kernel instructions); the last one is returned empty.
+function kernel_index_declarations(ind_for :: StarpuIndepFor)
+
+    pre_kernel_instr = StarpuExpr[]
+    kernel_args = StarpuExprTypedVar[]
+    kernel_instr = StarpuExpr[]
+
+    decl_pattern = @parse € :: Int64 = €
+    interv_size_decl_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
+    iter_pattern = @parse € :: Int64 = € + € * €
+
+    dims = StarpuExprVar[]
+    ker_instr_to_add_later_on = StarpuExpr[]
+
+    for k in (1 : length(ind_for.sets))
+
+        set = ind_for.sets[k]
+
+        start_var = starpu_parse(Symbol(:kernel_ids__start_, k))
+        start_decl = replace_pattern(decl_pattern, start_var, set.start)
+
+        step_var = starpu_parse(Symbol(:kernel_ids__step_, k))
+        step_decl = replace_pattern(decl_pattern, step_var, set.step)
+
+        dim_var = starpu_parse(Symbol(:kernel_ids__dim_, k))
+        dim_decl = replace_pattern(interv_size_decl_pattern, dim_var, start_var, step_var, set.stop)
+
+        push!(dims, dim_var)
+
+        push!(pre_kernel_instr, start_decl, step_decl, dim_decl)
+        push!(kernel_args, StarpuExprTypedVar(start_var.name, Int64))
+        push!(kernel_args, StarpuExprTypedVar(step_var.name, Int64))
+        push!(kernel_args, StarpuExprTypedVar(dim_var.name, Int64))
+
+        # iterator = start + index * step
+        iter_var = starpu_parse(ind_for.iters[k])
+        index_var = starpu_parse(Symbol(:kernel_ids__index_, k))
+        iter_decl = replace_pattern(iter_pattern, iter_var, start_var, index_var, step_var)
+
+        push!(ker_instr_to_add_later_on, iter_decl)
+    end
+
+
+    return dims, ker_instr_to_add_later_on, pre_kernel_instr , kernel_args, kernel_instr
+end
+
+
+
+# Generates the code required to run the independant loops `ind_for` as a
+# kernel. Returns the tuple (prekernel instructions, kernel arguments,
+# kernel instructions):
+#   - the prekernel instructions end with the declarations of `nthreads` (the
+#     product of the dimensions of the sets) and of `nblocks`;
+#   - the kernel instructions hold the code from translate_index_code followed
+#     by the iterator computations from kernel_index_declarations.
+function analyse_sets(ind_for :: StarpuIndepFor)
+
+
+    decl_pattern = @parse € :: Int64 = €
+    nblocks_decl_pattern = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK
+
+    dims, ker_instr_to_add, pre_kernel_instr, kernel_args, kernel_instr  = kernel_index_declarations(ind_for)
+
+    # nthreads = 1 * dim_1 * dim_2 * ...
+    dim_prod = @parse 1
+
+    for d in dims
+        dim_prod = StarpuExprCall(:(*), [dim_prod, d])
+    end
+
+    nthreads_var = @parse nthreads
+    nthreads_decl = replace_pattern(decl_pattern, nthreads_var, dim_prod)
+    push!(pre_kernel_instr, nthreads_decl)
+
+    # nblocks = (nthreads + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK
+    nblocks_var = @parse nblocks
+    nblocks_decl = replace_pattern(nblocks_decl_pattern, nblocks_var, nthreads_var)
+    push!(pre_kernel_instr, nblocks_decl)
+
+
+    index_decomposition = translate_index_code(dims)
+
+    # The indexes must be computed before the iterators which depend on them.
+    push!(kernel_instr, index_decomposition...)
+    push!(kernel_instr, ker_instr_to_add...)
+
+    return pre_kernel_instr, kernel_args, kernel_instr
+end

julia/src/Compiler/expression_manipulation.jl → julia/StarPU.jl/src/compiler/expression_manipulation.jl


+ 928 - 0
julia/StarPU.jl/src/compiler/expressions.jl

@@ -0,0 +1,928 @@
+
+#======================================================
+                AFFECTATION
+======================================================#
+# Root of the expression types handled by the compiler.
+abstract type StarpuExpr end
+# Expressions which carry a type.
+abstract type StarpuExprTyped <: StarpuExpr end
+
+
+# Variable with its type.
+struct StarpuExprTypedVar <: StarpuExprTyped
+    name :: Symbol
+    typ :: Type
+end
+
+# Arbitrary expression with its type.
+struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ?
+    expr :: StarpuExpr
+    typ :: Type
+end
+
+# Affectation, printed as `var = expr`.
+struct StarpuExprAffect <: StarpuExpr
+    var :: StarpuExpr
+    expr :: StarpuExpr
+end
+
+# Series of instructions.
+struct StarpuExprBlock <: StarpuExpr
+    exprs :: Vector{StarpuExpr}
+end
+
+# Call to the function `func` with the arguments `args`.
+struct StarpuExprCall <: StarpuExpr
+    func :: Symbol
+    args :: Vector{StarpuExpr}
+end
+# Launch of the CUDA kernel `ker_name`, printed with the
+# `<<< nblocks, threads_per_block, ... >>> (args)` syntax.
+struct StarpuExprCudaCall <: StarpuExpr
+
+    ker_name :: Symbol
+
+    nblocks :: StarpuExpr
+    threads_per_block :: StarpuExpr
+
+    args :: Vector{StarpuExpr}
+
+end
+# Access to the field `field` of `left`, printed with "->" when `is_an_arrow`
+# is set and with "." otherwise.
+struct StarpuExprField <: StarpuExpr
+
+    left :: StarpuExpr
+    field :: Symbol
+
+    is_an_arrow :: Bool
+end
+# Interval described by its start, step and stop expressions. `id` is used as
+# a suffix for the names of the variables generated for the interval; it
+# defaults to rand_string().
+struct StarpuExprInterval <: StarpuExpr
+    start :: StarpuExpr
+    step :: StarpuExpr
+    stop :: StarpuExpr
+
+    id :: String
+
+    function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string())
+        return new(start, step, stop, id)
+    end
+
+end
+# For loop: `iter` takes its values in `set`. `set_declarations` holds the
+# declarations which are printed right before the loop.
+struct StarpuExprFor <: StarpuExpr
+
+    iter :: Symbol
+    set:: StarpuExprInterval
+    body :: StarpuExpr
+
+    is_independant :: Bool
+    set_declarations :: Vector{StarpuExpr}
+
+end
+# Function declaration: return type, name, typed arguments and body.
+struct StarpuExprFunction <: StarpuExpr
+    ret_type :: Type
+    func :: Symbol
+    args :: Vector{StarpuExprTypedVar}
+    body :: StarpuExpr
+end
+# Conditional without an else branch.
+struct StarpuExprIf <: StarpuExpr
+    cond :: StarpuExpr
+    then_statement :: StarpuExpr
+end
+
+
+# Conditional with an else branch.
+struct StarpuExprIfElse <: StarpuExpr
+    cond :: StarpuExpr
+    then_statement :: StarpuExpr
+    else_statement :: StarpuExpr
+end
+
+# Indexed reference: `ref` indexed by `indexes`.
+struct StarpuExprRef <: StarpuExpr
+    ref :: StarpuExpr
+    indexes :: Vector{StarpuExpr}
+end
+# Return statement.
+struct StarpuExprReturn <: StarpuExpr
+    value :: StarpuExpr
+end
+# Untyped variable.
+struct StarpuExprVar <: StarpuExpr
+    name :: Symbol
+end
+# Expression without any content.
+struct StarpuExprInvalid <: StarpuExpr
+end
+
+# Literal value.
+struct StarpuExprValue <: StarpuExpr
+    value :: Any
+end
+
+# While loop.
+struct StarpuExprWhile <: StarpuExpr
+    cond :: StarpuExpr
+    body :: StarpuExpr
+end
+
+
+"""
+    starpu_parse_affect(x)
+
+Parse the `=` expression `x` into a `StarpuExprAffect`. Raises an error when
+the head of `x` is not `=`.
+"""
+function starpu_parse_affect(x :: Expr)
+    x.head == :(=) || error("Invalid \"affectation\" expression")
+    lhs = starpu_parse(x.args[1])
+    rhs = starpu_parse(x.args[2])
+    return StarpuExprAffect(lhs, rhs)
+end
+
+
+"""
+    equals(x, y)
+
+Tell whether `y` is an affectation whose variable and expression are both
+`equals` to those of the affectation `x`.
+"""
+function equals(x :: StarpuExprAffect, y :: StarpuExpr)
+    y isa StarpuExprAffect || return false
+    return equals(x.var, y.var) && equals(x.expr, y.expr)
+end
+
+
+"""
+    print(io, x :: StarpuExprAffect ; indent = 0, restrict = false)
+
+Print the affectation `x` as `var = expr`. When the variable is typed, the
+expression is wrapped in a cast to the translated type of the variable.
+"""
+function print(io :: IO, x :: StarpuExprAffect ; indent = 0, restrict = false)
+
+    print(io, x.var, indent = indent)
+    print(io, " = ")
+
+    # transtyping to avoid warning (or errors for cuda) during compilation time
+    if x.var isa StarpuExprTypedVar
+        print(io, "(", starpu_type_traduction(x.var.typ), ") (")
+        print(io, x.expr, indent = indent)
+        print(io, ")")
+    else
+        print(io, x.expr, indent = indent)
+    end
+
+end
+
+"""
+    apply(func, expr :: StarpuExprAffect)
+
+Apply `func` recursively to the variable then to the expression of `expr`, and
+return `func` applied to the affectation rebuilt from both results.
+"""
+function apply(func :: Function, expr :: StarpuExprAffect)
+    return func(StarpuExprAffect(apply(func, expr.var), apply(func, expr.expr)))
+end
+
+#======================================================
+                BLOCK
+(series of instruction, not C variable scoping block)
+======================================================#
+
+
+
+
+"""
+    is_unwanted(x)
+
+Tell whether `x`, taken from the arguments of a `:block` expression, must be
+dropped before parsing. Only `LineNumberNode`s are unwanted: anything else
+(symbols, expressions, but also literals or quoted values) is kept.
+"""
+is_unwanted(x) = false
+
+is_unwanted(x :: LineNumberNode) = true
+
+"""
+    starpu_parse_block(x)
+
+Parse the `:block` expression `x` into a `StarpuExprBlock`, ignoring the
+arguments flagged by `is_unwanted`. Raises an error when the head of `x` is
+not `:block`.
+"""
+function starpu_parse_block(x :: Expr)
+    x.head == :block || error("Invalid \"block\" expression")
+    kept = filter(arg -> !is_unwanted(arg), x.args)
+    return StarpuExprBlock(map(starpu_parse, kept))
+end
+
+
+"""
+    print(io, x :: StarpuExprBlock ; indent = 0, restrict = false)
+
+Print every instruction of the block `x` followed by ";", with a new line
+between two consecutive instructions.
+"""
+function print(io :: IO, x :: StarpuExprBlock ; indent = 0, restrict=false)
+    last_position = length(x.exprs)
+    for (position, instr) in enumerate(x.exprs)
+        print(io, instr, indent = indent)
+        print(io, ";")
+        position == last_position || print_newline(io, indent)
+    end
+end
+
+
+
+
+"""
+    apply(func, expr :: StarpuExprBlock)
+
+Apply `func` recursively to every instruction of `expr`, and return `func`
+applied to the block rebuilt from the results.
+"""
+function apply(func :: Function, expr :: StarpuExprBlock)
+    visited = [apply(func, instr) for instr in expr.exprs]
+    return func(StarpuExprBlock(visited))
+end
+
+#======================================================
+                FUNCTION CALL
+======================================================#
+
+
+
+
+"""
+    starpu_parse_call(x)
+
+Parse the `:call` expression `x` into a `StarpuExprCall`. Calls to `:` are
+handed over to `starpu_parse_interval`.
+
+Raises an error when the head of `x` is not `:call` or when the called
+function is not a variable.
+"""
+function starpu_parse_call(x :: Expr)
+
+    x.head == :call || error("Invalid \"call\" expression")
+
+    callee = starpu_parse(x.args[1])
+
+    if x.args[1] == Symbol(":")
+        return starpu_parse_interval(x)
+    end
+
+    callee isa StarpuExprVar || error("Invalid \"call\" expression : function must be a variable")
+
+    return StarpuExprCall(callee.name, map(starpu_parse, x.args[2:end]))
+end
+
+
+# Functions printed between their arguments when called with at least two of
+# them; every other call is printed as func(args...).
+starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(%))
+
+
+"""
+    print_prefix(io, x ; indent = 0, restrict = false)
+
+Print the call `x` as `func(arg1, arg2, ...)`.
+"""
+function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0, restrict=false)
+
+    print(io, x.func, "(")
+
+    for (position, arg) in enumerate(x.args)
+        position == 1 || print(io, ", ")
+        print(io, arg, indent = indent)
+    end
+
+    print(io, ")")
+end
+
+
+"""
+    print_infix(io, x ; indent = 0, restrict = false)
+
+Print the call `x` as `(arg1) func (arg2) func ...`, every argument being
+wrapped in parentheses.
+"""
+function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false)
+    for (position, arg) in enumerate(x.args)
+        position == 1 || print(io, " ", x.func, " ")
+        print(io, "(")
+        print(io, arg, indent = indent)
+        print(io, ")")
+    end
+end
+
+"""
+    print(io, x :: StarpuExprCall ; indent = 0, restrict = false)
+
+Print the call `x` with `print_infix` when it has at least two arguments and
+its function belongs to `starpu_infix_operators`, with `print_prefix`
+otherwise.
+"""
+function print(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false)
+    is_infix = length(x.args) >= 2 && x.func in starpu_infix_operators
+    printer = is_infix ? print_infix : print_prefix
+    printer(io, x, indent = indent)
+end
+
+
+
+
+"""
+    apply(func, expr :: StarpuExprCall)
+
+Apply `func` recursively to every argument of `expr`, and return `func`
+applied to the call rebuilt from the results.
+"""
+function apply(func :: Function, expr :: StarpuExprCall)
+    visited_args = [apply(func, arg) for arg in expr.args]
+    return func(StarpuExprCall(expr.func, visited_args))
+end
+
+
+#======================================================
+                CUDA KERNEL CALL
+======================================================#
+
+
+
+
+
+# Prints the kernel launch `expr` over several lines:
+#   ker_name
+#       <<< nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()
+#       >>> (arg1, arg2, ...);
+function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0,restrict=false)
+
+    print_newline(io, indent)
+    print(io, expr.ker_name)
+    print_newline(io, indent + starpu_indent_size)
+    print(io, "<<< ")
+    print(io, expr.nblocks, indent = indent + 2 * starpu_indent_size)
+    print(io, ", ")
+    print(io, expr.threads_per_block, indent = indent + 2 * starpu_indent_size)
+    print(io, ", 0, starpu_cuda_get_local_stream()")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, ">>> (")
+
+    for i in (1 : length(expr.args))
+
+        if (i != 1)
+            print(io, ", ")
+            # Start a new line every four arguments.
+            if (i % 4 == 1)
+                print_newline(io, indent + 2 * starpu_indent_size + 1)
+            end
+        end
+
+        print(io, expr.args[i], indent = indent + 2 * starpu_indent_size)
+
+    end
+
+    print(io, ");")
+    print_newline(io, indent)
+
+end
+
+
+"""
+    apply(func, expr :: StarpuExprCudaCall)
+
+Apply `func` recursively to the number of blocks, the number of threads per
+block and every argument of `expr`, and return `func` applied to the kernel
+call rebuilt from the results, like the `apply` methods of the other
+expressions do.
+"""
+function apply(func :: Function, expr :: StarpuExprCudaCall)
+
+    # Recurse with apply (not a bare func call) so that nested expressions
+    # are visited too.
+    nblocks = apply(func, expr.nblocks)
+    threads_per_block = apply(func, expr.threads_per_block)
+    args = map((x -> apply(func, x)), expr.args)
+
+    # The rebuilt node itself is visited as well.
+    return func(StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args))
+end
+
+
+#======================================================
+                STRUCTURE FIELDS
+======================================================#
+
+
+
+
+
+"""
+    starpu_parse_field(x)
+
+Parse the dot expression `x` into a `StarpuExprField` which is not an arrow.
+Raises an error when `x` is not a two-argument `.` expression whose second
+argument is a quoted symbol.
+"""
+function starpu_parse_field(x :: Expr)
+
+    if !(x.head == :(.) && length(x.args) == 2)
+        error("Invalid parsing of dot expression")
+    end
+
+    owner = starpu_parse(x.args[1])
+    quoted_field = x.args[2]
+
+    if !(quoted_field isa QuoteNode && quoted_field.value isa Symbol)
+        error("Invalid parsing of dot expression")
+    end
+
+    return StarpuExprField(owner, quoted_field.value, false)
+end
+
+
+"""
+    print(io, x :: StarpuExprField ; indent = 0, restrict = false)
+
+Print the field access `x` as `(left)->field` when it is an arrow, and as
+`(left).field` otherwise.
+"""
+function print(io :: IO, x :: StarpuExprField ; indent = 0,restrict=false)
+    accessor = x.is_an_arrow ? "->" : '.'
+    print(io, "(")
+    print(io, x.left, indent = indent)
+    print(io, ")", accessor, x.field)
+end
+
+
+
+"""
+    apply(func, expr :: StarpuExprField)
+
+Apply `func` recursively to the left-hand side of `expr`, and return `func`
+applied to the field access rebuilt from the result.
+"""
+function apply(func :: Function, expr :: StarpuExprField)
+    # Recurse with apply (not a bare func call) so that expressions nested in
+    # the left-hand side are visited too.
+    return func(StarpuExprField(apply(func, expr.left), expr.field, expr.is_an_arrow))
+end
+
+#======================================================
+                FOR LOOPS
+======================================================#
+
+
+
+
+
+"""
+    starpu_parse_for(x ; is_independant = false)
+
+Parse the `for` expression `x` into a `StarpuExprFor` without any set
+declaration. Raises an error when `x` is not a `for` expression, when its
+iterator is not a variable, or when its set of values is not an interval.
+"""
+function starpu_parse_for(x :: Expr; is_independant = false)
+
+    x.head == :for || error("Invalid \"for\" expression")
+
+    header = x.args[1]
+    header.head == :(=) || error("Invalid \"for\" iterator affectation")
+
+    iterator = starpu_parse(header.args[1])
+    iterator isa StarpuExprVar || error("Invalid \"for\" iterator")
+
+    values = starpu_parse(header.args[2])
+    values isa StarpuExprInterval || error("Set of values in \"for\" loop must be an interval")
+
+    loop_body = starpu_parse(x.args[2])
+
+    return StarpuExprFor(iterator.name, values, loop_body, is_independant, StarpuExpr[])
+end
+
+
+
+
+
+# Prints the loop `x`: its set declarations first, then the `for (...)` header,
+# then its body between braces.
+function print(io :: IO, x :: StarpuExprFor ; indent = 0,restrict=false)
+
+    print_newline(io, indent)
+    print(io, StarpuExprBlock(x.set_declarations), indent = indent)
+
+    id = x.set.id
+
+    # Names of the variables generated for the interval (see
+    # interval_evaluation_declarations).
+    start = "start_" * id
+    stop = "stop_" * id
+    step = "step_" * id
+    dim = "dim_" * id
+    iter = "iter_" * id
+
+    print_newline(io, indent, 2)
+
+    if isa(x.set.step, StarpuExprValue)
+        # Literal step: iterate from start to stop, the comparison operator
+        # depending on the sign of the step.
+        print(io, "for ($(x.iter) = $start ; ")
+        comparison_op = (x.set.step.value >= 0) ? "<=" : ">="
+        print(io, "$(x.iter) $comparison_op $stop ; ")
+        print(io, "$(x.iter) += $(x.set.step.value))")
+
+    else
+        # Otherwise count the iterations up to the dimension of the interval.
+        print(io, "for ($iter = 0, $(x.iter) = $start ; ")
+        print(io, "$iter < $dim ; ")
+        print(io, "$iter += 1, $(x.iter) += $step)")
+
+    end
+
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.body, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+
+end
+
+
+
+# Rebuild a `for` loop after applying `func` to its set declarations, its
+# interval and its body (in that order), then apply `func` to the rebuilt
+# loop itself. The iterator name and the independence flag are copied as is.
+function apply(func :: Function, expr :: StarpuExprFor)
+    new_declarations = map(decl -> apply(func, decl), expr.set_declarations)
+    new_set = apply(func, expr.set)
+    new_body = apply(func, expr.body)
+
+    rebuilt = StarpuExprFor(expr.iter, new_set, new_body, expr.is_independant, new_declarations)
+    return func(rebuilt)
+end
+
+
+#======================================================
+                FUNCTION DECLARATION
+======================================================#
+
+
+
+
+# Parse a Julia `function` definition into a `StarpuExprFunction`.
+#
+# The prototype must carry a return type annotation (it has to parse to a
+# `StarpuExprTypedExpr` wrapping a `StarpuExprCall`), and every argument must
+# be a typed variable. An error is raised otherwise.
+function starpu_parse_function(x :: Expr)
+
+    if (x.head != :function)
+        error("Invalid \"function\" expression")
+    end
+
+    typed_decl = starpu_parse(x.args[1])
+
+    if (!isa(typed_decl, StarpuExprTypedExpr))
+        error("Invalid \"function\" prototype : a return type must be specified")
+    end
+
+    prototype = typed_decl.expr
+
+    if (!isa(prototype, StarpuExprCall))
+        error("Invalid \"function\" prototype")
+    end
+
+    # Collected in a concretely typed vector, checking each argument on the way.
+    arg_list = StarpuExprTypedVar[]
+
+    for type_arg in prototype.args
+        if (!isa(type_arg, StarpuExprTypedVar))
+            error("Invalid \"function\" argument list")
+        end
+        push!(arg_list, type_arg)
+    end
+
+    body = starpu_parse(x.args[2])
+    return StarpuExprFunction(typed_decl.typ, prototype.func, arg_list, body)
+end
+
+
+
+# Write a `StarpuExprFunction` as a C function definition: translated return
+# type, function name, parenthesised argument list, then the body between
+# braces followed by blank lines.
+function print(io :: IO, x :: StarpuExprFunction ; indent = 0,restrict=false)
+
+    print(io, starpu_type_traduction(x.ret_type), " ")
+    print(io, x.func, '(')
+
+    for i in (1 : length(x.args))
+
+        if (i != 1)
+            print(io, ", ")
+            # Break the argument list before the 5th, 9th, ... argument.
+            # NOTE(review): the extra 13 columns look like an alignment
+            # offset for the continuation line -- confirm the intent.
+            if (i % 4 == 1)
+                print_newline(io, indent + starpu_indent_size + length(String(x.func)) + 13)
+            end
+        end
+       print(io, x.args[i], indent = indent + starpu_indent_size, restrict = true)
+    end
+
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.body, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}\n\n")
+    print_newline(io, indent)
+end
+
+
+
+# Rebuild a function definition after applying `func` to each argument and to
+# the body, then apply `func` to the rebuilt definition. Return type and
+# function name are copied unchanged.
+function apply(func :: Function, expr :: StarpuExprFunction)
+    new_args = map(arg -> apply(func, arg), expr.args)
+    new_body = apply(func, expr.body)
+
+    rebuilt = StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
+    return func(rebuilt)
+end
+
+
+#======================================================
+                IF STATEMENT
+======================================================#
+
+
+
+
+
+# Parse a Julia `if` expression. With exactly two arguments (condition and
+# "then" branch) a `StarpuExprIf` is returned; when a third argument is
+# present it is parsed as the "else" branch of a `StarpuExprIfElse`.
+function starpu_parse_if(x :: Expr)
+
+    x.head == :if || error("Invalid \"if\" expression")
+
+    nargs = length(x.args)
+    nargs >= 2 || error("Invalid \"if\" statement")
+
+    condition = starpu_parse(x.args[1])
+    then_branch = starpu_parse(x.args[2])
+
+    if nargs > 2
+        else_branch = starpu_parse(x.args[3])
+        return StarpuExprIfElse(condition, then_branch, else_branch)
+    end
+
+    return StarpuExprIf(condition, then_branch)
+end
+
+
+# Write an `if` statement, and its `else` branch when `x` is a
+# `StarpuExprIfElse`, as C code with each branch between braces.
+# `restrict` is accepted but not used by this method.
+function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0,restrict=false)
+
+    inner = indent + starpu_indent_size
+
+    print_newline(io, indent)
+    print(io, "if (")
+    print(io, x.cond, indent = inner)
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, inner)
+    print(io, x.then_statement, indent = inner)
+    print_newline(io, indent)
+    print(io, "}")
+
+    # A plain `if` stops here, without a trailing newline.
+    isa(x, StarpuExprIfElse) || return
+
+    print(io, " else")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, inner)
+    print(io, x.else_statement, indent = inner)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+
+end
+
+
+
+# Rebuild an `if` statement after applying `func` to its condition and to its
+# "then" branch, then apply `func` to the rebuilt statement.
+function apply(func :: Function, expr :: StarpuExprIf)
+    new_cond = apply(func, expr.cond)
+    new_then = apply(func, expr.then_statement)
+
+    rebuilt = StarpuExprIf(new_cond, new_then)
+    return func(rebuilt)
+end
+
+
+
+# Rebuild an `if`/`else` statement after applying `func` to its condition and
+# to both branches, then apply `func` to the rebuilt statement.
+function apply(func :: Function, expr :: StarpuExprIfElse)
+    new_cond = apply(func, expr.cond)
+    new_then = apply(func, expr.then_statement)
+    new_else = apply(func, expr.else_statement)
+
+    rebuilt = StarpuExprIfElse(new_cond, new_then, new_else)
+    return func(rebuilt)
+end
+
+#======================================================
+                INTERVALS
+======================================================#
+
+
+
+
+# Parse a range call expression into a `StarpuExprInterval`.
+#
+# `x.args[1]` is the called function and is not inspected here. With two
+# operands (`start:stop`) the step defaults to the literal 1; with three
+# (`start:step:stop`) the middle operand is the step. An error is raised when
+# `x` is not a call or has fewer than two operands.
+function starpu_parse_interval(x :: Expr)
+
+    if (x.head != :(call))
+        error("Invalid \"interval\" expression")
+    end
+
+    # Guard the indexing below: without it a malformed call fails with a
+    # BoundsError that says nothing about intervals.
+    nargs = length(x.args)
+    if (nargs < 3)
+        error("Invalid \"interval\" expression")
+    end
+
+    start = starpu_parse(x.args[2])
+    # Either the stop (two operands) or the step (three operands).
+    second = starpu_parse(x.args[3])
+
+    if (nargs == 3)
+        return StarpuExprInterval(start, StarpuExprValue(1), second)
+    end
+
+    stop = starpu_parse(x.args[4])
+
+    return StarpuExprInterval(start, second, stop)
+end
+
+
+
+# Rebuild an interval after applying `func` to its start, step and stop (in
+# that order), keeping the original `id`, then apply `func` to the result.
+function apply(func :: Function, expr :: StarpuExprInterval)
+    new_start = apply(func, expr.start)
+    new_step = apply(func, expr.step)
+    new_stop = apply(func, expr.stop)
+
+    rebuilt = StarpuExprInterval(new_start, new_step, new_stop, id = expr.id)
+    return func(rebuilt)
+end
+
+#======================================================
+                ARRAYS AND REFERENCES
+======================================================#
+
+
+
+
+# Parse an indexing expression (`a[i, j, ...]`) into a `StarpuExprRef`: the
+# first argument is the indexed expression, the remaining ones the indexes.
+function starpu_parse_ref(x :: Expr)
+
+    x.head == :ref || error("Invalid \"reference\" expression")
+
+    target = starpu_parse(x.args[1])
+    subscripts = map(starpu_parse, x.args[2:end])
+
+    return StarpuExprRef(target, subscripts)
+end
+
+
+
+# Structural equality between a reference and any other expression: true only
+# when `y` is also a `StarpuExprRef` with an equal indexed expression and
+# pairwise equal indexes.
+function equals(x :: StarpuExprRef, y :: StarpuExpr)
+
+    typeof(y) == StarpuExprRef || return false
+
+    if equals(x.ref, y.ref) && length(x.indexes) == length(y.indexes)
+        return all(map(equals, x.indexes, y.indexes))
+    end
+
+    return false
+end
+
+
+
+
+# Write an indexed access: the indexed expression followed by one bracketed
+# subscript per index. `restrict` is accepted but not used by this method.
+function print(io :: IO, x :: StarpuExprRef ; indent = 0,restrict=false)
+
+    print(io, x.ref, indent = indent)
+
+    for subscript in x.indexes
+        print(io, "[")
+        print(io, subscript, indent = indent)
+        print(io, "]")
+    end
+
+end
+
+
+
+# Rebuild an indexed access after applying `func` to the indexed expression
+# and to every index, then apply `func` to the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprRef)
+    new_ref = apply(func, expr.ref)
+    new_indexes = map(idx -> apply(func, idx), expr.indexes)
+
+    rebuilt = StarpuExprRef(new_ref, new_indexes)
+    return func(rebuilt)
+end
+
+#======================================================
+                RETURN EXPRESSION
+======================================================#
+
+
+
+# Parse a `return` expression into a `StarpuExprReturn`.
+function starpu_parse_return(x :: Expr)
+    x.head == :return || error("Invalid \"return\" expression")
+
+    parsed = starpu_parse(x.args[1])
+
+    # A typed variable loses its type annotation here, so that the returned
+    # value can be matched against plain `StarpuExprVar` nodes.
+    returned = isa(parsed, StarpuExprTypedVar) ? StarpuExprVar(parsed.name) : parsed
+
+    return StarpuExprReturn(returned)
+end
+
+# Write a `return` statement: the keyword followed by the returned value.
+# No terminating semicolon is written here.
+function print(io :: IO, x :: StarpuExprReturn ; indent = 0,restrict=false)
+    returned = x.value
+
+    print(io, "return ")
+    return print(io, returned, indent = indent)
+end
+
+# Rebuild a `return` statement around its transformed value, then apply
+# `func` to the rebuilt statement.
+function apply(func :: Function, expr :: StarpuExprReturn)
+    new_value = apply(func, expr.value)
+    rebuilt = StarpuExprReturn(new_value)
+    return func(rebuilt)
+end
+
+# Fallback for expression types without a dedicated `apply` method: the node
+# is handed to `func` as is, without visiting any children.
+apply(func :: Function, expr :: StarpuExpr) = func(expr)
+
+# Print a variable as its bare name. `indent` and `restrict` are not used;
+# they are accepted so that this method can be called with the same keywords
+# as the other expression printers.
+print(io :: IO, x :: StarpuExprVar ; indent = 0, restrict = false) = print(io, x.name)
+
+# Print a literal value as source text: nothing at all for `nothing`, strings
+# between double quotes, characters between single quotes, and any other
+# value through its regular `print` form. `indent` and `restrict` are not
+# used by this method.
+function print(io :: IO, x :: StarpuExprValue ; indent = 0,restrict=false)
+
+    value = x.value
+
+    # Identity test rather than `==`: `==` can be overloaded and yields
+    # `missing` (not a Bool) when the value is `missing`.
+    if value === nothing
+        return
+    end
+
+    if isa(value, AbstractString)
+        print(io, '"', value, '"')
+        return
+    end
+
+    if isa(value, Char)
+        print(io, '\'', value, '\'')
+        return
+    end
+
+    print(io, value)
+end
+
+
+
+
+
+# Print the placeholder text for an invalid expression. `indent` and
+# `restrict` are not used; they are accepted so that this method can be
+# called with the same keywords as the other expression printers.
+print(io :: IO, x :: StarpuExprInvalid ; indent = 0, restrict = false) = print(io, "INVALID")
+
+
+
+# Fallback parser: any value without a more specific `starpu_parse` method is
+# wrapped as a literal.
+starpu_parse(raw_value :: Any) = StarpuExprValue(raw_value)
+
+# A bare symbol is parsed as a variable reference.
+starpu_parse(sym :: Symbol) = StarpuExprVar(sym)
+
+#======================================================
+                TYPED EXPRESSION
+======================================================#
+
+
+
+# Parse a type-annotated expression (`expr :: T`).
+#
+# Returns a `StarpuExprTypedVar` when the annotated expression is a plain
+# variable, and a `StarpuExprTypedExpr` otherwise. An error is raised when
+# `x` is not a `::` expression or when the annotation does not evaluate to a
+# type.
+function starpu_parse_typed(x :: Expr)
+
+    if (x.head != :(::))
+        error("Invalid type assigned expression")
+    end
+
+    expr = starpu_parse(x.args[1])
+
+    # NOTE(review): the annotation is evaluated with `eval`, so it must be
+    # resolvable at parse time; only expressions from trusted source code
+    # should reach this point.
+    # The offending annotation is reported inside the error message instead of
+    # being written to standard output.
+    typ = try
+        eval(x.args[2]) :: Type
+    catch
+        error("Invalid type in type assigned expression: $(x.args[2])")
+    end
+
+    if (isa(expr, StarpuExprVar))
+        return StarpuExprTypedVar(expr.name, typ)
+    end
+
+    return StarpuExprTypedExpr(expr, typ)
+end
+
+
+
+
+
+# Translation table from Julia scalar types to the C type names written in
+# generated code. `Nothing` maps to `void`. Types absent from this table are
+# not translatable through a direct lookup.
+starpu_type_traduction_dict = Dict(
+    Int32 => "int32_t",
+    UInt32 => "uint32_t",
+    Float32 => "float",
+    Int64 => "int64_t",
+    UInt64 => "uint64_t",
+    Float64 => "double",
+    Nothing => "void"
+)
+
+
+
+# Translate a Julia type into the C type name used in generated code.
+# Array types are delegated to `starpu_type_traduction_array`, `Ptr` types
+# become the translation of their element type followed by "*", and any other
+# type is looked up in `starpu_type_traduction_dict`.
+function starpu_type_traduction(x)
+    x <: Array && return starpu_type_traduction_array(x)
+    x <: Ptr && return starpu_type_traduction(eltype(x)) * "*"
+
+    return starpu_type_traduction_dict[x]
+end
+
+# Translate an `Array{T,N}` type: the translation of `T` followed by one "*"
+# per array dimension.
+function starpu_type_traduction_array(x :: Type{Array{T,N}})  where {T,N}
+    return starpu_type_traduction(T) * repeat("*", N)
+end
+
+# Write a typed node. A typed variable is written as a declaration (the
+# translated type, a space, the name); any other typed expression is written
+# as its inner expression alone, without the type. `restrict` is accepted but
+# currently has no effect on the output.
+function print(io :: IO, x :: StarpuExprTyped ; indent = 0,restrict=false)
+
+    if !isa(x, StarpuExprTypedVar)
+        return print(io, x.expr, indent = indent)
+    end
+
+    print(io, starpu_type_traduction(x.typ), " ", x.name)
+end
+
+
+
+# Rebuild a typed expression around its transformed inner expression, keeping
+# the type, then apply `func` to the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprTypedExpr)
+    inner = apply(func, expr.expr)
+    rebuilt = StarpuExprTypedExpr(inner, expr.typ)
+
+    return func(rebuilt)
+end
+
+#======================================================
+                While loop
+======================================================#
+
+
+# Parse a Julia `while` expression (condition, body) into a
+# `StarpuExprWhile`.
+function starpu_parse_while(x :: Expr)
+
+    x.head == :while || error("Invalid \"while\" loop")
+    length(x.args) >= 2 || error("Invalid \"while\" loop")
+
+    condition = starpu_parse(x.args[1])
+    loop_body = starpu_parse(x.args[2])
+
+    return StarpuExprWhile(condition, loop_body)
+end
+
+
+# Write a `while` loop as C code, with the body between braces.
+# `restrict` is not used; it is accepted so that this method can be called
+# with the same keywords as the other statement printers.
+function print(io :: IO, x :: StarpuExprWhile ; indent = 0, restrict = false)
+    print_newline(io, indent)
+    print(io, "while (")
+    print(io, x.cond, indent = indent + starpu_indent_size)
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.body, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+end
+
+
+
+# Rebuild a `while` loop after applying `func` to its condition and to its
+# body, then apply `func` to the rebuilt loop.
+function apply(func :: Function, expr :: StarpuExprWhile)
+    new_cond = apply(func, expr.cond)
+    new_body = apply(func, expr.body)
+
+    rebuilt = StarpuExprWhile(new_cond, new_body)
+    return func(rebuilt)
+end

+ 132 - 0
julia/StarPU.jl/src/compiler/file_generation.jl

@@ -0,0 +1,132 @@
+
+
+
+# Name of the file receiving generated CUDA kernels.
+# NOTE(review): "PRINT TO STDOUT" presumably acts as a sentinel meaning that
+# no output file has been selected -- confirm against the readers of this
+# global.
+global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
+
+
+
+# Name of the file receiving generated CPU kernels; same initial value as the
+# CUDA counterpart.
+global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
+
+# C preamble written at the top of every generated CPU kernel file: the
+# required includes plus two helpers.
+#
+# `jlstarpu_interval_size` returns the number of elements of the range
+# start:step:stop (0 when the range is empty). The count is
+# (stop - start + step) / step clamped at 0: with C integer division
+# truncating toward zero this is exact for both signs of a non-zero `step`,
+# including steps other than +1/-1 and a single-element range with a
+# negative step.
+const cpu_kernel_file_start = "#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    return jlstarpu_max(0, (stop - start + step) / step);
+}
+
+"
+
+# CUDA preamble written at the top of every generated CUDA kernel file: the
+# required includes, the THREADS_PER_BLOCK constant, and host and device
+# versions of two helpers.
+#
+# `jlstarpu_interval_size` (and its `__device` twin) returns the number of
+# elements of the range start:step:stop (0 when the range is empty). The
+# count is (stop - start + step) / step clamped at 0: with C integer division
+# truncating toward zero this is exact for both signs of a non-zero `step`,
+# including steps other than +1/-1 and a single-element range with a
+# negative step.
+const cuda_kernel_file_start = "#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+
+#define THREADS_PER_BLOCK 64
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    return jlstarpu_max(0, (stop - start + step) / step);
+}
+
+
+__device__ static inline long long jlstarpu_max__device(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+__device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
+{
+	return jlstarpu_max__device(0, (stop - start + step) / step);
+}
+
+
+"
+
+"""
+	Opens a new Cuda source file, where generated GPU kernels will be written
+"""
+function starpu_new_cuda_kernel_file(file_name :: String)
+
+    # Remember the file that now receives generated CUDA kernels.
+    global generated_cuda_kernel_file_name = file_name
+
+    # Create (or truncate) the file and write the common CUDA preamble.
+    # The do-block form of `open` closes the file even if the write throws.
+    open(file_name, "w") do kernel_file
+        print(kernel_file, cuda_kernel_file_start)
+    end
+
+    return nothing
+end
+
+# NOTE(review): this exports the name `target`, whereas the macro defined
+# below would be exported as `@target` -- confirm which one is intended.
+export target
+# Select the targets for which codelets are generated.
+# The argument is evaluated with `eval` when the macro is expanded, and the
+# returned code assigns the resulting value to the global `starpu_target`.
+macro target(x)
+    targets = eval(x)
+    return quote
+        starpu_target=$targets
+        global starpu_target
+    end
+end
+
+# Registries of generated codelets, both mapping strings to strings.
+# NOTE(review): presumably keyed by codelet name and holding the name of the
+# generated kernel function -- confirm against the code that fills them.
+export CPU_CODELETS
+global CPU_CODELETS=Dict{String,String}()
+export CUDA_CODELETS
+global CUDA_CODELETS=Dict{String,String}()
+
+"""
+	    Executes @cuda_kernel and @cpu_kernel
+        """
+macro codelet(x)
+    # Everything up to the `quote` runs when the macro is expanded: the
+    # definition is parsed, turned into a CPU kernel and a CUDA
+    # (prekernel, kernel) pair, and the output file names are derived from
+    # the codelet name.
+    parsed = starpu_parse(x)
+    # Codelet name, read from the definition's prototype.
+    # NOTE(review): this indexing assumes a definition with a return type
+    # annotation -- confirm.
+    name=string(x.args[1].args[1].args[1]);
+    dump(name)
+    cpu_expr = transform_to_cpu_kernel(parsed)
+    prekernel, kernel = transform_to_cuda_kernel(parsed)
+    # These assignments create macro-local names: no `global` declaration is
+    # used here.
+    generated_cpu_kernel_file_name=string("genc_",string(x.args[1].args[1].args[1]),".c")
+    generated_cuda_kernel_file_name=string("gencuda_",string(x.args[1].args[1].args[1]),".cu")
+    # The selected targets are read at expansion time.
+    targets=starpu_target
+    # The returned code writes one source file per selected target and
+    # records the codelet in the matching registry.
+    return quote
+        
+        if ($targets&$STARPU_CPU!=0)
+            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "w")
+            @debugprint "generating " $(generated_cpu_kernel_file_name)
+            print(kernel_file, $(esc(cpu_kernel_file_start)))
+            print(kernel_file, $cpu_expr)
+            close(kernel_file)
+            CPU_CODELETS[$name]=$name
+        end
+        
+        if ($targets&$STARPU_CUDA!=0)
+            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "w")
+            @debugprint "generating " $(generated_cuda_kernel_file_name)
+            print(kernel_file, $(esc(cuda_kernel_file_start)))
+            # The kernel is written with a "__global__" qualifier and the
+            # prekernel with C linkage.
+            print(kernel_file, "__global__ ", $kernel)
+            print(kernel_file, "\nextern \"C\" ", $prekernel)
+            close(kernel_file)
+            CUDA_CODELETS[$name]="CUDA_"*$name
+        end
+        print("end generation")
+        #starpu_task_library_name="generated_tasks"
+        #global starpu_task_library_name
+    end
+end

+ 13 - 0
julia/StarPU.jl/src/compiler/include.jl

@@ -0,0 +1,13 @@
+export starpu_new_cpu_kernel_file
+export starpu_new_cuda_kernel_file
+export @codelet
+export @target
+
+include("utils.jl")
+include("expressions.jl")
+include("parsing.jl")
+include("expression_manipulation.jl")
+include("c.jl")
+include("cuda.jl")
+include("file_generation.jl")
+

+ 5 - 8
julia/src/Compiler/parsing.jl

@@ -14,19 +14,16 @@ starpu_parse_key_word_parsing_function = Dict{Symbol, Function}()
 function starpu_parse(x :: Expr)
 
     if (x.head == :macrocall)
-
-        if (x.args[1] != Symbol("@indep"))
-            error("Only @indep macro, used before a for loop, is allowed ($(x.args[1]) was found)")
+        if (x.args[1] != Symbol("@parallel"))
+            error("Only @parallel macro, used before a for loop, is allowed ($(x.args[1]) was found)")
         end
 
-        if (length(x.args) != 2)
-            error("Invalid usage of @indep macro")
+        if (length(x.args) != 3)
+            error("Invalid usage of @parallel macro", length(x.args))
         end
-
-        return starpu_parse_for(x.args[2], is_independant = true)
+        return starpu_parse_for(x.args[3], is_independant = true)
     end
 
-
     if !(x.head in keys(starpu_parse_key_word_parsing_function))
         return StarpuExprInvalid() #TODO error ?
     end

+ 0 - 12
julia/src/Compiler/utils.jl

@@ -1,9 +1,6 @@
-
 import Base.print
 
-
 function print_newline(io :: IO, indent = 0, n_lines = 1)
-
     for i in (1 : n_lines)
         print(io, "\n")
     end
@@ -15,11 +12,7 @@ end
 
 starpu_indent_size = 4
 
-
-
-
 function rand_char()
-
     r = rand(UInt) % 62
 
     if (0 <= r < 10)
@@ -32,19 +25,14 @@ function rand_char()
 end
 
 function rand_string(size = 8)
-
     output = ""
 
     for i in (1 : size)
         output *= string(rand_char())
     end
-
     return output
 end
 
-
-
-
 function system(cmd :: String)
     ccall((:system, "libc"), Cint, (Cstring,), cmd)
 end

+ 1 - 7
julia/src/Wrapper/C/jlstarpu.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,12 +28,7 @@
 #include <starpu.h>
 #include <pthread.h>
 
-
 #include "jlstarpu_utils.h"
 #include "jlstarpu_task.h"
 
-
-
-
-
 #endif /* JLSTARPU_H_ */

+ 21 - 66
julia/src/Wrapper/C/jlstarpu_data_handles.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,31 +16,13 @@
 
 #include "jlstarpu.h"
 
-
-
-
-#if 0
-void print_vector_interface(struct starpu_vector_interface * i)
-{
-	printf("Vector interface at %p\n", i);
-	printf("\tdev_handle : %p\n", i->dev_handle);
-	printf("\telement_size : %u\n", i->elemsize);
-	printf("\tnx : %u\n", i->nx);
-	printf("\toffset : %u\n", i->offset);
-	printf("\tptr : %p\n", i->ptr);
-	printf("\tslide_base : %u\n", i->slice_base);
-}
-#endif
-
-
 enum jlstarpu_data_filter_func
 {
-    JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
-    JLSTARPU_MATRIX_FILTER_BLOCK
+	JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
+	JLSTARPU_MATRIX_FILTER_BLOCK,
+	JLSTARPU_VECTOR_FILTER_BLOCK,
 };
 
-
-
 struct jlstarpu_data_filter
 {
 	enum jlstarpu_data_filter_func func;
@@ -52,63 +33,37 @@ struct jlstarpu_data_filter
 
 void * jlstarpu_translate_data_filter_func(enum jlstarpu_data_filter_func func)
 {
-
 	switch (func){
-
 	case JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK:
 		return starpu_matrix_filter_vertical_block;
-
 	case JLSTARPU_MATRIX_FILTER_BLOCK:
 		return starpu_matrix_filter_block;
-
+	case JLSTARPU_VECTOR_FILTER_BLOCK:
+		return starpu_vector_filter_block;
 	default:
 		return NULL;
-
 	}
 
 }
 
-
-void jlstarpu_translate_data_filter
-(
-		const struct jlstarpu_data_filter * const input,
-		struct starpu_data_filter * output
-)
+void jlstarpu_translate_data_filter(const struct jlstarpu_data_filter * const input,struct starpu_data_filter * output)
 {
 	memset(output, 0, sizeof(struct starpu_data_filter));
-
 	output->filter_func = jlstarpu_translate_data_filter_func(input->func);
 	output->nchildren = input->nchildren;
-
 }
 
-
-
-
-
-
-
-
-
-void jlstarpu_data_partition
-(
-		starpu_data_handle_t handle,
-		const struct jlstarpu_data_filter * const jl_filter
-)
+void jlstarpu_data_partition(starpu_data_handle_t handle,const struct jlstarpu_data_filter * const jl_filter)
 {
 	struct starpu_data_filter filter;
 	jlstarpu_translate_data_filter(jl_filter, &filter);
-
 	starpu_data_partition(handle, &filter);
-
 }
 
 
-void jlstarpu_data_map_filters_1_arg
-(
-		starpu_data_handle_t handle,
-		const struct jlstarpu_data_filter * const jl_filter
-)
+void jlstarpu_data_map_filters_1_arg(starpu_data_handle_t handle,
+	const struct jlstarpu_data_filter * const jl_filter
+	)
 {
 	struct starpu_data_filter filter;
 	jlstarpu_translate_data_filter(jl_filter, &filter);
@@ -120,10 +75,10 @@ void jlstarpu_data_map_filters_1_arg
 
 void jlstarpu_data_map_filters_2_arg
 (
-		starpu_data_handle_t handle,
-		const struct jlstarpu_data_filter * const jl_filter_1,
-		const struct jlstarpu_data_filter * const jl_filter_2
-)
+	starpu_data_handle_t handle,
+	const struct jlstarpu_data_filter * const jl_filter_1,
+	const struct jlstarpu_data_filter * const jl_filter_2
+	)
 {
 	struct starpu_data_filter filter_1;
 	jlstarpu_translate_data_filter(jl_filter_1, &filter_1);
@@ -139,12 +94,12 @@ void jlstarpu_data_map_filters_2_arg
 
 
 
-#define JLSTARPU_GET(interface, field, ret_type)\
-	\
-	ret_type jlstarpu_##interface##_get_##field(const struct starpu_##interface##_interface * const x)\
-	{\
-		return (ret_type) x->field;\
-	}\
+#define JLSTARPU_GET(interface, field, ret_type)			\
+									\
+	ret_type jlstarpu_##interface##_get_##field(const struct starpu_##interface##_interface * const x) \
+	{								\
+		return (ret_type) x->field;				\
+	}								\
 
 
 

+ 1 - 7
julia/src/Wrapper/C/jlstarpu_simple_functions.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,16 +15,11 @@
  */
 #include "jlstarpu.h"
 
-
-
-
 int jlstarpu_init(void)
 {
 	return starpu_init(NULL);
 }
 
-
-
 void jlstarpu_set_to_zero(void * ptr, unsigned int size)
 {
 	memset(ptr, 0, size);

+ 2 - 7
julia/src/Wrapper/C/jlstarpu_task.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,11 +26,6 @@
 
 #include "jlstarpu.h"
 
-
-
-
-
-
 struct jlstarpu_codelet
 {
 	uint32_t where;
@@ -40,6 +34,7 @@ struct jlstarpu_codelet
 	char * cpu_func_name;
 
 	starpu_cuda_func_t cuda_func;
+	starpu_opencl_func_t opencl_func;
 
 	int nbuffer;
 	enum starpu_data_access_mode * modes;

+ 9 - 9
julia/src/Wrapper/C/jlstarpu_task_submit.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,14 +43,12 @@ struct starpu_codelet * jlstarpu_translate_codelet(struct jlstarpu_codelet * con
 
 	starpu_codelet_init(output);
 
-
-
 	output->where = input->where;
-
 	output->cpu_funcs[0] = input->cpu_func;
 	output->cpu_funcs_name[0] = input->cpu_func_name;
 
 	output->cuda_funcs[0] = input->cuda_func;
+	output->opencl_funcs[0] = input->opencl_func;
 
 	output->nbuffers = input->nbuffer;
 	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
@@ -70,6 +67,7 @@ void jlstarpu_codelet_update(const struct jlstarpu_codelet * const input, struct
 	output->cpu_funcs_name[0] = input->cpu_func_name;
 
 	output->cuda_funcs[0] = input->cuda_func;
+	output->opencl_funcs[0] = input->opencl_func;
 
 	output->nbuffers = input->nbuffer;
 	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
@@ -84,7 +82,9 @@ void jlstarpu_free_codelet(struct starpu_codelet * cl)
 }
 #endif
 
-
+void jlstarpu_hello() {
+	fprintf(stderr,"coucou !");
+}
 
 #if 0
 struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const input)
@@ -104,7 +104,9 @@ struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const
 }
 #endif
 
-
+char *starpu_find_function(char *name, char *device) {
+	return NULL;
+}
 
 void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starpu_task * const output)
 {
@@ -115,8 +117,6 @@ void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starp
 	output->cl_arg_size = input->cl_arg_size;
 }
 
-
-
 /*
 
 void print_perfmodel(struct starpu_perfmodel * p)

+ 1 - 2
julia/src/Wrapper/C/jlstarpu_utils.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 23 - 26
julia/src/Wrapper/Julia/linked_list.jl

@@ -1,21 +1,18 @@
-
-
-
     export Link
     mutable struct Link{T}
 
         data :: T
 
-        previous :: Union{Nullable{Link{T}}, Link{T}}
-        next :: Union{Nullable{Link{T}}, Link{T}}
+        previous :: Union{Nothing, Link{T}}
+        next :: Union{Nothing, Link{T}}
 
         list
 
         function Link{T}(x :: T, l) where {T}
             output = new()
             output.data = x
-            output.previous = Nullable{Link{T}}()
-            output.next = Nullable{Link{T}}()
+            output.previous = Nothing()
+            output.next = Nothing()
             output.list = l
             return output
         end
@@ -27,14 +24,14 @@
 
         nelement :: Int64
 
-        first :: Union{Nullable{Link{T}}, Link{T}}
-        last :: Union{Nullable{Link{T}}, Link{T}}
+        first :: Union{Nothing, Link{T}}
+        last :: Union{Nothing, Link{T}}
 
         function LinkedList{T}() where {T}
             output = new()
             output.nelement = 0
-            output.first = Nullable{Link{T}}()
-            output.last = Nullable{Link{T}}()
+            output.first = Nothing()
+            output.last = Nothing()
 
             return output
         end
@@ -50,7 +47,7 @@
         l.first = new_first
         new_first.next = old_first
 
-        if (isnull(old_first))
+        if (isnothing(old_first))
             l.last = new_first
         else
             old_first.previous = new_first
@@ -71,7 +68,7 @@
         l.last = new_last
         new_last.previous = old_last
 
-        if (isnull(old_last))
+        if (isnothing(old_last))
             l.first = new_last
         else
             old_last.next = new_last
@@ -106,13 +103,13 @@
         next = lnk.next
         previous = lnk.previous
 
-        if (isnull(next))
+        if (isnothing(next))
             l.last = previous
         else
             next.previous = previous
         end
 
-        if (isnull(previous))
+        if (isnothing(previous))
             l.first = next
         else
             previous.next = next
@@ -140,7 +137,7 @@
         quote
             $(esc(lnk_iterator)) = $(esc(list)).first
 
-            while (!isnull($(esc(lnk_iterator))))
+            while (!isnothing($(esc(lnk_iterator))))
                 __next_lnk_iterator = $(esc(lnk_iterator)).next
                 $(esc(expression))
                 $(esc(lnk_iterator)) = __next_lnk_iterator
@@ -155,7 +152,7 @@
         quote
             $(esc(lnk_iterator)) = $(esc(list)).last
 
-            while (!isnull($(esc(lnk_iterator))))
+            while (!isnothing($(esc(lnk_iterator))))
                 __next_lnk_iterator = $(esc(lnk_iterator)).previous
                 $(esc(expression))
                 $(esc(lnk_iterator)) = __next_lnk_iterator
@@ -173,7 +170,7 @@
 
         print(io, " ; previous: ")
 
-        if (isnull(lnk.previous))
+        if (isnothing(lnk.previous))
             print(io, "NONE")
         else
             print(io, lnk.previous.data)
@@ -181,7 +178,7 @@
 
         print(io, " ; next: ")
 
-        if (isnull(lnk.next))
+        if (isnothing(lnk.next))
             print(io, "NONE")
         else
             print(io, lnk.next.data)
@@ -199,7 +196,7 @@
 
         @foreach_asc l lnk begin
 
-            if (!isnull(lnk.previous))
+            if (!isnothing(lnk.previous))
                 print(io, ", ")
             end
 
@@ -213,24 +210,24 @@
 
 
 
-    import Base.start
+    #import Base.start
     function start(l :: LinkedList)
         return nothing
     end
 
 
-    import Base.done
+    #import Base.done
     function done(l :: LinkedList, state)
 
         if (state == nothing)
-            return isnull(l.first)
+            return isnothing(l.first)
         end
 
-        return isnull(state.next)
+        return isnothing(state.next)
     end
 
 
-    import Base.next
+    #import Base.next
     function next(l :: LinkedList, state)
 
         if (state == nothing)
@@ -243,7 +240,7 @@
     end
 
 
-    import Base.endof
+    #import Base.endof
     function endof(l :: LinkedList)
         return l.nelement
     end

julia/tst/black_scholes/black_scholes.c → julia/black_scholes/black_scholes.c


+ 83 - 13
julia/tst/black_scholes/cpu_cuda_black_scholes.jl

@@ -1,13 +1,8 @@
-include("../../src/Compiler/include.jl")
+import Libdl
+using StarPU
 
-starpu_new_cpu_kernel_file("../build/generated_cpu_black_scholes.c")
-starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
-
-
-
-
-
-@cpu_cuda_kernel function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) ::Void
+@target STARPU_CPU+STARPU_CUDA
+@codelet function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) :: Float32
     
     widthn ::Int64 = width(data)
         
@@ -25,7 +20,7 @@ starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
     b5 ::Float64 = 1.330274428
 
     
-    @indep for i = 1:widthn
+    @parallel for i = 1:widthn
         
 
         d1 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] + pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i]))
@@ -117,8 +112,83 @@ starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
         res[2,i] = -data[1,i] * (normd1n) + data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2n) # -S * N(-d1) + r*exp(-r*T) * norm(-d2)
         
     end
+    return 0
+end
+
+
+@debugprint "starpu_init"
+starpu_init()
+
+# Run the `black_scholes` codelet over `data`, writing the results into `res`.
+#
+# Both matrices are registered with StarPU and split into `nslices` pieces with
+# the STARPU_MATRIX_FILTER_VERTICAL_BLOCK filter; one task is submitted per
+# slice and the function only returns once every task of the loop is complete.
+# Always returns 0.
+function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64)
+    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices)
+
+    # NOTE(review): @starpu_block presumably unregisters the handles created
+    # inside the `let` when the block ends -- confirm against StarPU.jl.
+    @starpu_block let
+        dat_handle, res_handle = starpu_data_register(data, res)
+
+        # The same filter is applied to both handles so that slice `task` of the
+        # input lines up with slice `task` of the output.
+        starpu_data_partition(dat_handle, vert)
+        starpu_data_partition(res_handle, vert)
+        
+        # Compute the price of the call and put options into the res matrix
+        @starpu_sync_tasks for task in (1:nslices)
+            @starpu_async_cl black_scholes(dat_handle[task], res_handle[task]) [STARPU_RW, STARPU_RW] 
+        end
+    end
+    return 0
 end
 
-compile_cpu_kernels("../build/generated_cpu_black_scholes.so")
-compile_cuda_kernels("../build/generated_cuda_black_scholes.so")
-combine_kernel_files("../build/generated_tasks_black_scholes.so", ["../build/generated_cpu_black_scholes.so", "../build/generated_cuda_black_scholes.so"])
+
+# Fill the first `data_nbr` columns of `data` (five rows each) with uniform
+# random values and return `data`. Rows 1 and 2 are drawn from [0, 100),
+# row 3 from [0, 1) and rows 4 and 5 from [0, 10).
+function init_data(data, data_nbr)
+    # Upper bound of the random draw for each of the five rows.
+    scales = (100, 100, 1, 10, 10)
+    for col in 1:data_nbr
+        for (row, scale) in enumerate(scales)
+            data[row, col] = rand(Float64) * scale
+        end
+    end
+    return data
+end
+        
+
+
+# Time `nbr_tests` runs of `black_scholes_starpu` on `data_nbr` freshly
+# randomised options split into `nslices` slices, and return the median
+# wall-clock duration in seconds (the lower median when `nbr_tests` is even).
+#
+# Throws an ArgumentError when `nbr_tests` is smaller than 1.
+function median_times(data_nbr, nslices, nbr_tests)
+    nbr_tests >= 1 || throw(ArgumentError("nbr_tests must be at least 1, got $nbr_tests"))
+
+    data ::Matrix{Float64} = zeros(5, data_nbr)
+    # data[1,1] = 100.0
+    # data[2,1] = 100.0
+    # data[3,1] = 0.05
+    # data[4,1] = 1.0
+    # data[5,1] = 0.2
+
+    res ::Matrix{Float64} = zeros(2, data_nbr)
+
+    exec_times ::Vector{Float64} = zeros(nbr_tests)
+
+    for i = 1:nbr_tests
+        # New inputs for every run; the initialisation is not part of the timing.
+        init_data(data, data_nbr)
+
+        # tic()/toq() no longer exist in Julia >= 1.0; @elapsed returns the
+        # elapsed time of the expression in seconds as a Float64.
+        exec_times[i] = @elapsed black_scholes_starpu(data, res, nslices)
+    end
+    sort!(exec_times)
+
+    return exec_times[1 + div(nbr_tests - 1, 2)]
+end
+
+# For every problem size in `start_nbr : step_nbr : stop_nbr`, measure the
+# median run time with `median_times`, print it, and append a
+# "<size> <seconds>" line to black_scholes_times.dat (the file is overwritten).
+function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests)
+    open("black_scholes_times.dat", "w") do f
+        for data_nbr in (start_nbr : step_nbr : stop_nbr)
+            t = median_times(data_nbr, nslices, nbr_tests)
+            # The former "C:" and "Gen:" columns read the arrays `mtc` and
+            # `mtcgen`, which are not defined anywhere in this file and made the
+            # call fail with an UndefVarError; only the Julia timing is reported.
+            println("Number of data:\n$data_nbr\nTimes:\njl: $t")
+            write(f, "$data_nbr $(t)\n")
+        end
+    end
+end

+ 263 - 0
julia/mandelbrot/mandelbrot.c

@@ -0,0 +1,263 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019       Mael Keryell
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <starpu.h>
+#include "../display.h"
+
+void cpu_mandelbrot(void **, void *);
+void gpu_mandelbrot(void **, void *);
+
+/* Per-task arguments: one record is filled for every submitted task and
+ * handed to the codelet through task->cl_arg. */
+struct Params
+{
+	/* cr, ci: forwarded unchanged from the command line; presumably the
+	 * centre of the rendered region (cf. mandelbrot.jl) -- TODO confirm in
+	 * the kernel sources. */
+	float cr;
+	float ci;
+	/* Indices of the tile handled by the task. */
+	unsigned taskx;
+	unsigned tasky;
+	/* Dimensions of the whole image, not of the tile. */
+	unsigned width;
+	unsigned height;
+};
+
+
+
+/* Codelet shared by all tasks: a CPU and a CUDA implementation (both defined
+ * outside this file) working on a single buffer accessed in read-write mode. */
+struct starpu_codelet cl =
+{
+	.cpu_funcs = {cpu_mandelbrot},
+	.cuda_funcs = {gpu_mandelbrot},
+	.nbuffers = 1,
+	.modes = {STARPU_RW}
+};
+
+
+/*
+ * Fill the width x height image `pixels` by submitting one StarPU task per
+ * tile of an nslicesx x nslicesy grid, and wait for all of them to finish.
+ *
+ * cr, ci are passed through to the kernels in struct Params. On allocation
+ * failure an error is printed and the image is left untouched.
+ */
+void mandelbrot_with_starpu(int *pixels, float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy)
+{
+	starpu_data_handle_t p_handle;
+	unsigned taskx, tasky;
+
+	/* One argument record per task. Allocated before anything is registered
+	 * so that a failure needs no StarPU clean-up; the records must outlive
+	 * the tasks and are therefore only freed after starpu_task_wait_for_all(). */
+	struct Params *params = malloc((size_t)nslicesx*nslicesy*sizeof(struct Params));
+	if (params == NULL){
+		fprintf(stderr, "mandelbrot_with_starpu: cannot allocate task parameters\n");
+		return;
+	}
+
+	starpu_matrix_data_register(&p_handle, STARPU_MAIN_RAM, (uintptr_t)pixels, width, width, height, sizeof(int));
+
+	struct starpu_data_filter vert =
+	{
+		.filter_func = starpu_matrix_filter_vertical_block,
+		.nchildren = nslicesy
+	};
+
+	struct starpu_data_filter horiz =
+	{
+		.filter_func = starpu_matrix_filter_block,
+		.nchildren = nslicesx
+	};
+
+	/* Two-level partitioning: vertical blocks first, then blocks of each. */
+	starpu_data_map_filters(p_handle, 2, &vert, &horiz);
+
+	for (taskx = 0; taskx < nslicesx; taskx++){
+		for (tasky = 0; tasky < nslicesy; tasky++){
+			struct starpu_task *task = starpu_task_create();
+			struct Params *param = params + taskx + tasky*nslicesx;
+
+			*param = (struct Params){cr, ci, taskx, tasky, width, height};
+
+			task->cl = &cl;
+			task->handles[0] = starpu_data_get_sub_data(p_handle, 2, tasky, taskx);
+			task->cl_arg = param;
+			task->cl_arg_size = sizeof(struct Params);
+
+			/* Same reporting as in mult.c: a failed submission must not go unnoticed. */
+			if (starpu_task_submit(task) != 0) fprintf(stderr, "submit task error\n");
+		}
+	}
+	starpu_task_wait_for_all();
+
+	starpu_data_unpartition(p_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(p_handle);
+
+	free(params);
+}
+
+/* Set every element of the row-major width x height image `pixels` to 0. */
+void init_zero(int * pixels, unsigned width, unsigned height)
+{
+	unsigned row, col;
+	for (row = 0; row < height; row++){
+		/* first element of the current row */
+		int *line = pixels + row*width;
+		for (col = 0; col < width; col++){
+			line[col] = 0;
+		}
+	}
+}
+
+/* qsort() comparison callback ordering two doubles ascendingly. */
+static int compare_doubles(const void *lhs, const void *rhs)
+{
+	double a = *(const double *)lhs;
+	double b = *(const double *)rhs;
+
+	return (a > b) - (a < b);
+}
+
+/*
+ * Sort the nbr_tests first elements of arr in ascending order, in place.
+ * Arrays with fewer than two elements (including an empty one) are left
+ * untouched.
+ */
+void sort(double *arr, unsigned nbr_tests)
+{
+	/* Nothing to sort. The previous hand-written loop evaluated
+	 * nbr_tests - 1 on an unsigned value, which wrapped around for an empty
+	 * array and walked far outside of arr. */
+	if (nbr_tests < 2)
+		return;
+
+	qsort(arr, nbr_tests, sizeof(double), compare_doubles);
+}
+/*
+ * Run mandelbrot_with_starpu() nbr_tests times on a width x height image and
+ * return the median execution time in seconds (element nbr_tests/2 of the
+ * sorted timings). The image of the last run is written to
+ * PPM/mandelbrot<width>.ppm.
+ *
+ * Returns 0 when nbr_tests is 0 and -1 when memory cannot be allocated; no
+ * image is written in either case.
+ */
+double median_time(float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy, unsigned nbr_tests)
+{
+	int *Pixels;
+	double *exec_times;
+	double start, stop, median;
+	char filename[32];
+	unsigned i;
+
+	if (nbr_tests == 0)
+		return 0.;
+
+	/* The timings live on the heap: a variable-length array would overflow
+	 * the stack for large nbr_tests. */
+	Pixels = malloc((size_t)width*height*sizeof(int));
+	exec_times = malloc(nbr_tests*sizeof(double));
+	if (Pixels == NULL || exec_times == NULL){
+		fprintf(stderr, "median_time: out of memory\n");
+		free(Pixels);
+		free(exec_times);
+		return -1.;
+	}
+
+	for (i = 0; i < nbr_tests; i++){
+		init_zero(Pixels, width, height);
+
+		start = starpu_timing_now(); // starpu_timing_now() gives the time in microseconds.
+		mandelbrot_with_starpu(Pixels, cr, ci, width, height, nslicesx, nslicesy);
+		stop = starpu_timing_now();
+
+		exec_times[i] = (stop-start)/1.e6;
+	}
+
+	/* width is unsigned, hence %u; snprintf bounds the write to the buffer. */
+	snprintf(filename, sizeof(filename), "PPM/mandelbrot%u.ppm", width);
+	printf("%s\n", filename);
+
+	mandelbrot_graph(filename, Pixels, width, height);
+
+	free(Pixels);
+
+	sort(exec_times, nbr_tests);
+	median = exec_times[nbr_tests/2];
+
+	free(exec_times);
+
+	return median;
+}
+
+/*
+ * Run mandelbrot_with_starpu() nbr_tests times on a width x height image and
+ * store the duration of run i, in seconds, in exec_times[i]. exec_times must
+ * have room for nbr_tests values.
+ *
+ * If the image cannot be allocated, every entry is set to -1 instead.
+ */
+void fluctuation_time(float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy, unsigned nbr_tests, double *exec_times)
+{
+	int *Pixels = malloc((size_t)width*height*sizeof(int));
+
+	unsigned i;
+
+	double start, stop, exec_t;
+
+	if (Pixels == NULL){
+		fprintf(stderr, "fluctuation_time: cannot allocate a %ux%u image\n", width, height);
+		/* Leave recognisably invalid timings rather than uninitialised memory. */
+		for (i = 0; i < nbr_tests; i++)
+			exec_times[i] = -1.;
+		return;
+	}
+
+	for (i = 0; i < nbr_tests; i++){
+		init_zero(Pixels, width, height);
+
+		start = starpu_timing_now(); // starpu_timing_now() gives the time in microseconds.
+		mandelbrot_with_starpu(Pixels, cr, ci, width, height, nslicesx, nslicesy);
+		stop = starpu_timing_now();
+
+		exec_t = (stop-start)/1.e6;
+		exec_times[i] = exec_t;
+
+		/* char filename[33]; */
+		/* sprintf(filename, "../PPM/mandelbrot%d.ppm", i + 1); */
+		/* printf("%s\n", filename); */
+		/* mandelbrot_graph(filename, Pixels, width, height); */
+	}
+
+	free(Pixels);
+}
+
+
+/*
+ * For every square image size from start_dim to stop_dim (inclusive, in steps
+ * of step_dim), measure the median rendering time on an nslices x nslices
+ * grid, print it and write it as one line to
+ * DAT/mandelbrot_c_struct_times.dat.
+ *
+ * Nothing is measured when step_dim is 0 or when the output file cannot be
+ * opened; an error is printed instead.
+ */
+void display_times(float cr, float ci, unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nslices, unsigned nbr_tests)
+{
+	unsigned dim;
+	FILE *myfile;
+
+	/* A zero step would never advance dim and loop forever. */
+	if (step_dim == 0){
+		fprintf(stderr, "display_times: step_dim must not be 0\n");
+		return;
+	}
+
+	myfile = fopen("DAT/mandelbrot_c_struct_times.dat", "w");
+	if (myfile == NULL){
+		/* Typically the DAT/ directory is missing. */
+		perror("DAT/mandelbrot_c_struct_times.dat");
+		return;
+	}
+
+	for (dim = start_dim; dim <= stop_dim; dim += step_dim){
+		printf("Dimension: %u...\n", dim);
+		double t = median_time(cr, ci, dim, dim, nslices, nslices, nbr_tests);
+
+		printf("w = %u ; h = %u ; t = %f\n", dim, dim, t);
+
+		fprintf(myfile, "%f\n", t);
+	}
+
+	fclose(myfile);
+}
+
+/*
+ * Time nbr_tests renderings of a start_dim x start_dim image on an
+ * nslices x nslices grid, print every timing and write "<run> <seconds>"
+ * lines to DAT/mandelbrot_c_fluctuation.dat.
+ *
+ * step_dim and stop_dim are accepted for symmetry with display_times() but
+ * are not used. Nothing is measured if the output file cannot be opened or
+ * the timing buffer cannot be allocated.
+ */
+void display_fluctuations(float cr, float ci, unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nslices, unsigned nbr_tests)
+{
+	FILE *myfile;
+	double *exec_times;
+	unsigned i;
+
+	(void)step_dim;
+	(void)stop_dim;
+
+	myfile = fopen("DAT/mandelbrot_c_fluctuation.dat", "w");
+	if (myfile == NULL){
+		/* Typically the DAT/ directory is missing. */
+		perror("DAT/mandelbrot_c_fluctuation.dat");
+		return;
+	}
+
+	exec_times = malloc(nbr_tests * sizeof(double));
+	if (exec_times == NULL){
+		fprintf(stderr, "display_fluctuations: out of memory\n");
+		fclose(myfile);
+		return;
+	}
+
+	fluctuation_time(cr, ci, start_dim, start_dim, nslices, nslices, nbr_tests, exec_times);
+
+	/* printf("w = %u ; h = %u ; t = %f\n", dim, dim, t); */
+	for (i = 0; i < nbr_tests; i++){
+		printf("test %u: %f seconds\n", i, exec_times[i]);
+		fprintf(myfile, "%u %f\n", i, exec_times[i]);
+	}
+
+	fclose(myfile);
+	free(exec_times);
+}
+
+
+/*
+ * Entry point: parses the seven command-line arguments, initialises StarPU,
+ * runs the timing campaign of display_times() and shuts StarPU down.
+ * Returns 1 on a wrong argument count and 77 if StarPU cannot be initialised.
+ */
+int main(int argc, char **argv)
+{
+
+	/* The argument count is checked before StarPU is initialised, so this
+	 * early return needs no starpu_shutdown(). */
+	if (argc != 8){
+		printf("Usage: %s cr ci start_dim step_dim stop_dim nslices(must divide dims) nbr_tests\n", argv[0]);
+		return 1;
+	}
+	if (starpu_init(NULL) != EXIT_SUCCESS){
+		fprintf(stderr, "ERROR\n");
+		return 77;
+	}
+
+
+	
+	/* NOTE(review): atof/atoi report no parse errors, and a negative integer
+	 * becomes a very large unsigned value after the cast. */
+	float cr = (float) atof(argv[1]);
+	float ci = (float) atof(argv[2]);
+	unsigned start_dim = (unsigned) atoi(argv[3]);
+	unsigned step_dim = (unsigned) atoi(argv[4]);	
+	unsigned stop_dim = (unsigned) atoi(argv[5]);
+	unsigned nslices = (unsigned) atoi(argv[6]);
+	unsigned nbr_tests = (unsigned) atoi(argv[7]);
+
+	display_times(cr, ci, start_dim, step_dim, stop_dim, nslices, nbr_tests);
+	
+	
+	/* display_fluctuations(cr, ci, start_dim, step_dim, stop_dim, nslices, nbr_tests); */
+
+
+	starpu_shutdown();
+
+
+	return 0;
+}

+ 30 - 0
julia/mandelbrot/mandelbrot.jl

@@ -0,0 +1,30 @@
+# Render the Mandelbrot set into `pixels`, in place. The view is centred on
+# the complex number `centerr + centeri*im`; zoom and iteration limit are
+# derived from the image width. Points that escape before the limit receive a
+# value scaled into 0..255 by their iteration count, all others receive 0.
+# Returns nothing.
+function mandelbrotjl(pixels ::Matrix{Int64}, centerr ::Float64, centeri ::Float64)
+    height, width = size(pixels)
+    zoom = width * 0.25296875
+    val_diverge = 2.0
+    max_iterations = (width/2) * 0.049715909 * log10(zoom)
+    # Squared escape radius, compared against |z|^2 in the inner loop.
+    escape = val_diverge*val_diverge
+
+    for y in 1:height, x in 1:width
+        # Complex coordinate of the pixel; the orbit starts at z = c.
+        cr = centerr + (x - (width / 2))/zoom
+        ci = centeri + (y - (height / 2))/zoom
+        zr = cr
+        zi = ci
+
+        n = 0
+        while (n < max_iterations) && (zr*zr + zi*zi < escape)
+            zr, zi = zr*zr - zi*zi + cr, 2*zr*zi + ci
+            n = n+1
+        end
+
+        pixels[y,x] = (n < max_iterations) ? round(255 * n / max_iterations) : 0
+    end
+    return nothing
+end

+ 20 - 0
julia/mult/README

@@ -0,0 +1,20 @@
+You first need to compile StarPU.jl
+Now, type
+> make
+> make test
+> gnuplot mult.plot
+
+During the test, several versions are compared:
+- mult.c: this is the original C+starpu code
+- mult_native.jl: this is the native julia way of multiplying 2 matrices. There is a call to openBLAS
+- mult_generatedc.jl: generates a C file, genc_matrix_mult.c. This file is compiled into a library, that is loaded and a function is executed.
+- mult_calllib.jl: the env variable JULIA_TASK_LIB should point to a library containing the function matrix_mult. This function is called.
+
+Examples of how to launch mult.jl are given in the test rules of the makefile.
+	 
+A CUDA version is generated too. Untested so far (no nvidia gpu on my laptop!)
+
+
+
+
+

+ 55 - 0
julia/mult/makefile

@@ -0,0 +1,55 @@
+# tile size. Should be changed in mult.jl as well
+STRIDE=72
+
+# ICC compiler
+#CC =icc
+#CFLAGS=-restrict -unroll4 -ipo -falign-loops=256 -O3 -DSTRIDE=${STRIDE} -march=native $(shell pkg-config --cflags starpu-1.3)
+# GCC compiler
+CC=gcc-9
+CFLAGS += -O3 -DSTRIDE=${STRIDE} -mavx -mfma -fomit-frame-pointer -march=native -ffast-math $(shell pkg-config --cflags starpu-1.3)
+
+LDFLAGS +=$(shell pkg-config --libs starpu-1.3)
+EXTERNLIB=extern_tasks.dylib
+GENERATEDLIB=generated_tasks.dylib
+OBJECTS=$(patsubst %.c,%.o,$(wildcard gen*.c))
+LIBPATH=${PWD}/../StarPU.jl/lib
+
+# Library search path handed to julia: LIBPATH first, followed by the caller's
+# LD_LIBRARY_PATH when it is set. Written in POSIX sh syntax because recipes
+# run under /bin/sh, where the bash-only "VAR+=value" form is not an
+# assignment (and, in bash, appends without a ':' separator).
+JULIA_LIBPATH=LD_LIBRARY_PATH="${LIBPATH}$${LD_LIBRARY_PATH:+:$$LD_LIBRARY_PATH}"
+
+# These targets do not produce a file of the same name.
+.PHONY: all clean test
+
+all: ${EXTERNLIB}
+
+mult: mult.c cpu_mult.o #gpu_mult.o
+	$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)
+
+gpu_mult.o: gpu_mult.cu
+	nvcc -c $(CFLAGS) $^ -o $@
+
+%.o: %.c
+	$(CC) -c $(CFLAGS) $^ -o $@
+
+${EXTERNLIB}: cpu_mult.o
+	$(CC) -shared -fPIC $(LDFLAGS) $^ -o $@
+
+gpu_mult.so: gpu_mult.o
+	nvcc $(CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS)
+
+cpu_mult_sa: cpu_mult_sa.o
+	$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)
+
+${GENERATEDLIB}: ${OBJECTS}
+	$(CC) -shared -fPIC $(LDFLAGS) $^ -o $@
+
+# -f: do not fail when some of the patterns match nothing
+clean:
+	rm -f *.so *.o *.dylib c_*.genc gencuda_*.cu *.dat
+
+# Performance Tests
+cstarpu.dat: mult
+	STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 ./mult > $@
+julia_generatedc.dat:
+	${JULIA_LIBPATH} STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia mult.jl $@
+julia_native.dat:
+	${JULIA_LIBPATH} STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia mult_native.jl $@
+julia_calllib.dat: ${EXTERNLIB}
+	${JULIA_LIBPATH} JULIA_TASK_LIB="${EXTERNLIB}" STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia mult.jl julia_calllib.dat
+
+test: cstarpu.dat julia_generatedc.dat julia_native.dat julia_calllib.dat
+
+

+ 237 - 0
julia/mult/mult.c

@@ -0,0 +1,237 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                     Alexis Juven
+ * Copyright (C) 2012,2013                                Inria
+ * Copyright (C) 2009-2011,2013-2015                      Université de Bordeaux
+ * Copyright (C) 2010                                     Mehdi Juhoor
+ * Copyright (C) 2010-2013,2015,2017                      CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example shows a simple implementation of a blocked matrix
+ * multiplication. Note that this is NOT intended to be an efficient
+ * implementation of sgemm! In this example, we show:
+ *  - how to declare dense matrices (starpu_matrix_data_register)
+ *  - how to manipulate matrices within codelets (eg. descr[0].blas.ld)
+ *  - how to use filters to partition the matrices into blocks
+ *    (starpu_data_partition and starpu_data_map_filters)
+ *  - how to unpartition data (starpu_data_unpartition) and how to stop
+ *    monitoring data (starpu_data_unregister)
+ *  - how to manipulate subsets of data (starpu_data_get_sub_data)
+ *  - how to construct an autocalibrated performance model (starpu_perfmodel)
+ *  - how to submit asynchronous tasks
+ */
+
+#include <string.h>
+#include <math.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include <starpu.h>
+
+
+
+/*
+ * That program should compute C = A * B
+ *
+ *   A of size (z,y)
+ *   B of size (x,z)
+ *   C of size (x,y)
+
+              |---------------|
+            z |       B       |
+              |---------------|
+       z              x
+     |----|   |---------------|
+     |    |   |               |
+     |    |   |               |
+     | A  | y |       C       |
+     |    |   |               |
+     |    |   |               |
+     |----|   |---------------|
+
+ */
+
+
+
+
+
+//void gpu_mult(void **, void *);
+void cpu_mult(void **, void *);
+
+
+/* History-based performance model attached to the codelet below; StarPU
+ * identifies it by the symbol "history_perf". */
+static struct starpu_perfmodel model =
+{
+		.type = STARPU_HISTORY_BASED,
+		.symbol = "history_perf"
+};
+
+/* Codelet of one block product: two buffers are read and a third one is
+ * written (the tasks pass A, B and C in that order). Only the CPU
+ * implementation is enabled; the CUDA one is commented out. */
+static struct starpu_codelet cl =
+{
+		.cpu_funcs = {cpu_mult},
+		.cpu_funcs_name = {"cpu_mult"},
+		//.cuda_funcs = {gpu_mult},
+		.nbuffers = 3,
+		.modes = {STARPU_R, STARPU_R, STARPU_W},
+		.model = &model
+};
+
+
+/*
+ * Compute C = A * B with StarPU tasks.
+ *
+ * A is registered as a ydim x zdim matrix, B as zdim x xdim and C as
+ * ydim x xdim, all of floats. B is cut into nslicesx pieces, A into nslicesy
+ * pieces and C into an nslicesx x nslicesy grid; one task is submitted per
+ * block of C. The function returns once all tasks have completed and the data
+ * has been unpartitioned and unregistered.
+ */
+void multiply_with_starpu(float *A, float *B, float *C,  unsigned xdim,  unsigned ydim,  unsigned zdim, unsigned nslicesx, unsigned nslicesy)
+{
+	starpu_data_handle_t A_handle, B_handle, C_handle;
+
+
+	/* The argument following the data pointer is the leading dimension;
+	 * here it equals the first matrix dimension. */
+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A,
+			ydim, ydim, zdim, sizeof(float));
+	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B,
+			zdim, zdim, xdim, sizeof(float));
+	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C,
+			ydim, ydim, xdim, sizeof(float));
+
+
+	struct starpu_data_filter vert =
+	{
+			.filter_func = starpu_matrix_filter_vertical_block,
+			.nchildren = nslicesx
+	};
+
+	struct starpu_data_filter horiz =
+	{
+			.filter_func = starpu_matrix_filter_block,
+			.nchildren = nslicesy
+	};
+
+
+	/* B and A get one level of partitioning each, C gets both levels. */
+	starpu_data_partition(B_handle, &vert);
+	starpu_data_partition(A_handle, &horiz);
+	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
+
+	unsigned taskx, tasky;
+
+	for (taskx = 0; taskx < nslicesx; taskx++){
+		for (tasky = 0; tasky < nslicesy; tasky++){
+
+			struct starpu_task *task = starpu_task_create();
+
+			/* Block (taskx, tasky) of C is computed from slice tasky of A
+			 * and slice taskx of B. */
+			task->cl = &cl;
+			task->handles[0] = starpu_data_get_sub_data(A_handle, 1, tasky);
+			task->handles[1] = starpu_data_get_sub_data(B_handle, 1, taskx);
+			task->handles[2] = starpu_data_get_sub_data(C_handle, 2, taskx, tasky);
+
+			if (starpu_task_submit(task)!=0) fprintf(stderr,"submit task error\n");
+
+		}
+	}
+
+	starpu_task_wait_for_all();
+
+
+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(A_handle);
+	starpu_data_unregister(B_handle);
+	starpu_data_unregister(C_handle);
+
+}
+
+
+
+/* Fill the width x height matrix m, stored with a stride of `height` between
+ * consecutive values of the width index, with values from starpu_drand48(). */
+void init_rand(float * m, unsigned width, unsigned height)
+{
+	unsigned row, col;
+
+	for (row = 0 ; row < height ; row++){
+		/* element (row, 0); the following ones are `height` floats apart */
+		float *cell = m + row;
+		for (col = 0 ; col < width ; col++){
+			cell[col*height] = (float)(starpu_drand48());
+		}
+	}
+}
+
+
+/* Clear the width * height floats of m to all-zero bytes. */
+void init_zero(float * m, unsigned width, unsigned height)
+{
+	/* size_t arithmetic, as in the sizeof-first product */
+	size_t nbytes = sizeof(float) * width * height;
+
+	memset(m, 0, nbytes);
+}
+
+
+
+/*
+ * Run multiply_with_starpu() nb_test times on freshly initialised matrices
+ * (A and B random, C zeroed) and return the shortest run in nanoseconds.
+ *
+ * Returns -1 when nb_test is 0 or when the matrices cannot be allocated.
+ */
+double min_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nsclicesx, unsigned nsclicesy)
+{
+	unsigned i;
+
+	/* Sizes are computed in size_t so that large dimensions do not wrap. */
+	float * A = (float *) malloc((size_t)zdim*ydim*sizeof(float));
+	float * B = (float *) malloc((size_t)xdim*zdim*sizeof(float));
+	float * C = (float *) malloc((size_t)xdim*ydim*sizeof(float));
+
+	/* -1 means "no measurement yet" */
+	double exec_times=-1;
+
+	if (A == NULL || B == NULL || C == NULL){
+		fprintf(stderr, "min_time: cannot allocate the matrices (x=%u, y=%u, z=%u)\n", xdim, ydim, zdim);
+		free(A);
+		free(B);
+		free(C);
+		return exec_times;
+	}
+
+	for (i = 0 ; i < nb_test ; i++){
+
+		double start, stop, exec_t;
+
+		init_rand(A, zdim, ydim);
+		init_rand(B, xdim, zdim);
+		init_zero(C, xdim, ydim);
+
+		start = starpu_timing_now();
+		multiply_with_starpu(A, B, C, xdim, ydim, zdim, nsclicesx, nsclicesy);
+		stop = starpu_timing_now();
+
+		exec_t = (stop - start)*1.e3; // Put in ns instead of us
+		if (exec_times<0 || exec_times>exec_t) exec_times= exec_t;
+	}
+
+	free(A);
+	free(B);
+	free(C);
+	return exec_times;
+}
+
+
+/*
+ * For every square size from start_dim to stop_dim (inclusive, in steps of
+ * step_dim), print one line with the memory footprint of the three matrices
+ * in MiB followed by the operation count divided by the best time returned by
+ * min_time().
+ */
+void display_times(unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nb_tests, unsigned nsclicesx, unsigned nsclicesy)
+{
+	unsigned dim = start_dim;
+
+	while (dim <= stop_dim){
+		double best = min_time(nb_tests, dim, dim, dim, nsclicesx, nsclicesy);
+		/* three dim x dim matrices of 4-byte floats */
+		double mbytes = dim*dim*4.*3./1024./1024;
+		double rate = (2.*dim-1.)*dim*dim/best;
+
+		printf("%f %f\n", mbytes, rate);
+		dim += step_dim;
+	}
+
+}
+
+
+/*
+ * Entry point: initialises StarPU, benchmarks square products from
+ * 16*STRIDE up to 4096 in steps of 4*STRIDE (10 runs each, on a 2 x 2 grid of
+ * tasks) and shuts StarPU down. Command-line arguments are ignored.
+ * Returns 77 if StarPU cannot be initialised.
+ */
+int main(int argc, char * argv[])
+{
+	if (starpu_init(NULL) != EXIT_SUCCESS){
+		fprintf(stderr, "ERROR\n");
+		return 77;
+	}
+
+	/* STRIDE is not defined in this file; the makefile passes it with -DSTRIDE. */
+	unsigned start_dim = 16*STRIDE;
+	unsigned step_dim = 4*STRIDE;
+	unsigned stop_dim = 4096;
+	unsigned nb_tests = 10;
+	unsigned nsclicesx = 2;
+	unsigned nsclicesy = 2;
+
+	display_times(start_dim, step_dim, stop_dim, nb_tests, nsclicesx, nsclicesy);
+
+	starpu_shutdown();
+
+	return 0;
+}
+

+ 146 - 0
julia/mult/mult.jl

@@ -0,0 +1,146 @@
+import Libdl
+using StarPU
+using LinearAlgebra
+
+# should be the same as in the makefile
+const STRIDE = 72
+
+# Codelet computing m3 = m1 * m2 on one block, declared for the CPU and CUDA
+# targets.
+#
+# The active implementation is tiled in STRIDE x STRIDE blocks and unrolled by
+# 4 along k and by 2 along j. As written it reads m2[kk+3, jj+1] and walks
+# full tiles, so it assumes that every block dimension is a multiple of STRIDE
+# and that STRIDE is a multiple of 4 -- NOTE(review): nothing here checks it.
+@target STARPU_CPU+STARPU_CUDA
+@codelet function matrix_mult(m1 :: Matrix{Float32}, m2 :: Matrix{Float32}, m3 :: Matrix{Float32}) :: Float32
+
+    width_m2 :: Int32 = width(m2)
+    height_m1 :: Int32 = height(m1)
+    width_m1 :: Int32 = width(m1)
+    # Naive version
+    #@parallel for j in (1 : width_m2)
+    #    @parallel for i in (1 : height_m1)
+    #
+    #          sum :: Float32 = 0.
+
+    #          for k in (1 : width_m1)
+    #              sum = sum + m1[i, k] * m2[k, j]
+    #          end
+    
+    #          m3[i, j] = sum
+    #      end
+    #  end
+    ##### Tiled and unrolled version
+    # The result block is accumulated into, so it is cleared first.
+    for l in (1 : width_m2)
+        for m in (1 : height_m1)
+            m3[m,l] = 0
+        end
+    end
+    @parallel for i in (1 : STRIDE : height_m1)
+        for k in (1 : STRIDE : width_m1 )
+            for j in (1 : STRIDE : width_m2  )
+                for kk in (k : 4 : k+STRIDE-1)
+                    for jj in (j : 2 : j+STRIDE-1)
+                        # 4 x 2 patch of m2, loaded once and reused for every row ii
+                        alpha00 :: Float32 =m2[kk,jj]
+                        alpha01 :: Float32 =m2[kk,jj+1]
+                        alpha10 :: Float32 =m2[kk+1,jj]
+                        alpha11 :: Float32 =m2[kk+1,jj+1]
+                        alpha20 :: Float32 =m2[kk+2,jj]
+                        alpha21 :: Float32 =m2[kk+2,jj+1]
+                        alpha30 :: Float32 =m2[kk+3,jj]
+                        alpha31 :: Float32 =m2[kk+3,jj+1]
+                        for ii in (i : 1 : i+STRIDE-1) 
+                            m3[ii, jj] = m3[ii, jj] + m1[ii, kk] * alpha00 + m1[ii, kk+1] * alpha10 + m1[ii, kk+2] * alpha20 + m1[ii,kk+3]*alpha30
+                            m3[ii, jj+1] = m3[ii, jj+1] + m1[ii, kk] * alpha01 + m1[ii, kk+1] * alpha11 + m1[ii, kk+2]*alpha21 + m1[ii,kk+3]*alpha31 
+                        end
+                    end
+                end
+            end
+        end
+    end
+
+    return 0. :: Float32
+end
+
+
+@debugprint "starpu_init"
+starpu_init()
+
+# Multiply A by B into C with StarPU, one task per block of an
+# nslicesx x nslicesy grid, and return the duration in nanoseconds of the
+# fastest of 10 complete rounds of tasks.
+function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
+    # NOTE(review): `scale` is never read in this function.
+    scale= 3
+    tmin=0
+    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx)
+    horiz = StarpuDataFilter(STARPU_MATRIX_FILTER_BLOCK, nslicesy)
+    @starpu_block let
+        hA,hB,hC = starpu_data_register(A, B, C)
+        # B and A get one level of partitioning each, C gets both levels.
+        starpu_data_partition(hB, vert)
+        starpu_data_partition(hA, horiz)
+        starpu_data_map_filters(hC, vert, horiz)
+        # NOTE(review): tmin is already 0 at this point; the second assignment
+        # may be a workaround for the macro's scoping -- confirm before removing.
+        tmin=0
+        perfmodel = StarpuPerfmodel(
+            perf_type = STARPU_HISTORY_BASED,
+            symbol = "history_perf"
+        )
+        # Only the CPU implementation generated from the codelet is used.
+        cl = StarpuCodelet(
+            cpu_func = CPU_CODELETS["matrix_mult"],
+            #cuda_func = "matrix_mult",
+            #opencl_func="ocl_matrix_mult",
+            modes = [STARPU_R, STARPU_R, STARPU_W],
+            perfmodel = perfmodel
+        )
+
+        for i in (1 : 10 )
+            t=time_ns()
+            @starpu_sync_tasks begin
+                for taskx in (1 : nslicesx)
+                    for tasky in (1 : nslicesy)
+                        # Block (taskx, tasky) of C from slice tasky of A and slice taskx of B.
+                        handles = [hA[tasky], hB[taskx], hC[taskx, tasky]]
+                        task = StarpuTask(cl = cl, handles = handles)
+                        starpu_task_submit(task)
+                        #@starpu_async_cl matrix_mult(hA[tasky], hB[taskx], hC[taskx, tasky])
+                    end
+                end
+            end
+            t=time_ns()-t
+            # 0 doubles as the "no measurement yet" marker.
+            if (tmin==0 || tmin>t)
+                tmin=t
+            end
+        end
+    end
+    return tmin
+end
+
+
+# Return true when A and B have the same shape and every pair of elements
+# differs by at most `eps` relative to the larger of the two magnitudes.
+# The first offending pair is printed before false is returned. Matrices of
+# different shapes are never considered equal.
+function approximately_equals(
+    A :: AbstractMatrix{<:Real},
+    B :: AbstractMatrix{<:Real},
+    eps = 1e-2
+)
+    # Previously a larger B compared equal on its top-left corner and a
+    # smaller one raised a BoundsError.
+    if axes(A) != axes(B)
+        return false
+    end
+
+    for j in axes(A, 2)
+        for i in axes(A, 1)
+            if (abs(A[i,j] - B[i,j]) > eps * max(abs(B[i,j]), abs(A[i,j])))
+                println("A[$i,$j] : $(A[i,j]), B[$i,$j] : $(B[i,j])")
+                return false
+            end
+        end
+    end
+
+    return true
+end
+
+# Benchmark square products for every dimension in
+# `start_dim : step_dim : stop_dim`. Each result is written both to `io` and
+# to standard output as "<footprint of the three matrices in MiB> <operations
+# per nanosecond>".
+function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy)
+    for dim in (start_dim : step_dim : stop_dim)
+        lhs = Array(rand(Cfloat, dim, dim))
+        rhs = Array(rand(Cfloat, dim, dim))
+        product = zeros(Float32, dim, dim)
+
+        # best time in nanoseconds
+        elapsed_ns = multiply_with_starpu(lhs, rhs, product, nslicesx, nslicesy)
+
+        flops = (2*dim-1)*dim*dim/elapsed_ns
+        size = dim*dim*4*3/1024/1024
+        for out in (io, stdout)
+            println(out, "$size $flops")
+        end
+    end
+end
+
+
+# Write the benchmark results to the file named by the first command-line
+# argument. The do-block form closes the file even when a run throws.
+open(ARGS[1], "w") do io
+    compute_times(io,16*STRIDE,4*STRIDE,4096,2,2)
+end
+@debugprint "starpu_shutdown"
+starpu_shutdown()
+

+ 4 - 0
julia/mult/mult.plot

@@ -0,0 +1,4 @@
+# Select the terminal before opening the output file, as the gnuplot manual
+# recommends when both commands are used together.
+set term pdf
+set output "comparison.pdf"
+plot "julia_native.dat" w l,"cstarpu.dat" w l,"julia_generatedc.dat" w l,"julia_calllib.dat" w l
+quit

+ 44 - 0
julia/mult/mult_native.jl

@@ -0,0 +1,44 @@
+import Libdl
+using StarPU
+using LinearAlgebra
+
+# should be the same as in the makefile
+const STRIDE = 72
+
+@debugprint "starpu_init"
+starpu_init()
+
+# Time `A * B` with Julia's built-in matrix product, 10 times, and return the
+# shortest duration in nanoseconds. The product is bound to a local variable:
+# the matrix passed as `C` is not modified, and `nslicesx`/`nslicesy` are
+# accepted only to mirror the StarPU variant.
+function multiply_without_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
+    best = 0
+    for trial in 1:10
+        started = time_ns()
+        C = A * B
+        elapsed = time_ns() - started
+        # 0 doubles as the "no measurement yet" marker
+        if !(best != 0 && best <= elapsed)
+            best = elapsed
+        end
+    end
+    return best
+end
+
+
+# Benchmark the native product for every dimension in
+# `start_dim : step_dim : stop_dim`. Each result is written both to `io` and
+# to standard output as "<footprint of the three matrices in MiB> <operations
+# per nanosecond>".
+function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy)
+    for dim in (start_dim : step_dim : stop_dim)
+        lhs = Array(rand(Cfloat, dim, dim))
+        rhs = Array(rand(Cfloat, dim, dim))
+        product = zeros(Float32, dim, dim)
+
+        # best time in nanoseconds
+        elapsed_ns = multiply_without_starpu(lhs, rhs, product, nslicesx, nslicesy)
+
+        flops = (2*dim-1)*dim*dim/elapsed_ns
+        size = dim*dim*4*3/1024/1024
+        for out in (io, stdout)
+            println(out, "$size $flops")
+        end
+    end
+end
+
+
+# Write the benchmark results to the file named by the first command-line
+# argument. The do-block form closes the file even when a run throws.
+open(ARGS[1], "w") do io
+    compute_times(io,16*STRIDE,4*STRIDE,4096,2,2)
+end
+@debugprint "starpu_shutdown"
+starpu_shutdown()
+

+ 11 - 0
julia/mult/res/mult_cstarpu_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.187500 62.469505
+23.730469 65.145783
+34.171875 64.252904
+46.511719 64.856261
+60.750000 61.807355
+76.886719 64.395970
+94.921875 63.789615
+114.855469 64.782028
+136.687500 61.598611
+160.417969 63.266065
+186.046875 62.610491

+ 29 - 0
julia/mult/res/mult_gen_gcc9_1x4.dat

@@ -0,0 +1,29 @@
+3.0 35.095536494941854
+4.6875 41.989376626414035
+6.75 38.862414203751754
+9.1875 42.8149332353783
+12.0 27.869356812008537
+15.1875 40.90318016042991
+18.75 35.225143587305226
+22.6875 40.2855672797496
+27.0 30.428316600694394
+31.6875 39.31411328357
+36.75 32.661610849261855
+42.1875 37.556932447413935
+48.0 23.52181133492872
+54.1875 38.220688331426885
+60.75 30.30144442185247
+67.6875 35.7931848447804
+75.0 27.503675217337065
+82.6875 34.26004394537202
+90.75 30.28248032967485
+99.1875 34.80402996973707
+108.0 23.410338740869793
+117.1875 33.816995170067365
+126.75 28.575495002257274
+136.6875 31.938845714722646
+147.0 26.579028306224597
+157.6875 31.257291229640458
+168.75 29.40371998523363
+180.1875 31.20791496656832
+192.0 22.27977322138876

+ 29 - 0
julia/mult/res/mult_gen_gcc9_4x1.dat

@@ -0,0 +1,29 @@
+3.0 38.576241402554224
+4.6875 45.31099640039931
+6.75 41.33116523247586
+9.1875 41.36870326327288
+12.0 30.726913279900813
+15.1875 40.97070044185806
+18.75 38.00711660927796
+22.6875 41.41500237348445
+27.0 35.60049383550015
+31.6875 39.3274866273195
+36.75 35.89547776075037
+42.1875 40.148418048157694
+48.0 25.328919716005114
+54.1875 38.31273888508681
+60.75 33.40185421821584
+67.6875 36.933804237397716
+75.0 30.424047628770715
+82.6875 35.555311026464885
+90.75 33.520292128428736
+99.1875 36.15390985285586
+108.0 24.69723232685782
+117.1875 35.10082819198454
+126.75 31.92453002780272
+136.6875 35.218675088153915
+147.0 28.57496209979794
+157.6875 34.30832202391309
+168.75 32.28312311135674
+180.1875 33.95765540289391
+192.0 21.87223921165241

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s100_4x1.dat

@@ -0,0 +1,7 @@
+29.296875 40.03209610690769
+45.7763671875 39.7782462738071
+65.91796875 40.919412460071406
+89.7216796875 39.86310075239485
+117.1875 38.57853081024218
+148.3154296875 38.365551586369726
+183.10546875 36.36952308316503

+ 17 - 0
julia/mult/res/mult_gen_gcc9_s50_4x1.dat

@@ -0,0 +1,17 @@
+7.32421875 40.71065910284983
+11.444091796875 38.65943468999069
+16.4794921875 38.47969086769767
+22.430419921875 36.651013048059674
+29.296875 36.83417153820277
+37.078857421875 36.39675792194737
+45.7763671875 35.95397397739203
+55.389404296875 35.01271643062472
+65.91796875 36.615975568007045
+77.362060546875 35.37884613184124
+89.7216796875 35.47361924291173
+102.996826171875 35.19272426462016
+117.1875 33.20635359174189
+132.293701171875 33.85292024547706
+148.3154296875 34.259771254715574
+165.252685546875 33.696634276143286
+183.10546875 32.65563984287101

+ 4 - 0
julia/mult/res/mult_gen_gcc9_s64_16x16_b4x2.dat

@@ -0,0 +1,4 @@
+12.0 36.97738316279322
+48.0 41.92478434152502
+108.0 44.782019377342586
+192.0 43.31174213073912

+ 13 - 0
julia/mult/res/mult_gen_gcc9_s64_4x4_b4x2.dat

@@ -0,0 +1,13 @@
+12.0 46.97244046946638
+18.75 58.871349999359005
+27.0 53.82855830537721
+36.75 56.32008723976445
+48.0 44.67174556090053
+60.75 56.37242731373687
+75.0 50.7326915732461
+90.75 55.4718476922033
+108.0 42.2282159789089
+126.75 53.75672849956793
+147.0 49.58574052835205
+168.75 54.37042916069095
+192.0 41.03783426797551

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s64_8x1_b4x2.dat

@@ -0,0 +1,7 @@
+12.0 44.99985202955367
+27.0 52.4356051129035
+48.0 43.98871775598096
+75.0 49.80617536221089
+108.0 41.49118338442519
+147.0 48.56364996087375
+192.0 40.62654517348534

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s64_8x8_b4x2.dat

@@ -0,0 +1,7 @@
+12.0 42.39120888947927
+27.0 54.550913222094984
+48.0 45.49392373655385
+75.0 51.36208021061481
+108.0 43.63013488867403
+147.0 49.68419805912138
+192.0 42.5457713890938

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_16x18_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 47.91450640786182
+23.73046875 70.18415000835407
+34.171875 130.5930513278052
+46.51171875 175.40249695912559
+60.75 55.633587692486856
+76.88671875 69.98450438218035
+94.921875 84.29515755163186
+114.85546875 105.0701279194339
+136.6875 59.010534668180654
+160.41796875 62.33164645892831
+186.046875 71.89788996838325

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_16x8_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 58.34035046809785
+23.73046875 90.94876643488134
+34.171875 93.57888433733667
+46.51171875 115.24941633539966
+60.75 57.91620969415693
+76.88671875 69.01285319590752
+94.921875 80.1731339374351
+114.85546875 81.88695877919552
+136.6875 59.81485837027026
+160.41796875 62.627542548656216
+186.046875 70.92430858573593

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2.dat

@@ -0,0 +1,11 @@
+15.1875 49.541265548365025
+23.73046875 52.23451345494309
+34.171875 50.51716648682395
+46.51171875 50.85574646979497
+60.75 42.70677762521372
+76.88671875 47.844317018396175
+94.921875 46.812449783935435
+114.85546875 47.63133362143659
+136.6875 42.58453806625135
+160.41796875 43.38142416008992
+186.046875 42.10597070167543

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 62.85531352216577
+23.73046875 64.18226106386828
+34.171875 63.154717935466564
+46.51171875 63.785224988311604
+60.75 56.23769206133714
+76.88671875 59.67937135482873
+94.921875 58.723057009657836
+114.85546875 59.64876377318175
+136.6875 56.179455752285136
+160.41796875 56.25355249047638
+186.046875 55.86750451373739

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b4x4.dat

@@ -0,0 +1,11 @@
+15.1875 13.064278868379871
+23.73046875 14.756026074641067
+34.171875 13.83982386947793
+46.51171875 14.613968952238817
+60.75 11.783711558597256
+76.88671875 13.931105963251506
+94.921875 12.855632000014735
+114.85546875 13.249002597722258
+136.6875 12.572965056671936
+160.41796875 14.480218135848462
+186.046875 14.040858387720379

+ 5 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b8x2.dat

@@ -0,0 +1,5 @@
+15.1875 14.099898524592977
+23.73046875 14.550933539697333
+34.171875 14.349410926140271
+46.51171875 14.415904334550417
+60.75 13.503051187017753

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_4x1.dat

@@ -0,0 +1,11 @@
+15.1875 45.92147955991763
+23.73046875 51.36107050546614
+34.171875 49.61930926641356
+46.51171875 49.45181909243622
+60.75 45.15796272895741
+76.88671875 48.36073354687536
+94.921875 44.24184559337953
+114.85546875 47.18178855961836
+136.6875 43.159720253646896
+160.41796875 42.67165627290124
+186.046875 38.537576648017776

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_4x4_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 58.120354593587074
+23.73046875 64.57108971168358
+34.171875 63.38082696595577
+46.51171875 62.249708571678504
+60.75 54.032191569244006
+76.88671875 58.202057221774425
+94.921875 61.440907627769874
+114.85546875 59.41214640096639
+136.6875 56.7554113833207
+160.41796875 59.19922367910147
+186.046875 58.85342461418237

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_8x8_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 60.852193218742244
+23.73046875 90.34368571742002
+34.171875 64.88259115865708
+46.51171875 75.93301804962653
+60.75 56.570334556680905
+76.88671875 69.56856256378995
+94.921875 61.32231327890172
+114.85546875 65.01978117265061
+136.6875 59.34455850319351
+160.41796875 62.64459678873513
+186.046875 60.73581790609317

+ 9 - 0
julia/mult/res/mult_gen_gcc9_s80_4x1.dat

@@ -0,0 +1,9 @@
+18.75 33.04933833660189
+29.296875 36.73797053791907
+42.1875 38.63857483141544
+57.421875 37.41446097176627
+75.0 37.29798074973393
+94.921875 47.78484008020491
+117.1875 46.920043928079984
+141.796875 44.932797054801505
+168.75 44.2566320371221

+ 3 - 0
julia/mult/res/mult_gen_icc_s72_2x1_b4x2.dat

@@ -0,0 +1,3 @@
+15.1875 32.90035486674719
+23.73046875 33.41217720558453
+34.171875 32.65678472416923

+ 11 - 0
julia/mult/res/mult_gen_icc_s72_4x4_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 62.840292498287134
+23.73046875 65.60736049798648
+34.171875 63.71211730973094
+46.51171875 64.52944091137051
+60.75 57.21012623951768
+76.88671875 64.30553029147484
+94.921875 63.023853024696905
+114.85546875 61.571602782198134
+136.6875 58.78441267918305
+160.41796875 60.1701272099038
+186.046875 58.911378284423975

+ 62 - 0
julia/mult/res/mult_native.dat

@@ -0,0 +1,62 @@
+0.046875 62.74933655006031
+0.1875 81.76128691363823
+0.421875 56.62036966486339
+0.75 61.59670394349617
+1.171875 78.30779265074588
+1.6875 75.18164244785089
+2.296875 86.16277240336612
+3.0 88.7175896320434
+3.796875 89.94675752869234
+4.6875 91.81258734297542
+5.671875 91.07763358860382
+6.75 89.73871714190392
+7.921875 93.94105347752168
+9.1875 96.0986938110649
+10.546875 86.81427208540494
+12.0 90.48478005441814
+13.546875 87.99680611063411
+15.1875 84.31631390960176
+16.921875 91.48325697666799
+18.75 89.95690606810193
+20.671875 81.49676622230874
+22.6875 81.45233686971325
+24.796875 85.66539502122194
+27.0 87.78572232818597
+29.296875 83.52321886514454
+31.6875 84.0009753910359
+34.171875 86.80205838654649
+36.75 86.99073757030344
+39.421875 81.17955694509793
+42.1875 80.44760372225159
+45.046875 80.56510057628367
+48.0 81.65642955552222
+51.046875 82.11316716249475
+54.1875 82.5995656333009
+57.421875 82.17046140404119
+60.75 82.15768363601998
+64.171875 81.85343346990497
+67.6875 83.50739871185147
+71.296875 81.53545433745259
+75.0 81.48249578388855
+78.796875 83.7802422875762
+82.6875 82.91995474794902
+86.671875 84.37396962418724
+90.75 83.27206282643111
+94.921875 84.42083617832927
+99.1875 84.41380826745248
+103.546875 83.76666519036874
+108.0 83.15578835025194
+112.546875 82.68544528819217
+117.1875 82.44413764522827
+121.921875 75.20336455312317
+126.75 78.74206622501798
+131.671875 78.30429756099845
+136.6875 77.07009004287404
+141.796875 76.79451754647009
+147.0 77.188762171773
+152.296875 83.71882981918472
+157.6875 81.25155945091102
+163.171875 82.77719540968279
+168.75 82.091876711701
+174.421875 80.13244240187723
+180.1875 83.15669561921781

+ 11 - 0
julia/mult/res/mult_nogen_gcc9_s72_2x2_b2x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.12811008957249
+23.73046875 60.33779880359854
+34.171875 58.65251355824629
+46.51171875 59.39753610863093
+60.75 51.71969005913254
+76.88671875 60.41770576441039
+94.921875 55.244712365625155
+114.85546875 59.08425492245039
+136.6875 50.57086001690063
+160.41796875 54.04522394983553
+186.046875 52.97679192814275

+ 11 - 0
julia/mult/res/mult_nogen_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 54.13411070454958
+23.73046875 61.69285331765543
+34.171875 59.001001016663984
+46.51171875 60.24465674394535
+60.75 52.541458398044604
+76.88671875 62.0774861675393
+94.921875 56.20749571716199
+114.85546875 53.17550437757629
+136.6875 47.70744414255635
+160.41796875 49.931147163084646
+186.046875 47.38560406825662

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72-36_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.39992483045233
+23.73046875 61.25439146348626
+34.171875 58.97664123460709
+46.51171875 58.949173678252
+60.75 52.78987632022571
+76.88671875 61.569889782842495
+94.921875 56.0234167726132
+114.85546875 59.25765213366246
+136.6875 50.44001190797859
+160.41796875 55.423350523540556
+186.046875 54.219079530491165

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.58835344153732
+23.73046875 61.299696065186076
+34.171875 59.10944402276785
+46.51171875 60.240571021372645
+60.75 52.66550013928241
+76.88671875 61.30385865566083
+94.921875 58.08324611362879
+114.85546875 62.11036688036624
+136.6875 51.309289298267664
+160.41796875 54.50754182628601
+186.046875 52.16594203007848

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72x2_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 54.225066705273136
+23.73046875 62.03919972007252
+34.171875 57.31552099956256
+46.51171875 59.10290100154662
+60.75 52.5567601559296
+76.88671875 62.05760119221998
+94.921875 56.85760151243153
+114.85546875 59.990114344500874
+136.6875 51.0662836859927
+160.41796875 54.41003890332101
+186.046875 53.02269691247011

+ 0 - 57
julia/src/Compiler/C/add_for_loop_declarations.jl

@@ -1,57 +0,0 @@
-
-
-"""
-    interval_evaluation_declarations(set, for_index_var)
-
-Return the list of instructions that will be added before a for loop of the
-shape `for for_index_var in set ...`.
-
-When the step of `set` is a `StarpuExprValue`, only the start bound, the stop
-bound and the loop index are declared. Otherwise the start, the step, the
-interval size (through `jlstarpu_interval_size`), an iteration counter and
-the loop index are declared. Every generated name is suffixed with `set.id`.
-"""
-function interval_evaluation_declarations(set :: StarpuExprInterval, for_index_var :: Symbol)
-
-    # Plain locals : `const` on a local variable is rejected by Julia >= 1.0
-    decl_pattern = @parse € :: Int64
-    affect_pattern = @parse € :: Int64 = €
-    interv_size_affect_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
-
-    id = set.id
-
-    start_var = starpu_parse(Symbol(:start_, id))
-    start_decl = replace_pattern(affect_pattern, start_var, set.start)
-
-    index_var = starpu_parse(for_index_var)
-    index_decl = replace_pattern(decl_pattern, index_var)
-
-    # Literal step : the loop can compare the index against the stop bound directly
-    if isa(set.step, StarpuExprValue)
-
-        stop_var = starpu_parse(Symbol(:stop_, id))
-        stop_decl = replace_pattern(affect_pattern, stop_var, set.stop)
-
-        return StarpuExpr[start_decl, stop_decl, index_decl]
-    end
-
-    # Non literal step : the loop counts iterations up to the interval size
-    step_var = starpu_parse(Symbol(:step_, id))
-    step_decl = replace_pattern(affect_pattern, step_var, set.step)
-
-    dim_var = starpu_parse(Symbol(:dim_, id))
-    dim_decl = replace_pattern(interv_size_affect_pattern, dim_var, start_var, step_var, set.stop)
-
-    iter_var = starpu_parse(Symbol(:iter_, id))
-    iter_decl = replace_pattern(decl_pattern, iter_var)
-
-    return StarpuExpr[start_decl, step_decl, dim_decl, iter_decl, index_decl]
-end
-
-
-# Attach to every for loop found in `expr` the declarations that must be
-# emitted before it. Any other node is returned untouched.
-function add_for_loop_declarations(expr :: StarpuExpr)
-
-    function attach_declarations(node :: StarpuExpr)
-
-        if isa(node, StarpuExprFor)
-            declarations = interval_evaluation_declarations(node.set, node.iter)
-            return StarpuExprFor(node.iter, node.set, node.body, node.is_independant, declarations)
-        end
-
-        return node
-    end
-
-    return apply(attach_declarations, expr)
-end

+ 0 - 15
julia/src/Compiler/C/create_cpu_kernel.jl

@@ -1,15 +0,0 @@
-
-
-
-
-
-# Turn a parsed function into its CPU kernel form by running the
-# transformation passes one after the other, in this exact order.
-function transform_to_cpu_kernel(expr :: StarpuExprFunction)
-
-    return expr |>
-           add_for_loop_declarations |>
-           substitute_args |>
-           substitute_func_calls |>
-           substitute_indexing |>
-           flatten_blocks
-end

+ 0 - 27
julia/src/Compiler/C/flatten_blocks.jl

@@ -1,27 +0,0 @@
-
-
-
-# Merge the instructions of blocks directly nested in another block into
-# their parent block. `apply` rebuilds the children before their parent, so
-# the nested blocks met here have already been flattened themselves.
-function flatten_blocks(expr :: StarpuExpr)
-
-    function func_to_run(x :: StarpuExpr)
-
-        if !isa(x, StarpuExprBlock)
-            return x
-        end
-
-        instrs = StarpuExpr[]
-
-        for sub_expr in x.exprs
-
-            if isa(sub_expr, StarpuExprBlock)
-                # append! instead of push!(instrs, exprs...) : no splatting
-                # of an arbitrary-length vector into varargs
-                append!(instrs, sub_expr.exprs)
-            else
-                push!(instrs, sub_expr)
-            end
-        end
-
-        return StarpuExprBlock(instrs)
-    end
-
-    return apply(func_to_run, expr)
-end

+ 0 - 76
julia/src/Compiler/C/substitute_args.jl

@@ -1,76 +0,0 @@
-
-
-# Replace every use of the argument `arg_name` by the pointer variable
-# `ptr_name`. Calls to a function listed in `func_substitution` whose single
-# argument is that pointer are rewritten to take `buffer_name[arg_index]`.
-function substitute_argument_usage(expr :: StarpuExpr, arg_index, buffer_name :: Symbol, arg_name :: Symbol, ptr_name :: Symbol)
-
-    function rewrite(node :: StarpuExpr)
-
-        if node == StarpuExprVar(arg_name)
-            return StarpuExprVar(ptr_name)
-        end
-
-        is_interface_call = isa(node, StarpuExprCall) && node.func in keys(func_substitution)
-
-        if !is_interface_call
-            return node
-        end
-
-        if length(node.args) != 1
-            error("Invalid arrity for function $(node.func)")
-        end
-
-        # The call arguments were rewritten before the call itself, so the
-        # argument already appears under its pointer name here
-        if node.args[1] == StarpuExprVar(ptr_name)
-            interface_func = func_substitution[node.func]
-            buffer_ref = starpu_parse(:($buffer_name[$arg_index]))
-
-            return StarpuExprCall(interface_func, [buffer_ref])
-        end
-
-        return node
-    end
-
-    return apply(rewrite, expr)
-end
-
-
-
-# Rewrite a function so that it takes a buffer array and a cl_arg array, and
-# starts by extracting one pointer per original argument (plus the leading
-# dimension for matrices) from the buffers.
-function substitute_args(expr :: StarpuExprFunction)
-
-    body = expr.body
-    func_id = rand_string()
-    buffer_arg_name = Symbol("buffers_", func_id)
-    cl_arg_name = Symbol("cl_arg_", func_id)
-
-    prologue = StarpuExpr[]
-
-    for (i, arg) in enumerate(expr.args)
-
-        var_id = rand_string()
-        ptr = Symbol(:ptr_, var_id)
-
-        if arg.typ <: Vector
-            getter = :STARPU_VECTOR_GET_PTR
-
-        elseif arg.typ <: Matrix
-            getter = :STARPU_MATRIX_GET_PTR
-            ld_name = Symbol("ld_", var_id)
-            ld_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$i])) )
-            push!(prologue, ld_affect)
-
-        else
-            error("Task arguments must be either vector or matrix (got $(arg.typ))") #TODO : cl_args, variable ?
-        end
-
-        elem_type = eltype(arg.typ)
-        ptr_affect = starpu_parse( :($ptr :: Ptr{$elem_type} = $getter($buffer_arg_name[$i])) )
-        push!(prologue, ptr_affect)
-
-        body = substitute_argument_usage(body, i, buffer_arg_name, arg.name, ptr)
-    end
-
-    kernel_args = [
-                    starpu_parse(:($buffer_arg_name :: Matrix{Void})),
-                    starpu_parse(:($cl_arg_name :: Vector{Void}))
-                ]
-
-    # The extraction instructions come first, then the original instructions
-    full_body = StarpuExprBlock([prologue..., body.exprs...])
-
-    return StarpuExprFunction(expr.ret_type, expr.func, kernel_args, full_body)
-end

+ 0 - 25
julia/src/Compiler/C/substitute_func_calls.jl

@@ -1,25 +0,0 @@
-
-
-
-# Function names that get replaced in kernels, mapped to the name of the
-# StarPU interface accessor they are replaced with.
-func_substitution = Dict(
-    :width => :STARPU_MATRIX_GET_NY,
-    :height => :STARPU_MATRIX_GET_NX,
-
-    :length => :STARPU_VECTOR_GET_NX
-)
-
-
-
-# Rename every call to a function listed in `func_substitution` to its
-# replacement name, keeping the arguments as they are.
-function substitute_func_calls(expr :: StarpuExpr)
-
-    function rename_call(node :: StarpuExpr)
-
-        if isa(node, StarpuExprCall) && haskey(func_substitution, node.func)
-            return StarpuExprCall(func_substitution[node.func], node.args)
-        end
-
-        return node
-    end
-
-    return apply(rename_call, expr)
-end

+ 0 - 52
julia/src/Compiler/C/substitute_indexing.jl

@@ -1,52 +0,0 @@
-
-
-# Translate the 1-based Julia indexing of variables into 0-based flat
-# indexing. With two indexes the flat index is
-# (i + (j - 1) * ld_<id>) - 1, where <id> is what follows the "ptr_" prefix
-# of the indexed variable name.
-function substitute_indexing(expr :: StarpuExpr)
-
-    function func_to_run(x :: StarpuExpr)
-
-        if !isa(x, StarpuExprRef)
-            return x
-        end
-
-        if !isa(x.ref, StarpuExprVar)
-            error("Only variable indexing is allowed") #TODO allow more ?
-        end
-
-
-        nb_indexes = length(x.indexes)
-
-        if (nb_indexes >= 3)
-            error("Indexing with more than 2 indexes is not allowed") # TODO : blocks
-        end
-
-        if (nb_indexes == 0)
-            return x
-
-        elseif nb_indexes == 1
-            new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)])  #TODO : add field "offset" from STARPU_VECTOR_GET interface
-                                                                            #TODO : detect when it is a matrix used with one index only
-            return StarpuExprRef(x.ref, [new_index])
-
-        elseif nb_indexes == 2
-
-            var_name = String(x.ref.name)
-
-            # The name must START with "ptr_" : the identifier is read from
-            # the 5th character on, so a match elsewhere in the name would
-            # yield a wrong identifier
-            if !startswith(var_name, "ptr_") || isempty(var_name[5:end])
-                error("Invalid variable ($var_name) for multiple index dereferencing")
-            end
-
-            var_id = var_name[5:end]
-            ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix)
-
-            new_index = x.indexes[2]
-            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
-            new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)])
-            new_index = StarpuExprCall(:(+), [x.indexes[1], new_index])
-            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
-
-            return StarpuExprRef(x.ref, [new_index])
-        end
-    end
-
-    return apply(func_to_run, expr)
-end

+ 0 - 179
julia/src/Compiler/Cuda/create_cuda_kernel.jl

@@ -1,179 +0,0 @@
-
-
-# Tell whether `x` is a for loop flagged as independant.
-is_indep_for_expr(x :: StarpuExpr) = isa(x, StarpuExprFor) && x.is_independant
-
-
-# Split `expr` into the instructions located before the first independant
-# for loop, that loop (as a StarpuIndepFor, or `nothing` when there is none)
-# and the instructions located after it. Only the top level of `expr` is
-# inspected.
-function extract_init_indep_finish(expr :: StarpuExpr) # TODO : it is not a correct extraction (example : if (cond) {@indep for ...} else {return} would not work)
-                                                            # better use apply() (NOTE :assert_no_indep_for already exists) to find recursively every for loops
-    if is_indep_for_expr(expr)
-        return StarpuExpr[], StarpuIndepFor(expr), StarpuExpr[]
-    end
-
-    if !isa(expr, StarpuExprBlock)
-        return [expr], nothing, StarpuExpr[]
-    end
-
-    instrs = expr.exprs
-
-    for (pos, instr) in enumerate(instrs)
-
-        if is_indep_for_expr(instr)
-
-            before = instrs[1 : pos-1]
-            indep = StarpuIndepFor(instr)
-            after = instrs[pos+1 : end]
-
-            if any(is_indep_for_expr, after)
-                error("Sequence of several independant loops is not allowed") #same it may be tricked by a Block(Indep_for(...))
-            end
-
-            return before, indep, after
-        end
-    end
-
-    return instrs, nothing, StarpuExpr[]
-end
-
-
-
-
-# Walk through `expr` and return a tuple made of :
-#   - the variables defined once the walk is over (those of
-#     `already_defined`, followed by the ones declared in `expr` through a
-#     typed affectation or as a for loop iterator, the latter being Int64),
-#   - the names of the variables used in `expr` without being defined.
-# Raises an error on a function declaration or on a variable defined twice.
-function analyse_variable_declarations(expr :: StarpuExpr, already_defined :: Vector{StarpuExprTypedVar} = StarpuExprTypedVar[])
-
-    undefined_variables = Symbol[]
-
-    # Explicit element types : these vectors are pushed to below, so their
-    # element type must not depend on what is inferred from `already_defined`
-    defined_variable_names = Symbol[x.name for x in already_defined]
-    defined_variable_types = Type[x.typ for x in already_defined]
-
-    function func_to_apply(x :: StarpuExpr)
-
-        if isa(x, StarpuExprFunction)
-            error("No function declaration allowed in this section")
-        end
-
-        if isa(x, StarpuExprVar) || isa(x, StarpuExprTypedVar)
-
-            if !(x.name in defined_variable_names) && !(x.name in undefined_variables)
-                push!(undefined_variables, x.name)
-            end
-
-            return x
-        end
-
-        if isa(x, StarpuExprAffect) || isa(x, StarpuExprFor)
-
-            if isa(x, StarpuExprAffect)
-
-                var = x.var
-
-                # An affectation to an untyped variable declares nothing
-                if !isa(var, StarpuExprTypedVar)
-                    return x
-                end
-
-                name = var.name
-                typ = var.typ
-
-            else
-                name = x.iter
-                typ = Int64
-            end
-
-            if name in defined_variable_names
-                error("Multiple definition of variable $name")
-            end
-
-            # The sub-expressions are visited before the node declaring the
-            # variable, so the name may already be recorded as undefined
-            filter!((sym -> sym != name), undefined_variables)
-            push!(defined_variable_names, name)
-            push!(defined_variable_types, typ)
-
-            return x
-        end
-
-        return x
-    end
-
-    apply(func_to_apply, expr)
-
-    defined_variable = StarpuExprTypedVar[
-        StarpuExprTypedVar(name, typ)
-        for (name, typ) in zip(defined_variable_names, defined_variable_types)
-    ]
-
-    return defined_variable, undefined_variables
-end
-
-
-
-# Return the first variable of `vars` called `name`, or `nothing` when
-# there is no such variable.
-function find_variable(name :: Symbol, vars :: Vector{StarpuExprTypedVar})
-
-    pos = 1
-
-    while pos <= length(vars)
-
-        candidate = vars[pos]
-
-        if candidate.name == name
-            return candidate
-        end
-
-        pos += 1
-    end
-
-    return nothing
-end
-
-
-
-# Rename every call to jlstarpu_interval_size into a call to
-# jlstarpu_interval_size__device, keeping the arguments as they are.
-function add_device_to_interval_call(expr :: StarpuExpr)
-
-    function to_device_call(node :: StarpuExpr)
-
-        is_interval_size_call = isa(node, StarpuExprCall) && node.func == :jlstarpu_interval_size
-
-        if !is_interval_size_call
-            return node
-        end
-
-        return StarpuExprCall(:jlstarpu_interval_size__device, node.args)
-    end
-
-    return apply(to_device_call, expr)
-end
-
-
-
-# Build the two functions needed to run `func` with CUDA and return them as
-# the tuple (prekernel, kernel) :
-#   - the prekernel, named "CUDA_" followed by the name of `func`, runs the
-#     instructions found before the independant for loop, launches the
-#     kernel, synchronizes the stream, then runs the remaining instructions,
-#   - the kernel, named as `func`, holds the body of the independant loop.
-# Raises an error when no independant for loop is found.
-function transform_to_cuda_kernel(func :: StarpuExprFunction)
-
-    cpu_func = transform_to_cpu_kernel(func)
-
-    init, indep, finish = extract_init_indep_finish(cpu_func.body)
-
-    if indep === nothing
-        error("No independant for loop has been found") # TODO can fail because extraction is not correct yet
-    end
-
-    prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
-
-    prekernel_instr = vcat(init, prekernel_instr)
-    kernel_instr = vcat(kernel_instr, indep.body)
-
-    # Both analyses are run, even though only part of their result is used
-    # below, since they also reject multiple definitions of a variable
-    indep_for_def, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
-    prekernel_def, prekernel_undef = analyse_variable_declarations(StarpuExprBlock(prekernel_instr), cpu_func.args)
-
-    # Variables used by the kernel but defined in the prekernel are handed
-    # over as additional kernel arguments
-    for undef_var in indep_for_undef
-
-        found_var = find_variable(undef_var, prekernel_def)
-
-        if found_var === nothing # TODO : error then ?
-            continue
-        end
-
-        push!(kernel_args, found_var)
-    end
-
-    call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
-    cuda_call = StarpuExprCudaCall(func.func, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
-    push!(prekernel_instr, cuda_call)
-    push!(prekernel_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
-    prekernel_instr = vcat(prekernel_instr, finish)
-
-    prekernel_name = Symbol("CUDA_", func.func)
-    prekernel = StarpuExprFunction(Void, prekernel_name, cpu_func.args, StarpuExprBlock(prekernel_instr))
-    prekernel = flatten_blocks(prekernel)
-
-    kernel = StarpuExprFunction(Void, func.func, kernel_args, StarpuExprBlock(kernel_instr))
-    kernel = add_device_to_interval_call(kernel)
-    kernel = flatten_blocks(kernel)
-
-    return prekernel, kernel
-end

+ 0 - 49
julia/src/Compiler/Cuda/indep_for.jl

@@ -1,49 +0,0 @@
-
-
-# Nest of independant for loops gathered into a single object.
-struct StarpuIndepFor
-
-    # Iterator name and iteration set of each gathered loop, outermost first
-    iters :: Vector{Symbol}
-    sets :: Vector{StarpuExprInterval}
-
-    # What remains once the gathered loops are peeled off
-    body :: StarpuExpr
-end
-
-
-# Raise an error if `expr` holds an independant for loop, return the
-# result of the traversal otherwise.
-function assert_no_indep_for(expr :: StarpuExpr)
-
-    function reject_indep_for(node :: StarpuExpr)
-
-        is_indep = isa(node, StarpuExprFor) && node.is_independant
-        is_indep && error("Invalid usage of intricated @indep for loops")
-
-        return node
-    end
-
-    return apply(reject_indep_for, expr)
-end
-
-
-# Gather the directly nested independant for loops starting at `expr`.
-# Blocks holding a single instruction are looked through. The body left
-# once the loops are peeled off must not hold any other independant loop.
-function StarpuIndepFor(expr :: StarpuExprFor)
-
-    if !expr.is_independant
-        error("For expression must be prefixed by @indep")
-    end
-
-    # Typed accumulators, matching the fields of the structure
-    iters = Symbol[]
-    sets = StarpuExprInterval[]
-    for_loop = expr
-
-    while isa(for_loop, StarpuExprFor) && for_loop.is_independant
-
-        push!(iters, for_loop.iter)
-        push!(sets, for_loop.set)
-        for_loop = for_loop.body
-
-        # Look through the blocks made of a single instruction
-        while (isa(for_loop, StarpuExprBlock) && length(for_loop.exprs) == 1)
-            for_loop = for_loop.exprs[1]
-        end
-    end
-
-    return StarpuIndepFor(iters, sets, assert_no_indep_for(for_loop))
-end

+ 0 - 121
julia/src/Compiler/Cuda/indep_for_kernel_ids.jl

@@ -1,121 +0,0 @@
-
-
-# Generate the instructions computing, inside a kernel, the thread
-# identifier (with an early return when it is out of bounds) followed by
-# one index per dimension, kernel_ids__index_<k>, decomposed from it.
-function translate_index_code(dims :: Vector{StarpuExprVar})
-
-    ndims = length(dims)
-
-    if ndims == 0
-        error("No dimension specified")
-    end
-
-    dim_product = StarpuExprValue(1)
-    instrs = StarpuExpr[]
-    index_pattern = @parse € :: Int64 = (€ / €) % €
-    thread_id = @parse THREAD_ID
-
-    # From the last dimension to the first one : index k is the thread
-    # identifier divided by the product of the dimensions after k, modulo
-    # dimension k
-    for k in ndims : -1 : 1
-        dim = dims[k]
-        index_lvalue = StarpuExprVar(Symbol(:kernel_ids__index_, k))
-        push!(instrs, replace_pattern(index_pattern, index_lvalue, thread_id, dim_product, dim))
-
-        dim_product = StarpuExprCall(:(*), [dim_product, dim])
-    end
-
-    thread_id_pattern = @parse begin
-
-        € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x
-
-        if (€ >= €)
-            return
-        end
-    end
-
-    # Pushed last, but ends up first once the list is reversed
-    push!(instrs, replace_pattern(thread_id_pattern, thread_id, thread_id, dim_product))
-
-    return reverse(instrs)
-end
-
-
-
-
-
-
-
-# For each loop gathered in `ind_for`, declare its start, step and size in
-# the prekernel, pass them as kernel arguments, and prepare the kernel
-# instruction computing the iterator as start + index * step.
-# Returns (dims, ker_instr_to_add_later_on, pre_kernel_instr, kernel_args,
-# kernel_instr), the last one being returned empty.
-function kernel_index_declarations(ind_for :: StarpuIndepFor)
-
-    pre_kernel_instr = StarpuExpr[]
-    kernel_args = StarpuExprTypedVar[]
-    kernel_instr = StarpuExpr[]
-
-    decl_pattern = @parse € :: Int64 = €
-    interv_size_decl_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
-    iter_pattern = @parse € :: Int64 = € + € * €
-
-    dims = StarpuExprVar[]
-    ker_instr_to_add_later_on = StarpuExpr[]
-
-    for (k, set) in enumerate(ind_for.sets)
-
-        start_var = starpu_parse(Symbol(:kernel_ids__start_, k))
-        start_decl = replace_pattern(decl_pattern, start_var, set.start)
-
-        step_var = starpu_parse(Symbol(:kernel_ids__step_, k))
-        step_decl = replace_pattern(decl_pattern, step_var, set.step)
-
-        dim_var = starpu_parse(Symbol(:kernel_ids__dim_, k))
-        dim_decl = replace_pattern(interv_size_decl_pattern, dim_var, start_var, step_var, set.stop)
-
-        push!(dims, dim_var)
-        push!(pre_kernel_instr, start_decl, step_decl, dim_decl)
-
-        for declared in (start_var, step_var, dim_var)
-            push!(kernel_args, StarpuExprTypedVar(declared.name, Int64))
-        end
-
-        iter_var = starpu_parse(ind_for.iters[k])
-        index_var = starpu_parse(Symbol(:kernel_ids__index_, k))
-
-        push!(ker_instr_to_add_later_on, replace_pattern(iter_pattern, iter_var, start_var, index_var, step_var))
-    end
-
-    return dims, ker_instr_to_add_later_on, pre_kernel_instr , kernel_args, kernel_instr
-end
-
-
-
-# Build, from the loops gathered in `ind_for`, the prekernel instructions
-# (declarations of the sets, of `nthreads` and of `nblocks`), the kernel
-# arguments, and the kernel instructions computing the indexes and the
-# iterators. Returns (pre_kernel_instr, kernel_args, kernel_instr).
-function analyse_sets(ind_for :: StarpuIndepFor)
-
-    decl_pattern = @parse € :: Int64 = €
-    nblocks_decl_pattern = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK
-
-    dims, iter_instrs, pre_kernel_instr, kernel_args, kernel_instr  = kernel_index_declarations(ind_for)
-
-    # One thread per element of the iteration space
-    total_size = @parse 1
-
-    for dim in dims
-        total_size = StarpuExprCall(:(*), [total_size, dim])
-    end
-
-    nthreads_var = @parse nthreads
-    push!(pre_kernel_instr, replace_pattern(decl_pattern, nthreads_var, total_size))
-
-    nblocks_var = @parse nblocks
-    push!(pre_kernel_instr, replace_pattern(nblocks_decl_pattern, nblocks_var, nthreads_var))
-
-    # Indexes first, since the iterators are computed from them
-    push!(kernel_instr, translate_index_code(dims)...)
-    push!(kernel_instr, iter_instrs...)
-
-    return pre_kernel_instr, kernel_args, kernel_instr
-end

+ 0 - 60
julia/src/Compiler/Expressions/affect.jl

@@ -1,60 +0,0 @@
-
-#======================================================
-                AFFECTATION
-======================================================#
-
-
-# Affectation `var = expr`.
-struct StarpuExprAffect <: StarpuExpr
-    # Left-hand side of the affectation
-    var :: StarpuExpr
-    # Right-hand side of the affectation
-    expr :: StarpuExpr
-end
-
-# Parse a Julia `lhs = rhs` expression into a StarpuExprAffect.
-function starpu_parse_affect(x :: Expr)
-
-    x.head == :(=) || error("Invalid \"affectation\" expression")
-
-    lhs = starpu_parse(x.args[1])
-    rhs = starpu_parse(x.args[2])
-
-    return StarpuExprAffect(lhs, rhs)
-end
-
-
-# An affectation only equals another affectation whose both sides are
-# equal to its own.
-function equals(x :: StarpuExprAffect, y :: StarpuExpr)
-
-    return isa(y, StarpuExprAffect) && equals(x.var, y.var) && equals(x.expr, y.expr)
-end
-
-
-# Print the affectation as `var = expr`. When the left-hand side is a typed
-# variable, the right-hand side is printed as `(type) (expr)`.
-function print(io :: IO, x :: StarpuExprAffect ; indent = 0)
-
-    print(io, x.var, indent = indent)
-    print(io, " = ")
-
-    if isa(x.var, StarpuExprTypedVar)
-        # transtyping to avoid warning (or errors for cuda) during compilation time
-        print(io, "(", starpu_type_traduction(x.var.typ), ") (")
-        print(io, x.expr, indent = indent)
-        print(io, ")")
-    else
-        print(io, x.expr, indent = indent)
-    end
-
-    return nothing
-end
-
-# Apply `func` to both sides of the affectation (left-hand side first),
-# then to the affectation rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprAffect)
-
-    rebuilt = StarpuExprAffect(apply(func, expr.var), apply(func, expr.expr))
-
-    return func(rebuilt)
-end

+ 0 - 68
julia/src/Compiler/Expressions/block.jl

@@ -1,68 +0,0 @@
-
-#======================================================
-                BLOCK
-(series of instruction, not C variable scoping block)
-======================================================#
-
-
-# Series of instructions.
-struct StarpuExprBlock <: StarpuExpr
-    # Instructions of the block, in the order they are printed
-    exprs :: Vector{StarpuExpr}
-end
-
-
-# Tell whether an element of a Julia block must be dropped before parsing
-# the block. Only line number annotations are unwanted, whether they come
-# as `Expr(:line, ...)` or as `LineNumberNode`.
-
-# Anything without a dedicated method (literals, ...) is kept
-function is_unwanted(x)
-    return false
-end
-
-function is_unwanted(x :: Symbol)
-    return false
-end
-
-function is_unwanted(x :: LineNumberNode)
-    return true
-end
-
-function is_unwanted(x :: Expr)
-    return x.head == :line
-end
-
-
-# Parse a Julia block into a StarpuExprBlock, unwanted elements being
-# dropped beforehand.
-function starpu_parse_block(x :: Expr)
-
-    x.head == :block || error("Invalid \"block\" expression")
-
-    kept = filter(!is_unwanted, x.args)
-
-    #TODO : let 1 instruction blocks be a thing ?
-    return StarpuExprBlock(map(starpu_parse, kept))
-end
-
-
-# Print every instruction followed by ";", with a newline between two
-# consecutive instructions (none after the last one).
-function print(io :: IO, x :: StarpuExprBlock ; indent = 0)
-    last_pos = length(x.exprs)
-
-    for (pos, instr) in enumerate(x.exprs)
-        print(io, instr, indent = indent)
-        print(io, ";")
-        pos == last_pos || print_newline(io, indent)
-    end
-end
-
-
-
-
-# Apply `func` to every instruction of the block, in order, then to the
-# block rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprBlock)
-
-    rebuilt = StarpuExprBlock(map((instr -> apply(func, instr)), expr.exprs))
-
-    return func(rebuilt)
-end

+ 0 - 75
julia/src/Compiler/Expressions/call.jl

@@ -1,75 +0,0 @@
-
-#======================================================
-                FUNCTION CALL
-======================================================#
-
-
-# Call of the function (or operator) `func` on `args`.
-struct StarpuExprCall <: StarpuExpr
-    # Name of the called function or operator
-    func :: Symbol
-    # Arguments of the call, in order
-    args :: Vector{StarpuExpr}
-end
-
-
-# Parse a Julia call expression into a StarpuExprCall. The called function
-# must be designated by a plain variable name.
-function starpu_parse_call(x :: Expr)
-
-    x.head == :call || error("Invalid \"call\" expression")
-
-    callee = starpu_parse(x.args[1])
-
-    isa(callee, StarpuExprVar) || error("Invalid \"call\" expression : function must be a variable")
-
-    return StarpuExprCall(callee.name, map(starpu_parse, x.args[2:end]))
-end
-
-
-# Operators printed between their operands rather than as f(a, b) when
-# they are called with at least two arguments.
-starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(%))
-
-
-# Print the call as `func(arg1, arg2, ...)`.
-function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0)
-
-    print(io, x.func, "(")
-
-    for (pos, arg) in enumerate(x.args)
-        pos > 1 && print(io, ", ")
-        print(io, arg, indent = indent)
-    end
-
-    print(io, ")")
-end
-
-
-# Print the call as `(arg1) func (arg2) func ...`, every argument being
-# parenthesized.
-function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0)
-    for (pos, arg) in enumerate(x.args)
-        pos > 1 && print(io, " ", x.func, " ")
-        print(io, "(")
-        print(io, arg, indent = indent)
-        print(io, ")")
-    end
-end
-
-# Print the call in infix form when it is an infix operator given at least
-# two arguments, in prefix form otherwise.
-function print(io :: IO, x :: StarpuExprCall ; indent = 0)
-
-    is_infix = length(x.args) >= 2 && x.func in starpu_infix_operators
-    printer = is_infix ? print_infix : print_prefix
-
-    printer(io, x, indent = indent)
-end
-
-
-
-
-# Apply `func` to every argument of the call, in order, then to the call
-# rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprCall)
-
-    new_args = map((arg -> apply(func, arg)), expr.args)
-
-    return func(StarpuExprCall(expr.func, new_args))
-end

+ 0 - 60
julia/src/Compiler/Expressions/cuda_call.jl

@@ -1,60 +0,0 @@
-
-
-#======================================================
-                CUDA KERNEL CALL
-======================================================#
-
-
-
-# Launch of a CUDA kernel, printed as
-# `ker_name <<< nblocks, threads_per_block, 0, starpu_cuda_get_local_stream() >>> (args);`
-struct StarpuExprCudaCall <: StarpuExpr
-
-    # Name of the launched kernel
-    ker_name :: Symbol
-
-    # First and second launch parameters
-    nblocks :: StarpuExpr
-    threads_per_block :: StarpuExpr
-
-    # Arguments given to the kernel
-    args :: Vector{StarpuExpr}
-
-end
-
-
-# Print the kernel launch over several lines : the kernel name, the launch
-# parameters between <<< and >>>, then the arguments, with a line break
-# every four arguments.
-function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0)
-
-    launch_indent = indent + starpu_indent_size
-    arg_indent = indent + 2 * starpu_indent_size
-
-    print_newline(io, indent)
-    print(io, expr.ker_name)
-    print_newline(io, launch_indent)
-    print(io, "<<< ")
-    print(io, expr.nblocks, indent = arg_indent)
-    print(io, ", ")
-    print(io, expr.threads_per_block, indent = arg_indent)
-    print(io, ", 0, starpu_cuda_get_local_stream()")
-    print_newline(io, launch_indent)
-    print(io, ">>> (")
-
-    for (pos, arg) in enumerate(expr.args)
-
-        if pos > 1
-            print(io, ", ")
-            # Line break before the 5th, 9th, ... argument
-            pos % 4 == 1 && print_newline(io, arg_indent + 1)
-        end
-
-        print(io, arg, indent = arg_indent)
-    end
-
-    print(io, ");")
-    print_newline(io, indent)
-
-end
-
-
-# Apply `func` to the launch parameters and to every argument of the kernel
-# call, then to the call rebuilt from the results.
-# The sub-expressions are traversed with `apply`, so that `func` also
-# reaches what they are made of, and `func` is applied to the rebuilt node,
-# as the `apply` methods of the other expressions do.
-function apply(func :: Function, expr :: StarpuExprCudaCall)
-
-    nblocks = apply(func, expr.nblocks)
-    threads_per_block = apply(func, expr.threads_per_block)
-    args = map((x -> apply(func, x)), expr.args)
-
-    return func(StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args))
-end

+ 0 - 44
julia/src/Compiler/Expressions/field.jl

@@ -1,44 +0,0 @@
-
-
-#======================================================
-                STRUCTURE FIELDS
-======================================================#
-
-
-
-# Access to the field `field` of the expression `left`.
-struct StarpuExprField <: StarpuExpr
-
-    # Expression whose field is accessed
-    left :: StarpuExpr
-    # Name of the accessed field
-    field :: Symbol
-
-    # Printed as `(left)->field` when true, as `(left).field` otherwise
-    is_an_arrow :: Bool
-end
-
-
-# Parse a Julia `left.field` expression into a StarpuExprField printed
-# with a dot (never with an arrow).
-function starpu_parse_field(x :: Expr)
-
-    is_dot_expr = x.head == :(.) && length(x.args) == 2
-    is_dot_expr || error("Invalid parsing of dot expression")
-
-    left = starpu_parse(x.args[1])
-
-    # The field name comes quoted in a dot expression
-    quoted = x.args[2]
-    is_field_name = isa(quoted, QuoteNode) && isa(quoted.value, Symbol)
-    is_field_name || error("Invalid parsing of dot expression")
-
-    return StarpuExprField(left, quoted.value, false)
-end
-
-
-# Print the field access as `(left).field` or `(left)->field`.
-function print(io :: IO, x :: StarpuExprField ; indent = 0)
-    accessor = x.is_an_arrow ? "->" : "."
-
-    print(io, "(")
-    print(io, x.left, indent = indent)
-    print(io, ")", accessor, x.field)
-end
-
-
-
-# Apply `func` to the expression whose field is accessed, then to the field
-# access rebuilt from the result.
-# The left expression is traversed with `apply`, so that `func` also
-# reaches what it is made of.
-function apply(func :: Function, expr :: StarpuExprField)
-    return func(StarpuExprField(apply(func, expr.left), expr.field, expr.is_an_arrow))
-end

+ 0 - 100
julia/src/Compiler/Expressions/for.jl

@@ -1,100 +0,0 @@
-
-#======================================================
-                FOR LOOPS
-======================================================#
-
-
-# For loop `for iter in set ... end`.
-struct StarpuExprFor <: StarpuExpr
-
-    # Name of the loop variable
-    iter :: Symbol
-    # Interval the loop variable goes through
-    set:: StarpuExprInterval
-    # Instructions run at each iteration
-    body :: StarpuExpr
-
-    # Value of the `is_independant` keyword given to starpu_parse_for
-    is_independant :: Bool
-    # Instructions printed right before the loop (empty after parsing)
-    set_declarations :: Vector{StarpuExpr}
-
-end
-
-
-
-# Parse a Julia `for iter = set ... end` expression into a StarpuExprFor
-# without set declarations. The iterator must be a plain variable and the
-# set an interval.
-function starpu_parse_for(x :: Expr; is_independant = false)
-
-    x.head == :for || error("Invalid \"for\" expression")
-
-    header = x.args[1]
-
-    header.head == :(=) || error("Invalid \"for\" iterator affectation")
-
-    iter = starpu_parse(header.args[1])
-
-    isa(iter, StarpuExprVar) || error("Invalid \"for\" iterator")
-
-    set = starpu_parse(header.args[2])
-
-    isa(set, StarpuExprInterval) || error("Set of values in \"for\" loop must be an interval")
-
-    body = starpu_parse(x.args[2])
-
-    return StarpuExprFor(iter.name, set, body, is_independant, StarpuExpr[])
-end
-
-
-
-
-
-# Print the set declarations, then the loop itself as a C for loop :
-#   - with a literal step, the loop variable goes from start_<id> to
-#     stop_<id> (compared with <= or >= depending on the sign of the step),
-#   - otherwise iter_<id> counts up to dim_<id> while the loop variable is
-#     increased by step_<id>.
-function print(io :: IO, x :: StarpuExprFor ; indent = 0)
-
-    print_newline(io, indent)
-    print(io, StarpuExprBlock(x.set_declarations), indent = indent)
-
-    suffix = x.set.id
-    loop_var = x.iter
-
-    start = "start_" * suffix
-    stop = "stop_" * suffix
-    step = "step_" * suffix
-    dim = "dim_" * suffix
-    counter = "iter_" * suffix
-
-    print_newline(io, indent, 2)
-
-    if !isa(x.set.step, StarpuExprValue)
-        print(io, "for ($counter = 0, $loop_var = $start ; ")
-        print(io, "$counter < $dim ; ")
-        print(io, "$counter += 1, $loop_var += $step)")
-
-    else
-        step_value = x.set.step.value
-        print(io, "for ($loop_var = $start ; ")
-        comparison_op = (step_value >= 0) ? "<=" : ">="
-        print(io, "$loop_var $comparison_op $stop ; ")
-        print(io, "$loop_var += $step_value)")
-
-    end
-
-    body_indent = indent + starpu_indent_size
-
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, body_indent)
-    print(io, x.body, indent = body_indent)
-    print_newline(io, indent)
-    print(io, "}")
-    print_newline(io, indent)
-
-end
-
-
-
-# Apply `func` to the set declarations, to the set, then to the body of
-# the loop, and finally to the loop rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprFor)
-
-    # Traversal order : declarations, set, body
-    new_declarations = map( (decl -> apply(func, decl)), expr.set_declarations)
-    new_set = apply(func, expr.set)
-    new_body = apply(func, expr.body)
-
-    rebuilt = StarpuExprFor(expr.iter, new_set, new_body, expr.is_independant, new_declarations)
-
-    return func(rebuilt)
-end

+ 0 - 85
julia/src/Compiler/Expressions/function.jl

@@ -1,85 +0,0 @@
-
-
-#======================================================
-                FUNCTION DECLARATION
-======================================================#
-
-
-# Expression node describing a function definition.
-struct StarpuExprFunction <: StarpuExpr
-    ret_type :: Type # return type taken from the typed prototype
-    func :: Symbol # function name
-    args :: Vector{StarpuExprTypedVar} # typed parameters, in declaration order
-    body :: StarpuExpr # parsed function body
-end
-
-
-# Parses a Julia `function` expression into a StarpuExprFunction.
-# The prototype must be a call annotated with a return type and every argument
-# must be a typed variable; any other shape raises an error.
-function starpu_parse_function(x :: Expr)
-
-    if (x.head != :function)
-        error("Invalid \"function\" expression")
-    end
-
-    typed_decl = starpu_parse(x.args[1])
-
-    # The prototype is expected to parse as a typed expression wrapping the call
-    if (!isa(typed_decl, StarpuExprTypedExpr))
-        error("Invalid \"function\" prototype : a return type must be specified")
-    end
-
-    prototype = typed_decl.expr
-
-    if (!isa(prototype, StarpuExprCall))
-        error("Invalid \"function\" prototype")
-    end
-
-    arg_list = StarpuExprTypedVar[]
-
-    # Untyped arguments are rejected
-    for type_arg in prototype.args
-        if (!isa(type_arg, StarpuExprTypedVar))
-            error("Invalid \"function\" argument list")
-        end
-        push!(arg_list, type_arg)
-    end
-
-    body = starpu_parse(x.args[2])
-
-    return StarpuExprFunction(typed_decl.typ, prototype.func, arg_list, body)
-end
-
-
-
-# Writes the C definition of `x`: return type, name, parenthesised argument
-# list, then the body between braces.
-function print(io :: IO, x :: StarpuExprFunction ; indent = 0)
-
-    print(io, starpu_type_traduction(x.ret_type), " ")
-    print(io, x.func, '(')
-
-    for (position, arg) in enumerate(x.args)
-
-        if (position > 1)
-            print(io, ", ")
-            # A line break is inserted before the 5th, 9th, 13th... argument
-            if (position % 4 == 1)
-                print_newline(io, indent + starpu_indent_size + length(String(x.func)) + 13)
-            end
-        end
-
-        print(io, arg, indent = indent + starpu_indent_size)
-    end
-
-    body_indent = indent + starpu_indent_size
-
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, body_indent)
-    print(io, x.body, indent = body_indent)
-    print_newline(io, indent)
-    print(io, "}\n\n")
-    print_newline(io, indent)
-end
-
-
-
-# Transforms the arguments and the body, rebuilds the function node with the
-# same return type and name, and passes it to `func`.
-function apply(func :: Function, expr :: StarpuExprFunction)
-
-    new_args = map(arg -> apply(func, arg), expr.args)
-    new_body = apply(func, expr.body)
-    rebuilt = StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
-
-    return func(rebuilt)
-end

+ 0 - 94
julia/src/Compiler/Expressions/if.jl

@@ -1,94 +0,0 @@
-
-
-#======================================================
-                IF STATEMENT
-======================================================#
-
-
-
-# Conditional without an "else" branch.
-struct StarpuExprIf <: StarpuExpr
-    cond :: StarpuExpr # tested condition
-    then_statement :: StarpuExpr # statement run when the condition holds
-end
-
-
-# Conditional with both branches.
-struct StarpuExprIfElse <: StarpuExpr
-    cond :: StarpuExpr # tested condition
-    then_statement :: StarpuExpr # statement run when the condition holds
-    else_statement :: StarpuExpr # statement run otherwise
-end
-
-
-# Parses an `if` expression. Two arguments give a StarpuExprIf, a third one
-# (the else branch) gives a StarpuExprIfElse.
-function starpu_parse_if(x :: Expr)
-
-    (x.head == :if) || error("Invalid \"if\" expression")
-
-    nb_args = length(x.args)
-
-    (nb_args >= 2) || error("Invalid \"if\" statement")
-
-    condition = starpu_parse(x.args[1])
-    then_branch = starpu_parse(x.args[2])
-
-    if (nb_args > 2)
-        else_branch = starpu_parse(x.args[3])
-        return StarpuExprIfElse(condition, then_branch, else_branch)
-    end
-
-    return StarpuExprIf(condition, then_branch)
-end
-
-
-# Writes a C "if" statement, followed by its "else" block when `x` is a
-# StarpuExprIfElse. Only the if/else form ends with a final newline.
-function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0)
-
-    branch_indent = indent + starpu_indent_size
-
-    print_newline(io, indent)
-    print(io, "if (")
-    print(io, x.cond, indent = branch_indent)
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, branch_indent)
-    print(io, x.then_statement, indent = branch_indent)
-    print_newline(io, indent)
-    print(io, "}")
-
-    if (isa(x, StarpuExprIfElse))
-        print(io, " else")
-        print_newline(io, indent)
-        print(io, "{")
-        print_newline(io, branch_indent)
-        print(io, x.else_statement, indent = branch_indent)
-        print_newline(io, indent)
-        print(io, "}")
-        print_newline(io, indent)
-    end
-
-end
-
-
-
-# Transforms the condition and the "then" branch, then passes the rebuilt
-# node to `func`.
-function apply(func :: Function, expr :: StarpuExprIf)
-
-    new_cond = apply(func, expr.cond)
-    new_then = apply(func, expr.then_statement)
-    rebuilt = StarpuExprIf(new_cond, new_then)
-
-    return func(rebuilt)
-end
-
-
-
-# Transforms the condition and both branches, then passes the rebuilt node
-# to `func`.
-function apply(func :: Function, expr :: StarpuExprIfElse)
-
-    new_cond = apply(func, expr.cond)
-    new_then = apply(func, expr.then_statement)
-    new_else = apply(func, expr.else_statement)
-    rebuilt = StarpuExprIfElse(new_cond, new_then, new_else)
-
-    return func(rebuilt)
-end

+ 0 - 48
julia/src/Compiler/Expressions/interval.jl

@@ -1,48 +0,0 @@
-
-#======================================================
-                INTERVALS
-======================================================#
-
-
-# Interval `start : step : stop`, tagged with a string identifier.
-struct StarpuExprInterval <: StarpuExpr
-    start :: StarpuExpr
-    step :: StarpuExpr
-    stop :: StarpuExpr
-
-    # Identifier of the interval; the "for" printer uses it as the suffix of
-    # the generated C variable names.
-    id :: String
-
-    # When no `id` keyword is given, a new one is drawn with rand_string()
-    function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string())
-        return new(start, step, stop, id)
-    end
-
-end
-
-
-# Parses `a : b` (implicit step of 1) or `a : s : b` into a StarpuExprInterval.
-function starpu_parse_interval(x :: Expr)
-
-    (x.head == :(:)) || error("Invalid \"interval\" expression")
-
-    first_bound = starpu_parse(x.args[1])
-    # Second operand: the upper bound of `a : b`, the step of `a : s : b`
-    second_operand = starpu_parse(x.args[2])
-
-    if (length(x.args) != 2)
-        last_bound = starpu_parse(x.args[3])
-        return StarpuExprInterval(first_bound, second_operand, last_bound)
-    end
-
-    return StarpuExprInterval(first_bound, StarpuExprValue(1), second_operand)
-end
-
-
-
-# Transforms the three bounds and rebuilds the interval with the same id
-# before passing it to `func`.
-function apply(func :: Function, expr :: StarpuExprInterval)
-
-    new_start = apply(func, expr.start)
-    new_step = apply(func, expr.step)
-    new_stop = apply(func, expr.stop)
-    rebuilt = StarpuExprInterval(new_start, new_step, new_stop, id = expr.id)
-
-    return func(rebuilt)
-end

+ 0 - 70
julia/src/Compiler/Expressions/ref.jl

@@ -1,70 +0,0 @@
-
-#======================================================
-                ARRAYS AND REFERENCES
-======================================================#
-
-
-# Indexed access `ref[indexes...]`.
-struct StarpuExprRef <: StarpuExpr
-    ref :: StarpuExpr # indexed expression
-    indexes :: Vector{StarpuExpr} # one entry per index
-end
-
-
-# Parses an indexing expression: the first argument is the indexed
-# expression, all following ones are its indexes.
-function starpu_parse_ref(x :: Expr)
-
-    (x.head == :ref) || error("Invalid \"reference\" expression")
-
-    indexed = starpu_parse(x.args[1])
-    parsed_indexes = map(starpu_parse, x.args[2:end])
-
-    return StarpuExprRef(indexed, parsed_indexes)
-end
-
-
-
-# Structural equality: `y` must also be a StarpuExprRef with an equal indexed
-# expression and pairwise equal indexes.
-function equals(x :: StarpuExprRef, y :: StarpuExpr)
-
-    isa(y, StarpuExprRef) || return false
-
-    if equals(x.ref, y.ref) && length(x.indexes) == length(y.indexes)
-        index_matches = map(equals, x.indexes, y.indexes)
-        return all(index_matches)
-    end
-
-    return false
-end
-
-
-
-
-# Writes the indexed expression followed by one "[index]" group per index.
-function print(io :: IO, x :: StarpuExprRef ; indent = 0)
-
-    print(io, x.ref, indent = indent)
-
-    for index in x.indexes
-        print(io, "[")
-        print(io, index, indent = indent)
-        print(io, "]")
-    end
-
-end
-
-
-
-# Transforms the indexed expression and each index, then passes the rebuilt
-# node to `func`.
-function apply(func :: Function, expr :: StarpuExprRef)
-
-    new_ref = apply(func, expr.ref)
-    new_indexes = map(index -> apply(func, index), expr.indexes)
-    rebuilt = StarpuExprRef(new_ref, new_indexes)
-
-    return func(rebuilt)
-end

+ 0 - 33
julia/src/Compiler/Expressions/return.jl

@@ -1,33 +0,0 @@
-
-#======================================================
-                RETURN EXPRESSION
-======================================================#
-
-
-# `return` statement carrying the returned expression.
-struct StarpuExprReturn <: StarpuExpr
-    value :: StarpuExpr # returned expression
-end
-
-# Parses a `return` expression; its first argument is the returned value.
-function starpu_parse_return(x :: Expr)
-
-    (x.head == :return) || error("Invalid \"return\" expression")
-
-    returned = starpu_parse(x.args[1])
-
-    return StarpuExprReturn(returned)
-end
-
-
-# Writes "return " followed by the returned expression.
-function print(io :: IO, x :: StarpuExprReturn ; indent = 0)
-    print(io, "return ")
-    return print(io, x.value, indent = indent)
-end
-
-
-
-# Transforms the returned value, then passes the rebuilt node to `func`.
-function apply(func :: Function, expr :: StarpuExprReturn)
-
-    new_value = apply(func, expr.value)
-
-    return func(StarpuExprReturn(new_value))
-end

+ 0 - 63
julia/src/Compiler/Expressions/simple_expressions.jl

@@ -1,63 +0,0 @@
-
-
-# Root type of the expression tree built by the parser.
-abstract type StarpuExpr end
-
-
-# Fallback traversal: a node without a dedicated `apply` method is handed to
-# `func` unchanged.
-apply(func :: Function, expr :: StarpuExpr) = func(expr)
-
-
-
-
-# Reference to a variable by name.
-struct StarpuExprVar <: StarpuExpr
-    name :: Symbol
-end
-
-# A variable prints as its bare name; `indent` is accepted but unused.
-function print(io :: IO, x :: StarpuExprVar ; indent = 0)
-    return print(io, x.name)
-end
-
-
-
-# Literal value kept as parsed (number, string, character, `nothing`...).
-struct StarpuExprValue <: StarpuExpr
-    value :: Any
-end
-
-
-# Writes the wrapped literal: nothing at all for `nothing`, double-quoted
-# text for strings, single-quoted text for characters, and the default
-# `print` rendering for any other value. `indent` is accepted but unused.
-function print(io :: IO, x :: StarpuExprValue ; indent = 0)
-
-    value = x.value
-
-    # Identity test: `==` can be overloaded by the type of the wrapped value,
-    # whereas `===` is true for `nothing` only.
-    if value === nothing
-        return
-    end
-
-    if isa(value, AbstractString)
-        print(io, '"', value, '"')
-        return
-    end
-
-    if isa(value, Char)
-        print(io, '\'', value, '\'')
-        return
-    end
-
-    print(io, value)
-end
-
-
-
-
-# Field-less marker node, printed as the text INVALID.
-struct StarpuExprInvalid <: StarpuExpr
-end
-
-function print(io :: IO, x :: StarpuExprInvalid ; indent = 0)
-    return print(io, "INVALID")
-end
-
-
-
-# Fallback: anything without a more specific method is wrapped as a literal.
-starpu_parse(raw_value :: Any) = StarpuExprValue(raw_value)
-
-# A bare symbol is a variable reference.
-starpu_parse(sym :: Symbol) = StarpuExprVar(sym)

+ 0 - 109
julia/src/Compiler/Expressions/typed.jl

@@ -1,109 +0,0 @@
-
-#======================================================
-                TYPED EXPRESSION
-======================================================#
-
-
-
-
-
-
-
-# Common supertype of the type-annotated nodes.
-abstract type StarpuExprTyped <: StarpuExpr end
-
-
-# Variable with a type annotation (`name :: typ`).
-struct StarpuExprTypedVar <: StarpuExprTyped
-    name :: Symbol
-    typ :: Type
-end
-
-# Any other expression with a type annotation (`expr :: typ`).
-struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ?
-    expr :: StarpuExpr
-    typ :: Type
-end
-
-
-# Parses `expr :: T`. The type is obtained by evaluating the annotation; any
-# failure there (including a result that is not a Type) is reported as an
-# invalid type. A typed variable gives a StarpuExprTypedVar, anything else a
-# StarpuExprTypedExpr.
-function starpu_parse_typed(x :: Expr)
-
-    (x.head == :(::)) || error("Invalid type assigned expression")
-
-    annotated = starpu_parse(x.args[1])
-
-    declared_type = try
-        eval(x.args[2]) :: Type
-    catch
-        error("Invalid type in type assigned expression")
-    end
-
-    if (!isa(annotated, StarpuExprVar))
-        return StarpuExprTypedExpr(annotated, declared_type)
-    end
-
-    return StarpuExprTypedVar(annotated.name, declared_type)
-end
-
-
-
-
-
-# Julia scalar type => name of the C type written in generated code.
-# Types missing from this table make the lookup in starpu_type_traduction
-# fail.
-starpu_type_traduction_dict = Dict(
-    Void => "void",
-    Int32 => "int32_t",
-    UInt32 => "uint32_t",
-    Float32 => "float",
-    Int64 => "int64_t",
-    UInt64 => "uint64_t",
-    Float64 => "double"
-)
-
-
-
-# Returns the C spelling of the Julia type `x`: arrays are delegated to
-# starpu_type_traduction_array, a Ptr becomes its element type followed by
-# "*", anything else is looked up in starpu_type_traduction_dict.
-function starpu_type_traduction(x)
-
-    (x <: Array) && return starpu_type_traduction_array(x)
-
-    (x <: Ptr) && return starpu_type_traduction(eltype(x)) * "*"
-
-    return starpu_type_traduction_dict[x]
-
-end
-
-
-# An N-dimensional array of T is spelled as the C type of T followed by
-# N "*" characters.
-function starpu_type_traduction_array(x :: Type{Array{T,N}}) where {T,N}
-
-    element_type = starpu_type_traduction(T)
-
-    return element_type * ("*" ^ N)
-end
-
-
-
-# A typed variable prints as a C declaration ("type name"); a typed
-# expression prints as its inner expression only.
-function print(io :: IO, x :: StarpuExprTyped ; indent = 0)
-
-    if (!isa(x, StarpuExprTypedVar))
-        print(io, x.expr, indent = indent)
-    else
-        print(io, starpu_type_traduction(x.typ), " ")
-        print(io, x.name)
-    end
-end
-
-
-
-# Transforms the inner expression, keeps the type annotation, then passes the
-# rebuilt node to `func`.
-function apply(func :: Function, expr :: StarpuExprTypedExpr)
-
-    transformed = apply(func, expr.expr)
-    rebuilt = StarpuExprTypedExpr(transformed, expr.typ)
-
-    return func(rebuilt)
-end

+ 0 - 53
julia/src/Compiler/Expressions/while.jl

@@ -1,53 +0,0 @@
-
-#======================================================
-                While loop
-======================================================#
-
-
-# `while` loop node.
-struct StarpuExprWhile <: StarpuExpr
-    cond :: StarpuExpr # loop condition
-    body :: StarpuExpr # loop body
-end
-
-# Parses a `while` expression: first argument is the condition, second one
-# the body.
-function starpu_parse_while(x :: Expr)
-
-    (x.head == :while) || error("Invalid \"while\" loop")
-
-    (length(x.args) >= 2) || error("Invalid \"while\" loop")
-
-    condition = starpu_parse(x.args[1])
-    loop_body = starpu_parse(x.args[2])
-
-    return StarpuExprWhile(condition, loop_body)
-end
-
-
-# Writes a C "while" statement: header, then the body between braces one
-# indentation level deeper.
-function print(io :: IO, x :: StarpuExprWhile ; indent = 0)
-
-    inner_indent = indent + starpu_indent_size
-
-    print_newline(io, indent)
-    print(io, "while (")
-    print(io, x.cond, indent = inner_indent)
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, inner_indent)
-    print(io, x.body, indent = inner_indent)
-    print_newline(io, indent)
-    print(io, "}")
-    print_newline(io, indent)
-end
-
-
-
-# Transforms the condition and the body, then passes the rebuilt loop to
-# `func`.
-function apply(func :: Function, expr :: StarpuExprWhile)
-
-    new_cond = apply(func, expr.cond)
-    new_body = apply(func, expr.body)
-    rebuilt = StarpuExprWhile(new_cond, new_body)
-
-    return func(rebuilt)
-end

+ 0 - 69
julia/src/Compiler/Generate_files/c_files.jl

@@ -1,69 +0,0 @@
-
-
-
-# Name of the C file receiving generated CPU kernels. The sentinel value
-# "PRINT TO STDOUT" means that no file has been opened yet.
-global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
-
-# Prelude written at the top of every generated CPU kernel file.
-# jlstarpu_interval_size returns the number of elements of start:step:stop,
-# i.e. (stop - start) / step + 1 when the step goes towards stop, and 0 for
-# an empty interval or a null step. The previous formula,
-# (stop - start +/- 1) / step, lost one element whenever (stop - start) was a
-# multiple of a step other than +/-1 and divided by zero for a null step.
-const cpu_kernel_file_start = "#include <stdio.h>
-#include <stdint.h>
-#include <starpu.h>
-
-static inline long long jlstarpu_max(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
-{
-    if (step == 0 || (step > 0 && stop < start) || (step < 0 && stop > start)){
-            return 0;
-    }
-    return jlstarpu_max(0, (stop - start) / step + 1);
-}
-
-"
-
-
-"""
-	Opens a new C source file, where generated CPU kernels will be written
-"""
-function starpu_new_cpu_kernel_file(file_name :: String)
-
-    # Later @cpu_kernel expansions append to this file
-    global generated_cpu_kernel_file_name = file_name
-
-    # "w" truncates an existing file. The do-block form closes the file even
-    # when writing the prelude throws.
-    open(file_name, "w") do kernel_file
-        print(kernel_file, cpu_kernel_file_start)
-    end
-
-    return nothing
-end
-
-
-"""
-	Executes the StarPU C compiler to the following function declaration.
-	If no call to starpu_new_cpu_kernel_file has been made before, it only
-	prints the resulting function. Otherwise, it writes into the source file
-	specified when starpu_new_cpu_kernel_file was called.
-"""
-macro cpu_kernel(x)
-
-    # Parsing and transformation to a CPU kernel happen at macro expansion
-    starpu_expr = transform_to_cpu_kernel(starpu_parse(x))
-
-    return quote
-
-        # The global file name is read while the macro is expanded, so the
-        # name in effect at that moment is the one embedded in this code.
-        to_stdout = ($(esc(generated_cpu_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-			println("\nNo specified CPU kernel file to write into : writting to STDOUT instead\n")
-            kernel_file = STDOUT
-        else
-            # Append mode: kernels accumulate after the prelude
-            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, $starpu_expr)
-
-        # STDOUT must stay open; only a real file is closed
-        if (!to_stdout)
-            close(kernel_file)
-        end
-    end
-end

+ 0 - 134
julia/src/Compiler/Generate_files/cuda_files.jl

@@ -1,134 +0,0 @@
-
-
-
-# Name of the Cuda file receiving generated GPU kernels. The sentinel value
-# "PRINT TO STDOUT" means that no file has been opened yet.
-global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
-
-# Prelude written at the top of every generated Cuda kernel file. It holds a
-# host and a __device__ copy of the same helpers.
-# jlstarpu_interval_size returns the number of elements of start:step:stop,
-# i.e. (stop - start) / step + 1 when the step goes towards stop, and 0 for
-# an empty interval or a null step. The previous formula,
-# (stop - start +/- 1) / step, lost one element whenever (stop - start) was a
-# multiple of a step other than +/-1 and divided by zero for a null step.
-const cuda_kernel_file_start = "#include <stdio.h>
-#include <stdint.h>
-#include <starpu.h>
-
-#define THREADS_PER_BLOCK 64
-
-static inline long long jlstarpu_max(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
-{
-    if (step == 0 || (step > 0 && stop < start) || (step < 0 && stop > start)){
-            return 0;
-    }
-    return jlstarpu_max(0, (stop - start) / step + 1);
-}
-
-
-__device__ static inline long long jlstarpu_max__device(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-__device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
-{
-	if (step == 0 || (step > 0 && stop < start) || (step < 0 && stop > start)){
-		return 0;
-	}
-	return jlstarpu_max__device(0, (stop - start) / step + 1);
-}
-
-
-"
-
-"""
-	Opens a new Cuda source file, where generated GPU kernels will be written
-"""
-function starpu_new_cuda_kernel_file(file_name :: String)
-
-    # Later @cuda_kernel expansions append to this file
-    global generated_cuda_kernel_file_name = file_name
-
-    # "w" truncates an existing file. The do-block form closes the file even
-    # when writing the prelude throws.
-    open(file_name, "w") do kernel_file
-        print(kernel_file, cuda_kernel_file_start)
-    end
-
-    return nothing
-end
-
-
-"""
-	Executes the StarPU Cuda compiler to the following function declaration.
-	If no call to starpu_new_cuda_kernel_file has been made before, it only
-	prints the resulting function. Otherwise, it writes into the source file
-	specified when starpu_new_cuda_kernel_file was called.
-"""
-macro cuda_kernel(x)
-
-    # The transformation yields two expressions: `kernel` is written with a
-    # "__global__" prefix and `prekernel` with an extern "C" prefix.
-    prekernel, kernel = transform_to_cuda_kernel(starpu_parse(x))
-
-    return quote
-
-        # The global file name is read while the macro is expanded, so the
-        # name in effect at that moment is the one embedded in this code.
-        to_stdout = ($(esc(generated_cuda_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-			println("\nNo specified CUDA kernel file to write into : writting to STDOUT instead\n")
-            kernel_file = STDOUT
-        else
-            # Append mode: kernels accumulate after the prelude
-            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, "__global__ ", $kernel)
-        print(kernel_file, "\nextern \"C\" ", $prekernel)
-
-        # STDOUT must stay open; only a real file is closed
-        if (!to_stdout)
-            close(kernel_file)
-        end
-    end
-end
-
-
-
-"""
-	Executes @cuda_kernel and @cpu_kernel
-"""
-macro cpu_cuda_kernel(x)
-
-	# The declaration is parsed once and transformed for both targets
-	parsed = starpu_parse(x)
-	cpu_expr = transform_to_cpu_kernel(parsed)
-	prekernel, kernel = transform_to_cuda_kernel(parsed)
-
-	return quote
-
-		# First part: CPU kernel, written to the CPU kernel file (or STDOUT).
-		# Both file names are read while the macro is expanded.
-		to_stdout = ($(esc(generated_cpu_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-            kernel_file = STDOUT
-			println("\nNo specified CPU kernel file to write into : writting to STDOUT instead\n")
-        else
-            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, $cpu_expr)
-
-        if (!to_stdout)
-            close(kernel_file)
-        end
-
-
-		# Second part: Cuda kernel, written to the Cuda kernel file (or STDOUT)
-		to_stdout = ($(esc(generated_cuda_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-            kernel_file = STDOUT
-			println("\nNo specified CUDA kernel file to write into : writting to STDOUT instead\n")
-        else
-            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, "__global__ ", $kernel)
-        print(kernel_file, "\nextern \"C\" ", $prekernel)
-
-        if (!to_stdout)
-            close(kernel_file)
-        end
-	end
-end

+ 0 - 54
julia/src/Compiler/Generate_files/so_files.jl

@@ -1,54 +0,0 @@
-
-
-"""
-	Compiles C source file opened by starpu_new_cpu_kernel_file
-    and filled by @cpu_kernel declarations.
-    Output file is a shared library which can be provided to starpu_init() in
-    order to find kernel.
-"""
-function compile_cpu_kernels(output_file :: String)
-
-    gcc_options = "-O3 -shared -fPIC"
-
-    # pkg-config output, with its last character dropped
-    cflags = readstring(`pkg-config --cflags starpu-1.3`)[1:end-1]
-    libs = readstring(`pkg-config --libs starpu-1.3`)[1:end-1]
-
-    system("gcc $generated_cpu_kernel_file_name $gcc_options $cflags $libs -o $output_file")
-
-    # Back to the "no file opened" state
-    global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
-
-    return nothing
-end
-
-
-"""
-	Compiles Cuda source file opened by starpu_new_cuda_kernel_file
-    and filled by @cuda_kernel declarations.
-    Output file is a shared library which can be provided to starpu_init() in
-    order to find kernel.
-"""
-function compile_cuda_kernels(output_file :: String)
-
-    nvcc_options = " -O3 --shared --compiler-options \'-fPIC\' "
-
-    # pkg-config output, with its last character dropped
-    cflags = readstring(`pkg-config --cflags starpu-1.3`)[1:end-1]
-    libs = readstring(`pkg-config --libs starpu-1.3`)[1:end-1]
-
-    system("nvcc $generated_cuda_kernel_file_name $nvcc_options $cflags $libs -o $output_file")
-
-    # Back to the "no file opened" state
-    global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
-
-    return nothing
-end
-
-
-"""
-    Combines several shared library into a new one.
-    Can be used to have both CPU and Cuda kernels (from compile_cpu_kernels
-    compile_cuda_kernels) accessible from the same library.
-"""
-function combine_kernel_files(output_file :: String, input_files :: Vector{String})
-
-    # An empty list used to fail with an obscure MethodError while the
-    # command line was built; it is now reported explicitly.
-    if (isempty(input_files))
-        error("combine_kernel_files needs at least one input library")
-    end
-
-    # Each file name is followed by one space, as before, without splatting
-    # the whole list into a function call.
-    input_str = join(map((x -> x * " "), input_files))
-
-    system("gcc -shared -fPIC $input_str -o $output_file")
-
-end

+ 0 - 39
julia/src/Compiler/include.jl

@@ -1,39 +0,0 @@
-
-
-
-include("utils.jl")
-
-include("Expressions/simple_expressions.jl")
-include("Expressions/affect.jl")
-include("Expressions/block.jl")
-include("Expressions/call.jl")
-include("Expressions/cuda_call.jl")
-include("Expressions/field.jl")
-include("Expressions/interval.jl")
-include("Expressions/for.jl")
-include("Expressions/typed.jl")
-include("Expressions/function.jl")
-include("Expressions/if.jl")
-include("Expressions/ref.jl")
-include("Expressions/return.jl")
-include("Expressions/while.jl")
-
-include("parsing.jl")
-
-include("expression_manipulation.jl")
-
-include("C/substitute_args.jl")
-include("C/substitute_func_calls.jl")
-include("C/substitute_indexing.jl")
-include("C/add_for_loop_declarations.jl")
-include("C/flatten_blocks.jl")
-include("C/create_cpu_kernel.jl")
-
-include("Cuda/indep_for.jl")
-include("Cuda/indep_for_kernel_ids.jl")
-include("Cuda/create_cuda_kernel.jl")
-
-
-include("Generate_files/c_files.jl")
-include("Generate_files/cuda_files.jl")
-include("Generate_files/so_files.jl")

+ 0 - 146
julia/src/Wrapper/Julia/starpu_codelet.jl

@@ -1,146 +0,0 @@
-
-
-export StarpuDataAccessMode
-export STARPU_NONE,STARPU_R,STARPU_W,STARPU_RW, STARPU_SCRATCH
-export STARPU_REDUX,STARPU_COMMUTE, STARPU_SSEND, STARPU_LOCALITY
-export STARPU_ACCESS_MODE_MAX
-
-# Data access modes expressed as bit flags; STARPU_RW combines the R and W
-# bits.
-# NOTE(review): values presumably mirror the C enum starpu_data_access_mode --
-# confirm against the StarPU headers in use.
-@enum(StarpuDataAccessMode,
-
-    STARPU_NONE = 0,
-    STARPU_R = (1 << 0),
-    STARPU_W = (1 << 1),
-    STARPU_RW = ((1 << 0) | (1 << 1)),
-    STARPU_SCRATCH = (1 << 2),
-    STARPU_REDUX = (1 << 3),
-    STARPU_COMMUTE = (1 << 4),
-    STARPU_SSEND = (1 << 5),
-    STARPU_LOCALITY = (1 << 6),
-    STARPU_ACCESS_MODE_MAX = (1 << 7)
-
-)
-
-
-export StarpuCodelet
-# Julia-side description of a codelet, paired with the C codelet structure
-# allocated by the wrapper library.
-struct StarpuCodelet
-
-    where_to_execute :: UInt32 # bit mask of STARPU_CPU / STARPU_CUDA
-
-    cpu_func :: String # symbol name of the CPU kernel, "" if none
-    gpu_func :: String # symbol name of the GPU kernel, "" if none
-
-    modes :: Vector{StarpuDataAccessMode} # one access mode per buffer
-
-    perfmodel :: StarpuPerfmodel
-
-    c_codelet :: Ptr{Void} # structure returned by jlstarpu_new_codelet
-
-
-    # Keyword constructor. When `where_to_execute` is not given, it is derived
-    # from which of `cpu_func` / `gpu_func` are non-empty.
-    # Raises an error when more than STARPU_NMAXBUFS modes are given.
-    function StarpuCodelet(;
-        cpu_func :: String = "",
-        gpu_func :: String = "",
-        modes :: Vector{StarpuDataAccessMode} = StarpuDataAccessMode[],
-        perfmodel :: StarpuPerfmodel = StarpuPerfmodel(),
-        where_to_execute :: Union{Void, UInt32} = nothing
-    )
-
-        if (length(modes) > STARPU_NMAXBUFS)
-            error("Codelet has too much buffers ($(length(modes)) but only $STARPU_NMAXBUFS are allowed)")
-        end
-
-        real_c_codelet_ptr = @starpucall jlstarpu_new_codelet Ptr{Void} ()
-        # The pointer is recorded in the list of allocated C structures
-        push!(jlstarpu_allocated_structures, real_c_codelet_ptr)
-
-        # Identity test rather than `==` to detect the "not given" default
-        if (where_to_execute === nothing)
-            # A Bool times a flag gives the flag or 0
-            real_where = ((cpu_func != "") * STARPU_CPU) | ((gpu_func != "") * STARPU_CUDA)
-        else
-            real_where = where_to_execute
-        end
-
-        output = new(real_where, cpu_func, gpu_func, modes, perfmodel, real_c_codelet_ptr)
-
-        # Copy the Julia-side fields into the C structure
-        starpu_c_codelet_update(output)
-
-        return output
-    end
-end
-
-
-
-# Builds a StarpuCodeletTranslator from `cl` and passes it, together with the
-# C codelet pointer, to jlstarpu_codelet_update in the wrapper library.
-function starpu_c_codelet_update(cl :: StarpuCodelet)
-
-    translating_cl = StarpuCodeletTranslator(cl)
-
-    @starpucall(jlstarpu_codelet_update,
-                Void, (Ptr{Void}, Ptr{Void}),
-                Ref{StarpuCodeletTranslator}(translating_cl),
-                cl.c_codelet
-            )
-end
-
-
-
-# Looks `func_name` up in the tasks library and returns its address.
-# An empty name gives C_NULL; a null lookup result raises an error.
-# NOTE(review): Libdl.dlsym is documented to throw on a missing symbol, so
-# the null check below may never trigger -- confirm.
-function load_starpu_function_pointer(func_name :: String)
-
-    isempty(func_name) && return C_NULL
-
-    symbol_address = Libdl.dlsym(starpu_tasks_library_handle, func_name)
-
-    if (symbol_address != C_NULL)
-        return symbol_address
-    end
-
-    error("Couldn't find function symbol $func_name into extern library file $starpu_tasks_library")
-end
-
-
-
-# Mutable mirror of a StarpuCodelet made of pointers and C-compatible
-# scalars; a Ref to it is what starpu_c_codelet_update passes to the wrapper
-# library.
-# NOTE(review): the field order presumably has to match a C structure on the
-# wrapper side -- confirm before reordering.
-mutable struct StarpuCodeletTranslator
-
-    where_to_execute :: UInt32
-
-    cpu_func :: Ptr{Void}
-    cpu_func_name :: Cstring
-
-    gpu_func :: Ptr{Void}
-
-    nbuffers :: Cint
-    modes :: Ptr{Void}
-
-    perfmodel :: Ptr{Void}
-
-
-
-    # Fills every field from `cl`. Raises an error when `where_to_execute` is
-    # zero or when neither kernel name resolves to a function pointer.
-    function StarpuCodeletTranslator(cl :: StarpuCodelet)
-
-        output = new()
-
-        if (iszero(cl.where_to_execute))
-            error("StarpuCodelet field \"where_to_execute\" is empty")
-        end
-
-        output.where_to_execute = cl.where_to_execute
-
-        # Empty names resolve to C_NULL
-        cpu_func_ptr = load_starpu_function_pointer(cl.cpu_func)
-        gpu_func_ptr = load_starpu_function_pointer(cl.gpu_func)
-
-        if (cpu_func_ptr == C_NULL && gpu_func_ptr == C_NULL)
-            error("No function specified inside codelet")
-        end
-
-        output.cpu_func = cpu_func_ptr
-        output.cpu_func_name = Cstring_from_String(cl.cpu_func)
-
-        output.gpu_func = gpu_func_ptr
-
-        output.nbuffers = Cint(length(cl.modes))
-        # Raw pointer into the vector owned by `cl`
-        output.modes = pointer(cl.modes)
-
-        output.perfmodel = cl.perfmodel.c_perfmodel
-
-        return output
-    end
-
-end

+ 0 - 234
julia/src/Wrapper/Julia/starpu_data_handle.jl

@@ -1,234 +0,0 @@
-
-
-# Memory node identifier passed as home node to the registration functions
-STARPU_MAIN_RAM = 0 #TODO: ENUM
-
-
-# Data handles are manipulated as untyped pointers
-const StarpuDataHandlePointer = Ptr{Void}
-
-
-
-# A data handle is a handle pointer together with its list of destructors
-StarpuDataHandle = StarpuDestructible{StarpuDataHandlePointer}
-
-
-
-# Wraps a handle pointer and its destructors into a StarpuDataHandle.
-function StarpuNewDataHandle(ptr :: StarpuDataHandlePointer, destr :: Function...) :: StarpuDataHandle
-    handle = StarpuDestructible(ptr, destr...)
-    return handle
-end
-
-
-
-# Calls starpu_data_unregister (through the wrapper library) on a raw handle
-# pointer. Used as the destructor of registered data handles.
-function starpu_data_unregister_pointer(ptr :: StarpuDataHandlePointer)
-    @starpucall(starpu_data_unregister, Void, (Ptr{Void},), ptr)
-end
-
-
-export starpu_data_unregister
-# Runs the unregister destructor of every given handle.
-function starpu_data_unregister(handles :: StarpuDataHandle...)
-    foreach(handles) do handle
-        starpu_execute_destructor!(handle, starpu_data_unregister_pointer)
-    end
-end
-
-
-
-export starpu_data_register
-
-# Registers the vector `v` through starpu_vector_data_register, with
-# STARPU_MAIN_RAM as home node, and returns a data handle whose destructor
-# unregisters it.
-function starpu_data_register(v :: Vector{T}) where T
-
-    # Receives the handle written by the C function
-    output = Ref{Ptr{Void}}(0)
-    data_pointer = pointer(v)
-
-    # Arguments: handle output, home node, data pointer, number of elements,
-    # element size
-    @starpucall(starpu_vector_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void}, UInt32, Csize_t),
-                output, STARPU_MAIN_RAM, data_pointer,
-                length(v), sizeof(T)
-            )
-
-    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer])
-end
-
-
-# Registers the matrix `m` through starpu_matrix_data_register, with
-# STARPU_MAIN_RAM as home node, and returns a data handle whose destructor
-# unregisters it.
-function starpu_data_register(m :: Matrix{T}) where T
-
-    # Receives the handle written by the C function
-    output = Ref{Ptr{Void}}(0)
-    data_pointer = pointer(m)
-    (height, width) = size(m)
-
-    # The three integers after the data pointer are height, height, width.
-    # NOTE(review): presumably (ld, nx, ny) in the C prototype -- confirm.
-    @starpucall(starpu_matrix_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void},
-                    UInt32, UInt32, UInt32, Csize_t),
-                output, STARPU_MAIN_RAM, data_pointer,
-                height, height, width, sizeof(T)
-            )
-
-    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer])
-end
-
-
-# Registers the 3-dimensional array `block` through
-# starpu_block_data_register, with STARPU_MAIN_RAM as home node, and returns
-# a data handle whose destructor unregisters it.
-function starpu_data_register(block :: Array{T,3}) where T
-
-    # Receives the handle written by the C function
-    output = Ref{Ptr{Void}}(0)
-    data_pointer = pointer(block)
-    (height, width, depth) = size(block)
-
-    # After the data pointer: height, height * width, then the three
-    # dimensions and the element size.
-    # NOTE(review): presumably (ldy, ldz, nx, ny, nz) in the C prototype --
-    # confirm.
-    @starpucall(starpu_block_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void},
-                    UInt32, UInt32, UInt32, UInt32,
-                    UInt32, Csize_t),
-                output, STARPU_MAIN_RAM, data_pointer,
-                height, height * width,
-                height, width, depth,
-                sizeof(T)
-            )
-
-    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)
-end
-
-
-
-# Registers the value held by `ref` through starpu_variable_data_register,
-# with STARPU_MAIN_RAM as home node, and returns a data handle whose
-# destructor unregisters it.
-function starpu_data_register(ref :: Ref{T}) where T
-
-    # Receives the handle written by the C function
-    output = Ref{Ptr{Void}}(0)
-
-    # Arguments: handle output, home node, the Ref itself, size of T
-    @starpucall(starpu_variable_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void}, Csize_t),
-                output, STARPU_MAIN_RAM, ref, sizeof(T)
-            )
-
-    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)
-end
-
-
-
-# Registers two or more objects at once and returns their handles in an
-# array, in argument order.
-function starpu_data_register(x1, x2, next_args...)
-
-    first_handle = starpu_data_register(x1)
-    second_handle = starpu_data_register(x2)
-
-    remaining_handles = map(starpu_data_register, next_args)
-
-    return [first_handle, second_handle, remaining_handles...]
-end
-
-
-
-
-export starpu_data_get_sub_data
-
-# Returns the sub-data handle number `id` (1-based) of `root_data`. The
-# returned handle carries no destructor.
-# NOTE(review): the C function looks variadic; confirm that calling it with a
-# fixed ccall signature is valid on the target platform.
-function starpu_data_get_sub_data(root_data :: StarpuDataHandle, id)
-
-    # The literal 1 is passed ahead of the single index; the index is shifted
-    # to be 0-based.
-    output = @starpucall(starpu_data_get_sub_data,
-                        Ptr{Void}, (Ptr{Void}, Cuint, Cuint),
-                        root_data.object, 1, id - 1
-                    )
-
-    return StarpuNewDataHandle(output)
-end
-
-
-# Returns the sub-data handle at (`idx`, `idy`) (1-based) of `root_data`.
-# The returned handle carries no destructor.
-# NOTE(review): the C function looks variadic; confirm that calling it with a
-# fixed ccall signature is valid on the target platform.
-function starpu_data_get_sub_data(root_data :: StarpuDataHandle, idx, idy)
-
-    # The literal 2 is passed ahead of the two indexes; both are shifted to
-    # be 0-based.
-    output = @starpucall(starpu_data_get_sub_data,
-                        Ptr{Void}, (Ptr{Void}, Cuint, Cuint, Cuint),
-                        root_data.object, 2, idx - 1, idy - 1
-                    )
-
-    return StarpuNewDataHandle(output)
-end
-
-import Base.getindex
-
-
-
-# `handle[i]` and `handle[i, j]` are shorthands for starpu_data_get_sub_data.
-function Base.getindex(handle :: StarpuDataHandle, indexes...)
-    return starpu_data_get_sub_data(handle, indexes...)
-end
-
-
-
-
-export StarpuDataFilterFunc
-export STARPU_MATRIX_FILTER_VERTICAL_BLOCK, STARPU_MATRIX_FILTER_BLOCK
-
-# Identifiers of the available partitioning filters.
-# NOTE(review): presumably translated into real filter functions on the C
-# wrapper side -- confirm.
-@enum(StarpuDataFilterFunc,
-
-    STARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
-    STARPU_MATRIX_FILTER_BLOCK = 1
-)
-
-export StarpuDataFilter
-"""
-    TODO : use real function pointers loaded from starpu shared library
-"""
-mutable struct StarpuDataFilter
-
-    filter_func :: StarpuDataFilterFunc # which filter to apply
-    nchildren :: Cuint # number of parts to create
-
-    # `nchildren` is converted to Cuint explicitly
-    function StarpuDataFilter(filter_func, nchildren)
-        return new(filter_func, Cuint(nchildren))
-    end
-
-end
-
-
-# Calls starpu_data_unpartition (through the wrapper library) on a raw handle
-# pointer, with STARPU_MAIN_RAM as second argument. Used as the destructor of
-# partitioned handles.
-function starpu_data_unpartition_pointer(ptr :: StarpuDataHandlePointer)
-    @starpucall(starpu_data_unpartition, Void, (Ptr{Void}, Cuint), ptr, STARPU_MAIN_RAM)
-end
-
-export starpu_data_partition
-# Partitions `handle` according to `filter` through jlstarpu_data_partition.
-# The unpartition destructor is added to the handle before the C call is
-# made.
-function starpu_data_partition(handle :: StarpuDataHandle, filter :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    @starpucall(jlstarpu_data_partition,
-            Void, (Ptr{Void}, Ptr{Void}),
-            handle.object, Ref{StarpuDataFilter}(filter)
-        )
-end
-
-
-export starpu_data_unpartition
-# Runs the unpartition destructor of every given handle.
-function starpu_data_unpartition(handles :: StarpuDataHandle...)
-
-    foreach(handles) do handle
-        starpu_execute_destructor!(handle, starpu_data_unpartition_pointer)
-    end
-
-    return nothing
-end
-
-
-
-export starpu_data_map_filters
-
-# Applies one filter to `handle` through jlstarpu_data_map_filters_1_arg.
-# The unpartition destructor is added to the handle before the C call is
-# made.
-function starpu_data_map_filters(handle :: StarpuDataHandle, filter :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    @starpucall(jlstarpu_data_map_filters_1_arg,
-            Void, (Ptr{Void}, Ptr{Void}),
-            handle.object, Ref{StarpuDataFilter}(filter)
-    )
-end
-
-
-# Applies two filters to `handle` through jlstarpu_data_map_filters_2_arg.
-# The unpartition destructor is added to the handle before the C call is
-# made.
-function starpu_data_map_filters(handle :: StarpuDataHandle, filter_1 :: StarpuDataFilter, filter_2 :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    @starpucall(jlstarpu_data_map_filters_2_arg,
-            Void, (Ptr{Void}, Ptr{Void}, Ptr{Void}),
-            handle.object,
-            Ref{StarpuDataFilter}(filter_1),
-            Ref{StarpuDataFilter}(filter_2)
-    )
-
-end

+ 0 - 49
julia/src/Wrapper/Julia/starpu_define.jl

@@ -1,49 +0,0 @@
-
-
-
-
-# Limits that have to be kept equal to the C macros of the same name
-STARPU_MAXIMPLEMENTATIONS = 1 # TODO : These must be the same values as defined in C macros !
-STARPU_NMAXBUFS = 8 # TODO : find a way to make it automatically match
-
-
-# Worker-type flags, combined into the `where_to_execute` mask of a codelet
-STARPU_CPU = 1 << 1
-STARPU_CUDA = 1 << 3
-
-# Expands to the pair (symbol, "libjlstarpu_c_wrapper").
-macro starpufunc(symbol)
-    :($symbol, "libjlstarpu_c_wrapper")
-end
-
-"""
-    Used to call a StarPU function compiled inside "libjlstarpu_c_wrapper.so"
-    Works as ccall function
-"""
-macro starpucall(func, ret_type, arg_types, args...)
-    # Builds the ccall expression by hand: the function name is paired with
-    # the wrapper library name, every other piece is escaped so that it is
-    # evaluated in the caller's scope.
-    return Expr(:call, :ccall, (func, "libjlstarpu_c_wrapper"), esc(ret_type), esc(arg_types), map(esc, args)...)
-end
-
-
-export @debugprint
-# Prints its arguments with println between the escape sequences "\x1b[32m"
-# and "\x1b[0m", then flushes STDOUT.
-macro debugprint(x...)
-
-    # Arguments are escaped so that they are evaluated in the caller's scope
-    expr = Expr(:call, :println, "\x1b[32m", map(esc, x)..., "\x1b[0m")
-
-    quote
-        $expr
-        flush(STDOUT)
-    end
-end
-
-
-
-# Wraps the data pointer of `str` into a Cstring, without copying.
-# NOTE(review): the result looks valid only while `str` is kept alive by the
-# caller -- confirm at call sites.
-function Cstring_from_String(str :: String)
-    return Cstring(pointer(str))
-end
-
-
-
-# Calls memset, through the wrapper library, with fill value 0 and
-# sizeof(x) bytes on a Ref{T} built from `x`; returns the pointer given back
-# by memset.
-# NOTE(review): the Ref is created inside this call, so for an immutable `x`
-# the zeroed memory is presumably a temporary copy -- confirm the intent.
-function jlstarpu_set_to_zero(x :: T) :: Ptr{Void} where {T}
-    @starpucall(memset,
-          Ptr{Void}, (Ptr{Void}, Cint, Csize_t),
-          Ref{T}(x), 0, sizeof(x)
-        )
-end

+ 0 - 125
julia/src/Wrapper/Julia/starpu_destructible.jl

@@ -1,125 +0,0 @@
-
-
-
-"""
-    Object used to store a lost of function which must
-    be applied to and object
-"""
-mutable struct StarpuDestructible{T}
-
-    object :: T
-    destructors :: LinkedList{Function}
-
-end
-
-starpu_block_list = Vector{LinkedList{StarpuDestructible}}()
-
-
-
-function StarpuDestructible(obj :: T, destructors :: Function...) where T
-
-    if (isempty(starpu_block_list))
-        error("Creation of a StarpuDestructible object while not beeing in a @starpu_block")
-    end
-
-    l = LinkedList{Function}()
-
-    for destr in destructors
-        add_to_tail!(l, destr)
-    end
-
-    output = StarpuDestructible{T}(obj, l)
-    add_to_head!(starpu_block_list[end], output)
-
-    return output
-end
-
-
-
-
-function starpu_enter_new_block()
-
-    push!(starpu_block_list, LinkedList{StarpuDestructible}())
-end
-
-"""
-    Applies every stored destructores to the StarpuDestructible stored object
-"""
-function starpu_destruct!(x :: StarpuDestructible)
-
-    for destr in x.destructors
-        destr(x.object)
-    end
-
-    empty!(x.destructors)
-
-    return nothing
-end
-
-
-function starpu_exit_block()
-
-    destr_list = pop!(starpu_block_list)
-
-    for x in destr_list
-        starpu_destruct!(x)
-    end
-
-end
-
-"""
-    Adds new destructors to the list of function. They will be executed before
-        already stored ones when calling starpu_destruct!
-"""
-function starpu_add_destructor!(x :: StarpuDestructible, destrs :: Function...)
-
-    for d in destrs
-        add_to_head!(x.destructors, d)
-    end
-
-    return nothing
-end
-
-"""
-    Removes detsructor without executing it
-"""
-function starpu_remove_destructor!(x :: StarpuDestructible, destr :: Function)
-
-    @foreach_asc x.destructors lnk begin
-
-        if (lnk.data == destr)
-            remove_link!(lnk)
-            break
-        end
-    end
-
-    return nothing
-end
-
-
-"""
-    Executes "destr" function. If it was one of the stored destructors, it
-    is removed.
-    This function can be used to allow user to execute a specific action manually
-        (ex : explicit call to starpu_data_unpartition() without unregistering)
-"""
-function starpu_execute_destructor!(x :: StarpuDestructible, destr :: Function)
-
-    starpu_remove_destructor!(x, destr)
-    return destr(x.object)
-end
-
-
-export @starpu_block
-
-"""
-    Declares a block of code. Every declared StarpuDestructible in this code
-    will execute its destructors on its object, once the block is exited
-"""
-macro starpu_block(expr)
-    quote
-        starpu_enter_new_block()
-        $(esc(expr))
-        starpu_exit_block()
-    end
-end

+ 0 - 20
julia/src/Wrapper/Julia/starpu_include.jl

@@ -1,20 +0,0 @@
-
-__precompile__()
-module StarPU
-
-
-    include("starpu_define.jl")
-    include("static_structures.jl")
-    include("starpu_simple_functions.jl")
-    include("starpu_perfmodel.jl")
-    include("starpu_codelet.jl")
-
-    include("linked_list.jl")
-    include("starpu_destructible.jl")
-    include("starpu_data_handle.jl")
-
-    include("starpu_task.jl")
-    include("starpu_task_submit.jl")
-    include("starpu_init_shutdown.jl")
-
-end

+ 0 - 0
julia/src/Wrapper/Julia/starpu_init_shutdown.jl


Some files were not shown because too many files changed in this diff