Bläddra i källkod

Merge branch 'master' into fpga

Samuel Thibault 5 år sedan
förälder
incheckning
99abc1661e
100 ändrade filer med 4437 tillägg och 2849 borttagningar
  1. 2 0
      .gitignore
  2. 1 0
      ChangeLog
  3. 161 251
      configure.ac
  4. 1 1
      doc/doxygen/chapters/310_data_management.doxy
  5. 4 3
      doc/doxygen/chapters/320_scheduling.doxy
  6. 2 1
      doc/doxygen/chapters/350_scheduling_policy_definition.doxy
  7. 5 0
      doc/doxygen/chapters/370_online_performance_tools.doxy
  8. 2 2
      examples/mult/sgemm.sh
  9. 9 0
      include/fstarpu_mod.f90
  10. 8 1
      include/starpu_perfmodel.h
  11. 14 0
      include/starpu_scheduler.h
  12. 4 0
      julia/StarPU.jl/Manifest.toml
  13. 7 0
      julia/StarPU.jl/Project.toml
  14. 2 0
      julia/StarPU.jl/REQUIRE
  15. 1230 0
      julia/StarPU.jl/src/StarPU.jl
  16. 261 0
      julia/StarPU.jl/src/compiler/c.jl
  17. 349 0
      julia/StarPU.jl/src/compiler/cuda.jl
  18. 0 0
      julia/StarPU.jl/src/compiler/expression_manipulation.jl
  19. 928 0
      julia/StarPU.jl/src/compiler/expressions.jl
  20. 132 0
      julia/StarPU.jl/src/compiler/file_generation.jl
  21. 13 0
      julia/StarPU.jl/src/compiler/include.jl
  22. 5 8
      julia/src/Compiler/parsing.jl
  23. 0 12
      julia/src/Compiler/utils.jl
  24. 1 7
      julia/src/Wrapper/C/jlstarpu.h
  25. 21 66
      julia/src/Wrapper/C/jlstarpu_data_handles.c
  26. 1 7
      julia/src/Wrapper/C/jlstarpu_simple_functions.c
  27. 2 7
      julia/src/Wrapper/C/jlstarpu_task.h
  28. 9 9
      julia/src/Wrapper/C/jlstarpu_task_submit.c
  29. 1 2
      julia/src/Wrapper/C/jlstarpu_utils.h
  30. 23 26
      julia/src/Wrapper/Julia/linked_list.jl
  31. 0 0
      julia/black_scholes/black_scholes.c
  32. 83 13
      julia/tst/black_scholes/cpu_cuda_black_scholes.jl
  33. 263 0
      julia/mandelbrot/mandelbrot.c
  34. 30 0
      julia/mandelbrot/mandelbrot.jl
  35. 20 0
      julia/mult/README
  36. 55 0
      julia/mult/makefile
  37. 237 0
      julia/mult/mult.c
  38. 146 0
      julia/mult/mult.jl
  39. 4 0
      julia/mult/mult.plot
  40. 44 0
      julia/mult/mult_native.jl
  41. 11 0
      julia/mult/res/mult_cstarpu_gcc9_s72_2x2_b4x2.dat
  42. 29 0
      julia/mult/res/mult_gen_gcc9_1x4.dat
  43. 29 0
      julia/mult/res/mult_gen_gcc9_4x1.dat
  44. 7 0
      julia/mult/res/mult_gen_gcc9_s100_4x1.dat
  45. 17 0
      julia/mult/res/mult_gen_gcc9_s50_4x1.dat
  46. 4 0
      julia/mult/res/mult_gen_gcc9_s64_16x16_b4x2.dat
  47. 13 0
      julia/mult/res/mult_gen_gcc9_s64_4x4_b4x2.dat
  48. 7 0
      julia/mult/res/mult_gen_gcc9_s64_8x1_b4x2.dat
  49. 7 0
      julia/mult/res/mult_gen_gcc9_s64_8x8_b4x2.dat
  50. 11 0
      julia/mult/res/mult_gen_gcc9_s72_16x18_b4x2.dat
  51. 11 0
      julia/mult/res/mult_gen_gcc9_s72_16x8_b4x2.dat
  52. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2.dat
  53. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b4x2.dat
  54. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b4x4.dat
  55. 5 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b8x2.dat
  56. 11 0
      julia/mult/res/mult_gen_gcc9_s72_4x1.dat
  57. 11 0
      julia/mult/res/mult_gen_gcc9_s72_4x4_b4x2.dat
  58. 11 0
      julia/mult/res/mult_gen_gcc9_s72_8x8_b4x2.dat
  59. 9 0
      julia/mult/res/mult_gen_gcc9_s80_4x1.dat
  60. 3 0
      julia/mult/res/mult_gen_icc_s72_2x1_b4x2.dat
  61. 11 0
      julia/mult/res/mult_gen_icc_s72_4x4_b4x2.dat
  62. 62 0
      julia/mult/res/mult_native.dat
  63. 11 0
      julia/mult/res/mult_nogen_gcc9_s72_2x2_b2x2.dat
  64. 11 0
      julia/mult/res/mult_nogen_gcc9_s72_2x2_b4x2.dat
  65. 11 0
      julia/mult/res/mult_nogen_icc_s72-36_2x2_b4x2.dat
  66. 11 0
      julia/mult/res/mult_nogen_icc_s72_2x2_b4x2.dat
  67. 11 0
      julia/mult/res/mult_nogen_icc_s72x2_2x2_b4x2.dat
  68. 0 57
      julia/src/Compiler/C/add_for_loop_declarations.jl
  69. 0 15
      julia/src/Compiler/C/create_cpu_kernel.jl
  70. 0 27
      julia/src/Compiler/C/flatten_blocks.jl
  71. 0 76
      julia/src/Compiler/C/substitute_args.jl
  72. 0 25
      julia/src/Compiler/C/substitute_func_calls.jl
  73. 0 52
      julia/src/Compiler/C/substitute_indexing.jl
  74. 0 179
      julia/src/Compiler/Cuda/create_cuda_kernel.jl
  75. 0 49
      julia/src/Compiler/Cuda/indep_for.jl
  76. 0 121
      julia/src/Compiler/Cuda/indep_for_kernel_ids.jl
  77. 0 60
      julia/src/Compiler/Expressions/affect.jl
  78. 0 68
      julia/src/Compiler/Expressions/block.jl
  79. 0 75
      julia/src/Compiler/Expressions/call.jl
  80. 0 60
      julia/src/Compiler/Expressions/cuda_call.jl
  81. 0 44
      julia/src/Compiler/Expressions/field.jl
  82. 0 100
      julia/src/Compiler/Expressions/for.jl
  83. 0 85
      julia/src/Compiler/Expressions/function.jl
  84. 0 94
      julia/src/Compiler/Expressions/if.jl
  85. 0 48
      julia/src/Compiler/Expressions/interval.jl
  86. 0 70
      julia/src/Compiler/Expressions/ref.jl
  87. 0 33
      julia/src/Compiler/Expressions/return.jl
  88. 0 63
      julia/src/Compiler/Expressions/simple_expressions.jl
  89. 0 109
      julia/src/Compiler/Expressions/typed.jl
  90. 0 53
      julia/src/Compiler/Expressions/while.jl
  91. 0 69
      julia/src/Compiler/Generate_files/c_files.jl
  92. 0 134
      julia/src/Compiler/Generate_files/cuda_files.jl
  93. 0 54
      julia/src/Compiler/Generate_files/so_files.jl
  94. 0 39
      julia/src/Compiler/include.jl
  95. 0 146
      julia/src/Wrapper/Julia/starpu_codelet.jl
  96. 0 234
      julia/src/Wrapper/Julia/starpu_data_handle.jl
  97. 0 49
      julia/src/Wrapper/Julia/starpu_define.jl
  98. 0 125
      julia/src/Wrapper/Julia/starpu_destructible.jl
  99. 0 20
      julia/src/Wrapper/Julia/starpu_include.jl
  100. 0 0
      julia/src/Wrapper/Julia/starpu_init_shutdown.jl

+ 2 - 0
.gitignore

@@ -7,6 +7,8 @@
 /build
 /build
 /build2
 /build2
 /build-aux
 /build-aux
+/build_starpu
+/install
 /GPATH
 /GPATH
 /GRTAGS
 /GRTAGS
 /GTAGS
 /GTAGS

+ 1 - 0
ChangeLog

@@ -29,6 +29,7 @@ New features:
   * New number_events.data trace file which monitors number of events in trace
   * New number_events.data trace file which monitors number of events in trace
     files. This file can be parsed by the new script
     files. This file can be parsed by the new script
     starpu_fxt_number_events_to_names.py to convert event keys to event names.
     starpu_fxt_number_events_to_names.py to convert event keys to event names.
+  * New STARPU_PER_WORKER perfmodel.
 
 
 Small changes:
 Small changes:
   * Use the S4U interface of Simgrid instead of xbt and MSG.
   * Use the S4U interface of Simgrid instead of xbt and MSG.

+ 161 - 251
configure.ac

@@ -92,6 +92,7 @@ if test x$enable_perf_debug = xyes; then
     enable_shared=no
     enable_shared=no
 fi
 fi
 default_enable_mpi_check=maybe
 default_enable_mpi_check=maybe
+default_enable_mpi=yes
 
 
 ###############################################################################
 ###############################################################################
 #                                                                             #
 #                                                                             #
@@ -306,6 +307,9 @@ if test x$enable_simgrid = xyes ; then
         # want that by default
         # want that by default
 	default_enable_mpi_check=no
 	default_enable_mpi_check=no
 
 
+	# disable MPI support by default
+	default_enable_mpi=no
+
 	# Simgrid 3.12 & 3.13 need -std=c++11 to be able to build anything in C++...
 	# Simgrid 3.12 & 3.13 need -std=c++11 to be able to build anything in C++...
 	AC_LANG_PUSH([C++])
 	AC_LANG_PUSH([C++])
 	if test x$enable_shared = xno ; then
 	if test x$enable_shared = xno ; then
@@ -370,145 +374,138 @@ fi
 
 
 ###############################################################################
 ###############################################################################
 #                                                                             #
 #                                                                             #
-#                                    MPI                                      #
+#                                LIBTOOLS                                     #
 #                                                                             #
 #                                                                             #
 ###############################################################################
 ###############################################################################
 
 
-AC_ARG_ENABLE(mpi, [AS_HELP_STRING([--disable-mpi],
+#c++11 detection
-                              [Disable StarPU MPI library generation])],
+AX_CXX_COMPILE_STDCXX(11,noext,optional)
-            [enable_mpi=$enableval],
-            [enable_mpi=yes])
 
 
-AC_ARG_ENABLE(mpi-master-slave, [AS_HELP_STRING([--enable-mpi-master-slave],
+AC_SUBST([STARPU_HAVE_CXX11], $HAVE_CXX11)
-                              [Enable StarPU to run with the master-slave mode])],
+AM_CONDITIONAL([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1])
-            use_mpi_master_slave=$enableval,
+if test $HAVE_CXX11 -eq 1; then
-            use_mpi_master_slave=no)
+  AC_DEFINE(STARPU_HAVE_CXX11, [1], [compiler supports cxx11])
+fi
 
 
-#Check MPICC
+LT_PREREQ([2.2])
-AC_ARG_WITH(mpicc, [AS_HELP_STRING([--with-mpicc[=<path to mpicc>]],
+LT_INIT([win32-dll])
-           [Path of the mpicc compiler])],
-   [
-       if test x$withval = xyes; then
-           AC_MSG_ERROR(--with-mpicc must be given a pathname)
-       elif test x$withval = xno ; then
-           mpi_requested=no
-	   mpicc_path=""
-	   use_mpi=no
-       else
-	   mpi_requested=yes
-           mpicc_path=$withval
-       fi
-   ],
-   [
-       mpi_requested=maybe
-       if test x$enable_simgrid = xyes ; then
-           DEFAULT_MPICC=smpicc
-       else
-           DEFAULT_MPICC=mpicc
-       fi
-       # nothing was specified: default value is used
-       AC_PATH_PROG(mpicc_path, $DEFAULT_MPICC, [no], [$simgrid_dir/bin:$PATH])
-   ])
 
 
-# in case MPI was explicitely required, but is not available, this is an error
+AC_HEADER_STDC
-if test x$mpi_requested = xyes -a ! -x "$mpicc_path"; then
-   AC_MSG_ERROR([Compiler MPI not valid])
-fi
 
 
-if test x$mpi_requested != xno ; then
+AC_C_RESTRICT
-   # We test if the MPICC compiler exists
+
-     if test ! -x $mpicc_path; then
+# Check if bash is available
-         #MPICC does not exists or is not executable
+AC_PATH_PROG([REALBASH], [bash], , [/bin:$PATH])
-	 AC_MSG_RESULT(The mpicc compiler '$mpicc_path' does not have the execute permission)
+
-	 use_mpi=no
+# Record git version
-     else
+AC_PATH_PROG(gitcommand, git)
-	 use_mpi=yes
+if test "$gitcommand" = "" ; then
-	 if test x$enable_simgrid = xyes ; then
+   if test -f $srcdir/STARPU-REVISION ; then
-             AC_ARG_WITH(smpirun, [AS_HELP_STRING([--with-smpirun[=<path to smpirun>]],
+      cp $srcdir/STARPU-REVISION .
-						  [Path of the smpirun helper])],
+   else
-			 [
+      echo "unknown" > ./STARPU-REVISION
-			     if test x$withval = xyes; then
+   fi
-				 AC_MSG_ERROR(--with-smpirun must be given a pathname)
+else
-			     else
+   git log -n 1 --pretty="%H" $srcdir > ./STARPU-REVISION
-				 smpirun_path=$withval
-			     fi
-			 ],
-			 [
-			     # nothing was specified: default value is used
-			     AC_PATH_PROG(smpirun_path, smpirun, [no], [$simgrid_dir/bin:$PATH])
-			 ])
-	 fi
-     fi
 fi
 fi
 
 
-AC_MSG_CHECKING(mpicc path)
+AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"])
+
+###############################################################################
+#                                                                             #
+#                           MPI compilers                                     #
+#                                                                             #
+###############################################################################
+
+#Check MPICC
+if test x$enable_simgrid = xyes ; then
+    DEFAULT_MPICC=smpicc
+else
+    DEFAULT_MPICC=mpicc
+fi
+AC_ARG_WITH(mpicc, [AS_HELP_STRING([--with-mpicc=<path to mpicc>], [Path of the mpicc compiler])], [DEFAULT_MPICC=$withval])
+AC_PATH_PROG(mpicc_path, $DEFAULT_MPICC, [no], [$simgrid_dir/bin:$PATH])
+AC_MSG_CHECKING(whether mpicc is available)
 AC_MSG_RESULT($mpicc_path)
 AC_MSG_RESULT($mpicc_path)
 AC_SUBST(MPICC, $mpicc_path)
 AC_SUBST(MPICC, $mpicc_path)
 
 
-
 #Check MPICXX/MPIC++
 #Check MPICXX/MPIC++
-AC_ARG_WITH(mpicxx, [AS_HELP_STRING([--with-mpicxx[=<path to mpicxx>]],
+if test x$enable_simgrid = xyes ; then
-           [Path of the mpicxx/mpic++ compiler])],
+    DEFAULT_MPICXX=smpicxx
-   [
+else
-       if test x$withval = xyes; then
+    DEFAULT_MPICXX=mpicxx
-           AC_MSG_ERROR(--with-mpicxx must be given a pathname)
+fi
-       else
+AC_ARG_WITH(mpicxx, [AS_HELP_STRING([--with-mpicxx=<path to mpicxx>], [Path of the mpicxx/mpic++ compiler])], [DEFAULT_MPICXX=$withval])
-           mpicxx_path=$withval
+AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$simgrid_dir/bin:$PATH])
-       fi
-   ],
-   [
-       if test x$enable_simgrid = xyes ; then
-           DEFAULT_MPICXX=smpicxx
-       else
-           DEFAULT_MPICXX=mpicxx
-       fi
-       # nothing was specified: default value is used
-       AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$simgrid_dir/bin:$PATH])
 
 
-       # try with mpic++ if mpicxx was not found
+# try with mpic++ if mpicxx was not found
-       if test x$mpicxx_path = xno ; then
+if test x$mpicxx_path = xno ; then
-            DEFAULT_MPICXX=mpic++
+    DEFAULT_MPICXX=mpic++
-            AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$simgrid_dir/bin:$PATH])
+    AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$simgrid_dir/bin:$PATH])
-       fi
+fi
-   ])
 
 
 # We test if the MPICXX/MPIC++ compiler exists
 # We test if the MPICXX/MPIC++ compiler exists
 if test ! -x $mpicxx_path; then
 if test ! -x $mpicxx_path; then
-    #MPICXX/MPIC++ does not exists or is not executable
     AC_MSG_RESULT(The mpicxx compiler '$mpicxx_path' does not have the execute permission)
     AC_MSG_RESULT(The mpicxx compiler '$mpicxx_path' does not have the execute permission)
-    use_mpicxx=no
+    mpicxx_path=no
-else
-    use_mpicxx=yes
 fi
 fi
 
 
-AC_MSG_CHECKING(mpicxx/mpic++ path)
+AC_MSG_CHECKING(whether mpicxx is available)
 AC_MSG_RESULT($mpicxx_path)
 AC_MSG_RESULT($mpicxx_path)
 AC_SUBST(MPICXX, $mpicxx_path)
 AC_SUBST(MPICXX, $mpicxx_path)
 
 
+###############################################################################
+#                                                                             #
+#                                    MPI                                      #
+#                                                                             #
+###############################################################################
 
 
-if test x$use_mpi = xyes -a \( x$enable_mpi = xyes -o x$use_mpi_master_slave = xyes \) ; then
+AC_ARG_ENABLE(mpi, [AS_HELP_STRING([--disable-mpi],
-    cc_or_mpicc=$mpicc_path
+                              [Disable StarPU MPI library generation])],
-        # For some reason, libtool uses gcc instead of mpicc when linking
+            [enable_mpi=$enableval],
-        # libstarpumpi.
+            [enable_mpi=$default_enable_mpi])
-        # On Darwin (and maybe other systems ?) the linker will fail (undefined
+
-        # references to MPI_*). We manually add the required flags to fix this
+# in case MPI was explicitly required, but mpicc is not available, this is an error
-        # issue.
+if test x$enable_mpi = xyes -a ! -x "$mpicc_path"; then
-        AC_SUBST(MPICC_LDFLAGS, `$mpicc_path --showme:link`)
+   AC_MSG_ERROR([Compiler MPI '$mpicc_path' not valid])
-else
-    cc_or_mpicc=$CC
 fi
 fi
 
 
-AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
+build_mpi_lib=$enable_mpi
 
 
-AC_ARG_ENABLE(mpi-pedantic-isend, [AS_HELP_STRING([--enable-mpi-pedantic-isend],
+###############################################################################
-				   [Prevent StarPU MPI from reading buffers while being sent over MPI])],
+#                                                                             #
-				   enable_mpi_pedantic_isend=$enableval, enable_mpi_pedantic_isend=no)
+#                                NEW MADELEINE                                #
-if test x$enable_mpi_pedantic_isend = xyes; then
+#                                                                             #
-	AC_DEFINE(STARPU_MPI_PEDANTIC_ISEND, [1], [enable StarPU MPI pedantic isend])
+###############################################################################
+
+AC_ARG_ENABLE(nmad, [AS_HELP_STRING([--enable-nmad],
+		                    [Enable StarPU MPI library generation using the new madeleine backend])],
+            [enable_nmad=$enableval],
+            [enable_nmad=no])
+
+build_nmad_lib=no
+AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
+#We can only build StarPU MPI Library if User wants it and MPI is available
+if test x$enable_mpi = xyes -a x$enable_nmad = xyes ; then
+    build_nmad_lib=yes
+    build_mpi_lib=no
+    PKG_CHECK_MODULES([NMAD],[nmad])
+else
+    build_nmad_lib=no
 fi
 fi
 
 
-#We can only build MPI Master Slave if User wants it and MPI is available
+###############################################################################
-if test x$use_mpi_master_slave = xyes -a x$use_mpi = xyes -a x$use_mpicxx = xyes; then
+#                                                                             #
+#                             MPI Master Slave                                #
+#                                                                             #
+###############################################################################
+
+AC_ARG_ENABLE(mpi-master-slave, [AS_HELP_STRING([--enable-mpi-master-slave],
+                              [Enable StarPU to run with the master-slave mode])],
+              use_mpi_master_slave=$enableval,
+              use_mpi_master_slave=no)
+#We can only build MPI Master Slave if User wants it and MPI compilers are available
+if test x$use_mpi_master_slave = xyes -a x$mpicc_path != xno -a x${mpicxx_path} != xno ; then
     build_mpi_master_slave=yes
     build_mpi_master_slave=yes
 else
 else
     build_mpi_master_slave=no
     build_mpi_master_slave=no
@@ -517,7 +514,9 @@ fi
 #users cannot use both at the same time
 #users cannot use both at the same time
 if test x$build_mpi_master_slave = xyes -a x$enable_mpi = xyes; then
 if test x$build_mpi_master_slave = xyes -a x$enable_mpi = xyes; then
     AC_MSG_WARN(StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI...)
     AC_MSG_WARN(StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI...)
-	enable_mpi=no
+    build_mpi_lib=no
+    build_nmad_lib=no
+    enable_mpi=no
 fi
 fi
 
 
 if test x$build_mpi_master_slave = xyes; then
 if test x$build_mpi_master_slave = xyes; then
@@ -549,95 +548,19 @@ AC_ARG_ENABLE(maxmpidev, [AS_HELP_STRING([--enable-maxmpidev=<number>],
 AC_MSG_RESULT($nmaxmpidev)
 AC_MSG_RESULT($nmaxmpidev)
 AC_DEFINE_UNQUOTED(STARPU_MAXMPIDEVS, [$nmaxmpidev], [maximum number of MPI devices])
 AC_DEFINE_UNQUOTED(STARPU_MAXMPIDEVS, [$nmaxmpidev], [maximum number of MPI devices])
 
 
-
-###############################################################################
-#                                                                             #
-#                                NEW MADELEINE                                #
-#                                                                             #
-###############################################################################
-
-AC_ARG_ENABLE(nmad, [AS_HELP_STRING([--enable-nmad],
-                              [Enable StarPU MPI library generation using new madeleine instead of mpi])],
-            [enable_nmad=$enableval],
-            [enable_nmad=no])
-
-if test x$use_mpi = xyes -a \( x$enable_nmad \) ; then
-    cc_or_mpicc=$mpicc_path
-        # For some reason, libtool uses gcc instead of mpicc when linking
-        # libstarpumpi.
-        # On Darwin (and maybe other systems ?) the linker will fail (undefined
-        # references to MPI_*). We manually add the required flags to fix this
-        # issue.
-        AC_SUBST(MPICC_LDFLAGS, `$mpicc_path --showme:link`)
-else
-    cc_or_mpicc=$CC
-fi
-
-build_nmad_lib=no
-AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
-#We can only build StarPU MPI Library if User wants it and MPI is available
-if test x$use_mpi = xyes -a x$enable_nmad = xyes ; then
-    build_nmad_lib=yes
-    enable_mpi=no
-    PKG_CHECK_MODULES([NMAD],[nmad])
-else
-    build_nmad_lib=no
-fi
-
-# in case NMAD was explicitely required, but the compiler MPI, this is an error
-if test x$enable_nmad = xyes -a ! -x "$mpicc_path"; then
-   AC_MSG_ERROR([Compiler MPI not valid])
-fi
-
-
-AC_MSG_CHECKING(whether the StarPU MPI nmad library should be generated)
-AC_MSG_RESULT($build_nmad_lib)
-
 ###############################################################################
 ###############################################################################
 #                                                                             #
 #                                                                             #
-#                                LIBTOOLS                                     #
+#                       Miscellaneous things for MPI                          #
 #                                                                             #
 #                                                                             #
 ###############################################################################
 ###############################################################################
 
 
-#c++11 detection
+AC_ARG_ENABLE(mpi-pedantic-isend, [AS_HELP_STRING([--enable-mpi-pedantic-isend],
-AX_CXX_COMPILE_STDCXX(11,noext,optional)
+				   [Prevent StarPU MPI from reading buffers while being sent over MPI])],
-
+				   enable_mpi_pedantic_isend=$enableval, enable_mpi_pedantic_isend=no)
-AC_SUBST([STARPU_HAVE_CXX11], $HAVE_CXX11)
+if test x$enable_mpi_pedantic_isend = xyes; then
-AM_CONDITIONAL([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1])
+	AC_DEFINE(STARPU_MPI_PEDANTIC_ISEND, [1], [enable StarPU MPI pedantic isend])
-if test $HAVE_CXX11 -eq 1; then
-  AC_DEFINE(STARPU_HAVE_CXX11, [1], [compiler supports cxx11])
-fi
-
-LT_PREREQ([2.2])
-LT_INIT([win32-dll])
-
-AC_HEADER_STDC
-
-AC_C_RESTRICT
-
-# Check if bash is available
-AC_PATH_PROG([REALBASH], [bash], , [/bin:$PATH])
-
-# Record git version
-AC_PATH_PROG(gitcommand, git)
-if test "$gitcommand" = "" ; then
-   if test -f $srcdir/STARPU-REVISION ; then
-      cp $srcdir/STARPU-REVISION .
-   else
-      echo "unknown" > ./STARPU-REVISION
-   fi
-else
-   git log -n 1 --pretty="%H" $srcdir > ./STARPU-REVISION
 fi
 fi
 
 
-AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"])
-
-###############################################################################
-#                                                                             #
-#                       Miscellaneous things for MPI                          #
-#                                                                             #
-###############################################################################
-
 # If the user specifically asks for it, or if we are in a developer checkout, we enable mpi check
 # If the user specifically asks for it, or if we are in a developer checkout, we enable mpi check
 AC_ARG_ENABLE(mpi-check, AC_HELP_STRING([--enable-mpi-check], [Enable execution of MPI testcases]),
 AC_ARG_ENABLE(mpi-check, AC_HELP_STRING([--enable-mpi-check], [Enable execution of MPI testcases]),
 	      [enable_mpi_check=$enableval], [enable_mpi_check=$default_enable_mpi_check])
 	      [enable_mpi_check=$enableval], [enable_mpi_check=$default_enable_mpi_check])
@@ -651,68 +574,45 @@ fi
 if test x$enable_mpi_check = xno ; then
 if test x$enable_mpi_check = xno ; then
     running_mpi_check=no
     running_mpi_check=no
 fi
 fi
+if test x$enable_mpi = xno ; then
+    running_mpi_check=no
+fi
 
 
-
+if test x$enable_mpi = xyes -a x$running_mpi_check = xyes ; then
-if test x$enable_simgrid = xno ; then
     # Check if mpiexec is available
     # Check if mpiexec is available
-    AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec[=<path to mpiexec>]],
+    if test x$enable_simgrid = xyes ; then
-                [Path of mpiexec])],
+	DEFAULT_MPIEXEC=smpirun
-        [
+        AC_ARG_WITH(smpirun, [AS_HELP_STRING([--with-smpirun[=<path to smpirun>]], [Path of the smpirun helper])], [DEFAULT_MPIEXEC=$withval])
-            if test x$withval = xyes; then
+	AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$simgrid_dir/bin:$PATH])
-                AC_MSG_ERROR(--with-mpiexec must be given a pathname)
+    else
-            else
+	DEFAULT_MPIEXEC=mpiexec
-                mpiexec_path=$withval
+	AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec=<path to mpiexec>], [Path of mpiexec])], [DEFAULT_MPIEXEC=$withval])
-            fi
+	if test x$mpicc_path = x ; then
-        ],
+	    AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$PATH])
-        [
+	else
-            # nothing was specified: look in the path
+	    AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$(dirname $mpicc_path):$PATH])
-	    if test x$mpicc_path = x ; then
+	fi
-		AC_PATH_PROG(mpiexec_path, mpiexec, [no], [$PATH])
+    fi
-	    else
-		AC_PATH_PROG(mpiexec_path, mpiexec, [no], [$(dirname $mpicc_path):$PATH])
-	    fi
-        ])
-
     AC_MSG_CHECKING(whether mpiexec is available)
     AC_MSG_CHECKING(whether mpiexec is available)
     AC_MSG_RESULT($mpiexec_path)
     AC_MSG_RESULT($mpiexec_path)
 
 
     # We test if MPIEXEC exists
     # We test if MPIEXEC exists
     if test ! -x $mpiexec_path; then
     if test ! -x $mpiexec_path; then
-        # if it's not valid, it could be the parameter given to configure.ac was not a full path, let's look for it
+        AC_MSG_RESULT(The mpiexec script '$mpiexec_path' is not valid)
-	if test x$mpicc_path = x ; then
+        running_mpi_check=no
-            AC_PATH_PROG(mpiexec_path_bis, $mpiexec_path, [no], [$PATH])
+        mpiexec_path=""
-	else
-            AC_PATH_PROG(mpiexec_path_bis, $mpiexec_path, [no], [$(dirname $mpicc_path):$PATH])
-	fi
-        AC_MSG_CHECKING(whether mpiexec is available (2nd try))
-        AC_MSG_RESULT($mpiexec_path_bis)
-	if test -x $mpiexec_path_bis; then
-	   mpiexec_path=$mpiexec_path_bis
-	else
-           #MPIEXEC does not exists or is not executable
-           AC_MSG_RESULT(The mpiexec script is not valid)
-           running_mpi_check=no
-           mpiexec_path=""
-	fi
     fi
     fi
     AC_SUBST(MPIEXEC,$mpiexec_path)
     AC_SUBST(MPIEXEC,$mpiexec_path)
 fi
 fi
 
 
 AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes)
 AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes)
-if test x$use_mpi = xyes ; then
+AC_MSG_CHECKING(whether MPI tests should be run)
-    AC_MSG_CHECKING(whether MPI tests should be run)
+AC_MSG_RESULT($running_mpi_check)
-    AC_MSG_RESULT($running_mpi_check)
-fi
-
-#We can only build StarPU MPI Library if User wants it and MPI is available
-if test x$use_mpi = xyes -a x$enable_mpi = xyes ; then
-    build_mpi_lib=yes
-else
-    build_mpi_lib=no
-fi
 
 
 AC_MSG_CHECKING(whether the StarPU MPI library should be generated)
 AC_MSG_CHECKING(whether the StarPU MPI library should be generated)
 AC_MSG_RESULT($build_mpi_lib)
 AC_MSG_RESULT($build_mpi_lib)
+AC_MSG_CHECKING(whether the StarPU MPI nmad library should be generated)
+AC_MSG_RESULT($build_nmad_lib)
 
 
 AM_CONDITIONAL(USE_MPI, test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes)
 AM_CONDITIONAL(USE_MPI, test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes)
 if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
 if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
@@ -722,14 +622,12 @@ if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
 	else
 	else
 		AC_DEFINE(STARPU_USE_MPI_NMAD,[1],[whether the StarPU MPI library (with a NewMadeleine implementation) is available])
 		AC_DEFINE(STARPU_USE_MPI_NMAD,[1],[whether the StarPU MPI library (with a NewMadeleine implementation) is available])
 	fi
 	fi
-else
-	running_mpi_check=no
 fi
 fi
 
 
-if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
+if test x$enable_mpi = xyes ; then
     if test x$enable_simgrid = xyes ; then
     if test x$enable_simgrid = xyes ; then
         if test x$enable_shared = xyes ; then
         if test x$enable_shared = xyes ; then
-	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this, or disable MPI with --disable-mpi])
+	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, if you need the MPI support, then use --disable-shared to fix this, else disable MPI with --disable-mpi])
         else
         else
 	    CFLAGS="$CFLAGS -fPIC"
 	    CFLAGS="$CFLAGS -fPIC"
 	    CXXFLAGS="$CXXFLAGS -fPIC"
 	    CXXFLAGS="$CXXFLAGS -fPIC"
@@ -744,17 +642,16 @@ AM_CONDITIONAL(STARPU_USE_MPI_NMAD, test x$build_nmad_lib = xyes)
 AM_CONDITIONAL(STARPU_USE_MPI, test x$build_nmad_lib = xyes -o x$build_mpi_lib = xyes)
 AM_CONDITIONAL(STARPU_USE_MPI, test x$build_nmad_lib = xyes -o x$build_mpi_lib = xyes)
 
 
 AC_ARG_WITH(mpiexec-args, [AS_HELP_STRING([--with-mpiexec-args[=<arguments to give when running mpiexec>]],
 AC_ARG_WITH(mpiexec-args, [AS_HELP_STRING([--with-mpiexec-args[=<arguments to give when running mpiexec>]],
-			[Arguments for mpiexec])],
+					  [Arguments for mpiexec])],
-	[
+	    [
 		mpiexec_args=$withval
 		mpiexec_args=$withval
-	])
+	    ])
 AC_SUBST(MPIEXEC_ARGS,$mpiexec_args)
 AC_SUBST(MPIEXEC_ARGS,$mpiexec_args)
 
 
-
 AC_MSG_CHECKING(whether MPI debug messages should be displayed)
 AC_MSG_CHECKING(whether MPI debug messages should be displayed)
 AC_ARG_ENABLE(mpi-verbose, [AS_HELP_STRING([--enable-mpi-verbose],
 AC_ARG_ENABLE(mpi-verbose, [AS_HELP_STRING([--enable-mpi-verbose],
-			[display MPI verbose debug messages (--enable-mpi-verbose=extra increase the verbosity)])],
+					   [display MPI verbose debug messages (--enable-mpi-verbose=extra increase the verbosity)])],
-			enable_mpi_verbose=$enableval, enable_mpi_verbose=no)
+	      enable_mpi_verbose=$enableval, enable_mpi_verbose=no)
 AC_MSG_RESULT($enable_mpi_verbose)
 AC_MSG_RESULT($enable_mpi_verbose)
 if test x$enable_mpi_verbose = xyes; then
 if test x$enable_mpi_verbose = xyes; then
 	AC_DEFINE(STARPU_MPI_VERBOSE, [1], [display MPI verbose debug messages])
 	AC_DEFINE(STARPU_MPI_VERBOSE, [1], [display MPI verbose debug messages])
@@ -764,6 +661,19 @@ if test x$enable_mpi_verbose = xextra; then
 	AC_DEFINE(STARPU_MPI_EXTRA_VERBOSE, [1], [display MPI verbose debug messages])
 	AC_DEFINE(STARPU_MPI_EXTRA_VERBOSE, [1], [display MPI verbose debug messages])
 fi
 fi
 
 
+if test x$enable_mpi = xyes -o x$build_mpi_master_slave = xyes ; then
+    cc_or_mpicc=$mpicc_path
+    # For some reason, libtool uses gcc instead of mpicc when linking
+    # libstarpumpi.
+    # On Darwin (and maybe other systems ?) the linker will fail (undefined
+    # references to MPI_*). We manually add the required flags to fix this
+    # issue.
+    AC_SUBST(MPICC_LDFLAGS, `$mpicc_path --showme:link`)
+else
+    cc_or_mpicc=$CC
+fi
+AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
+
 ###############################################################################
 ###############################################################################
 #                                                                             #
 #                                                                             #
 #                           MIC device compilation                            #
 #                           MIC device compilation                            #
@@ -3676,7 +3586,7 @@ AC_OUTPUT([
 	Makefile
 	Makefile
 	src/Makefile
 	src/Makefile
 	tools/Makefile
 	tools/Makefile
-	tools/replay/Makefile
+	tools/replay-mpi/Makefile
 	tools/starpu_env
 	tools/starpu_env
 	tools/starpu_codelet_profile
 	tools/starpu_codelet_profile
 	tools/starpu_codelet_histo_profile
 	tools/starpu_codelet_histo_profile

+ 1 - 1
doc/doxygen/chapters/310_data_management.doxy

@@ -602,7 +602,7 @@ whole machine, it would not be efficient to accumulate them in only one place,
 incurring data transmission each and access concurrency.
 incurring data transmission each and access concurrency.
 
 
 StarPU provides a mode ::STARPU_REDUX, which permits to optimize
 StarPU provides a mode ::STARPU_REDUX, which permits to optimize
-this case: it will allocate a buffer on each memory node, and accumulate
+this case: it will allocate a buffer on each worker (lazily), and accumulate
 intermediate results there. When the data is eventually accessed in the normal
 intermediate results there. When the data is eventually accessed in the normal
 mode ::STARPU_R, StarPU will collect the intermediate results in just one
 mode ::STARPU_R, StarPU will collect the intermediate results in just one
 buffer.
 buffer.

+ 4 - 3
doc/doxygen/chapters/320_scheduling.doxy

@@ -190,9 +190,10 @@ single task gives the consumption of the task in Joules, which can be given to
 starpu_perfmodel_update_history().
 starpu_perfmodel_update_history().
 
 
 Another way to provide the energy performance is to define a
 Another way to provide the energy performance is to define a
-perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH, and set the
+perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH or
-starpu_perfmodel::arch_cost_function field to a function which shall return the
+::STARPU_PER_WORKER, and set the starpu_perfmodel::arch_cost_function or
-estimated consumption of the task in Joules. Such a function can for instance
+starpu_perfmodel::worker_cost_function field to a function which shall return
+the estimated consumption of the task in Joules. Such a function can for instance
 use starpu_task_expected_length() on the task (in µs), multiplied by the
 use starpu_task_expected_length() on the task (in µs), multiplied by the
 typical power consumption of the device, e.g. in W, and divided by 1000000. to
 typical power consumption of the device, e.g. in W, and divided by 1000000. to
 get Joules.
 get Joules.

+ 2 - 1
doc/doxygen/chapters/350_scheduling_policy_definition.doxy

@@ -45,7 +45,8 @@ provides a complete list of the functions available for writing advanced schedul
 This includes getting an estimation for a task computation completion with
 This includes getting an estimation for a task computation completion with
 starpu_task_expected_length(), for the required data transfers with
 starpu_task_expected_length(), for the required data transfers with
 starpu_task_expected_data_transfer_time_for(), for the required energy with
 starpu_task_expected_data_transfer_time_for(), for the required energy with
-starpu_task_expected_energy(), etc. Other
+starpu_task_expected_energy(), etc. Per-worker variants are also available with
+starpu_task_worker_expected_length(), etc. Other
 useful functions include starpu_transfer_bandwidth(), starpu_transfer_latency(),
 useful functions include starpu_transfer_bandwidth(), starpu_transfer_latency(),
 starpu_transfer_predict(), ...
 starpu_transfer_predict(), ...
 One can also directly test the presence of a data handle with starpu_data_is_on_node().
 One can also directly test the presence of a data handle with starpu_data_is_on_node().

+ 5 - 0
doc/doxygen/chapters/370_online_performance_tools.doxy

@@ -401,6 +401,11 @@ filled with pointers to functions which return the expected duration
 of the task in micro-seconds, one per architecture, see for instance
 of the task in micro-seconds, one per architecture, see for instance
 <c>tests/datawizard/locality.c</c>
 <c>tests/datawizard/locality.c</c>
 </li>
 </li>
+
+<li>
+Provided explicitly by the application (model type ::STARPU_PER_WORKER)
+similarly with the starpu_perfmodel::worker_cost_function field.
+</li>
 </ul>
 </ul>
 
 
 For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and
 For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and

+ 2 - 2
examples/mult/sgemm.sh

@@ -67,8 +67,8 @@ then
 	$PREFIX/../../tools/starpu_codelet_histo_profile distrib.data || true
 	$PREFIX/../../tools/starpu_codelet_histo_profile distrib.data || true
 	[ -f distrib.data.starpu_sgemm_gemm.0.492beed5.33177600.pdf ] || true
 	[ -f distrib.data.starpu_sgemm_gemm.0.492beed5.33177600.pdf ] || true
 
 
-	if [ -x $PREFIX/../../tools/replay/starpu_replay ]; then
+	if [ -x $PREFIX/../../tools/starpu_replay ]; then
-		$STARPU_LAUNCH $PREFIX/../../tools/replay/starpu_replay tasks.rec
+		$STARPU_LAUNCH $PREFIX/../../tools/starpu_replay tasks.rec
 	fi
 	fi
 
 
 	[ ! -x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump tasks.rec -o perfs2.rec
 	[ ! -x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump tasks.rec -o perfs2.rec

+ 9 - 0
include/fstarpu_mod.f90

@@ -1626,6 +1626,15 @@ module fstarpu_mod
                         integer(c_int), value, intent(in) :: prio
                         integer(c_int), value, intent(in) :: prio
                 end subroutine fstarpu_data_idle_prefetch_on_node_prio
                 end subroutine fstarpu_data_idle_prefetch_on_node_prio
 
 
+                !unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node);
+                function fstarpu_data_is_on_node(dh, node) &
+                                bind(C,name="starpu_data_is_on_node")
+                        use iso_c_binding, only: c_ptr, c_int
+                        integer(c_int)                 :: fstarpu_data_is_on_node
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: node
+                end function fstarpu_data_is_on_node
+
                 ! void starpu_data_wont_use(starpu_data_handle_t handle);
                 ! void starpu_data_wont_use(starpu_data_handle_t handle);
                 subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use")
                 subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use")
                         use iso_c_binding, only: c_ptr
                         use iso_c_binding, only: c_ptr

+ 8 - 1
include/starpu_perfmodel.h

@@ -165,6 +165,7 @@ struct starpu_perfmodel_per_arch
 enum starpu_perfmodel_type
 enum starpu_perfmodel_type
 {
 {
         STARPU_PERFMODEL_INVALID=0,
         STARPU_PERFMODEL_INVALID=0,
+	STARPU_PER_WORKER,                /**< Application-provided per-worker cost model function */
 	STARPU_PER_ARCH,                  /**< Application-provided per-arch cost model function */
 	STARPU_PER_ARCH,                  /**< Application-provided per-arch cost model function */
 	STARPU_COMMON,                    /**< Application-provided common cost model function, with per-arch factor */
 	STARPU_COMMON,                    /**< Application-provided common cost model function, with per-arch factor */
 	STARPU_HISTORY_BASED,             /**< Automatic history-based cost model */
 	STARPU_HISTORY_BASED,             /**< Automatic history-based cost model */
@@ -226,11 +227,17 @@ struct starpu_perfmodel
 	*/
 	*/
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 	/**
 	/**
-	   Used by ::STARPU_COMMON. Take a task, an arch and implementation
+	   Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
 	   number, and must return a task duration estimation in
 	   number, and must return a task duration estimation in
 	   micro-seconds on that arch.
 	   micro-seconds on that arch.
 	*/
 	*/
 	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
 	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
+	/**
+	   Used by ::STARPU_PER_WORKER. Take a task, a worker id and implementation
+	   number, and must return a task duration estimation in
+	   micro-seconds on that worker.
+	*/
+	double (*worker_cost_function)(struct starpu_task *, unsigned workerid, unsigned nimpl);
 
 
 	/**
 	/**
 	   Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
 	   Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and

+ 14 - 0
include/starpu_scheduler.h

@@ -110,6 +110,10 @@ struct starpu_sched_policy
 	   to be executed by the worker. This method therefore permits
 	   to be executed by the worker. This method therefore permits
 	   to keep the state of the scheduler coherent even when
 	   to keep the state of the scheduler coherent even when
 	   StarPU bypasses the scheduling strategy.
 	   StarPU bypasses the scheduling strategy.
+
+	   Note: to get an estimation of the task duration, \p perf_workerid
+	   needs to be used rather than \p workerid, for the case of parallel
+	   tasks.
 	*/
 	*/
 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
 
 
@@ -366,6 +370,11 @@ uint32_t starpu_task_data_footprint(struct starpu_task *task);
 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 
 
 /**
 /**
+   Same as starpu_task_expected_length() but for a precise worker.
+*/
+double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
+
+/**
    Return an estimated speedup factor relative to CPU speed
    Return an estimated speedup factor relative to CPU speed
 */
 */
 double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
 double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
@@ -395,6 +404,11 @@ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned
 double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 
 
 /**
 /**
+   Same as starpu_task_expected_energy() but for a precise worker.
+*/
+double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
+
+/**
    Return expected conversion time in ms (multiformat interface only)
    Return expected conversion time in ms (multiformat interface only)
 */
 */
 double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);

+ 4 - 0
julia/StarPU.jl/Manifest.toml

@@ -0,0 +1,4 @@
+# This file is machine-generated - editing it directly is not advised
+
+[[Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

+ 7 - 0
julia/StarPU.jl/Project.toml

@@ -0,0 +1,7 @@
+name = "StarPU"
+uuid = "3e36cc6e-3f67-11e9-3531-2137bfe575e8"
+authors = ["barthou "]
+version = "0.1.0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

+ 2 - 0
julia/StarPU.jl/REQUIRE

@@ -0,0 +1,2 @@
+julia 1.0
+Libdl

Filskillnaden har hållts tillbaka eftersom den är för stor
+ 1230 - 0
julia/StarPU.jl/src/StarPU.jl


+ 261 - 0
julia/StarPU.jl/src/compiler/c.jl

@@ -0,0 +1,261 @@
+
+
+"""
+    Returns the list of instructions that will be added before a for loop of the shape
+        "for for_index_var in set ..."
+"""
+function interval_evaluation_declarations(set :: StarpuExprInterval, for_index_var :: Symbol)
+
+    # Patterns whose € placeholders are filled in by replace_pattern.
+    declare = @parse € :: Int64
+    assign = @parse € :: Int64 = €
+
+    suffix = set.id
+
+    lower = starpu_parse(Symbol(:start_, suffix))
+    lower_decl = replace_pattern(assign, lower, set.start)
+
+    loop_index_decl = replace_pattern(declare, starpu_parse(for_index_var))
+
+    # When the step is a StarpuExprValue, only start, stop and the loop index
+    # are declared.
+    if set.step isa StarpuExprValue
+        upper = starpu_parse(Symbol(:stop_, suffix))
+        upper_decl = replace_pattern(assign, upper, set.stop)
+
+        return StarpuExpr[lower_decl, upper_decl, loop_index_decl]
+    end
+
+    # Otherwise the step gets its own variable, and the number of iterations
+    # is computed through jlstarpu_interval_size.
+    size_assign = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
+
+    stride = starpu_parse(Symbol(:step_, suffix))
+    stride_decl = replace_pattern(assign, stride, set.step)
+
+    extent = starpu_parse(Symbol(:dim_, suffix))
+    extent_decl = replace_pattern(size_assign, extent, lower, stride, set.stop)
+
+    counter = starpu_parse(Symbol(:iter_, suffix))
+    counter_decl = replace_pattern(declare, counter)
+
+    return StarpuExpr[lower_decl, stride_decl, extent_decl, counter_decl, loop_index_decl]
+end
+
+
+"""
+    add_for_loop_declarations(expr)
+
+Run `apply` over `expr` with a visitor that replaces every `StarpuExprFor` node
+by a copy whose last field holds the declarations returned by
+`interval_evaluation_declarations` for its set and iterator. Any other node is
+returned as is.
+"""
+function add_for_loop_declarations(expr :: StarpuExpr)
+
+    function attach_declarations(node :: StarpuExpr)
+
+        node isa StarpuExprFor || return node
+
+        decls = interval_evaluation_declarations(node.set, node.iter)
+
+        return StarpuExprFor(node.iter, node.set, node.body, node.is_independant, decls)
+    end
+
+    return apply(attach_declarations, expr)
+end
+
+
+
+
+
+"""
+    transform_to_cpu_kernel(expr)
+
+Chain the CPU kernel passes over `expr`, in this order: loop declarations,
+argument substitution, function call substitution, index rewriting and block
+flattening. Returns the result of the last pass.
+"""
+function transform_to_cpu_kernel(expr :: StarpuExprFunction)
+
+    return expr |>
+           add_for_loop_declarations |>
+           substitute_args |>
+           substitute_func_calls |>
+           substitute_indexing |>
+           flatten_blocks
+end
+
+
+
+"""
+    flatten_blocks(expr)
+
+Run `apply` over `expr` with a visitor that rebuilds every `StarpuExprBlock`
+so that the instructions of any directly nested `StarpuExprBlock` are spliced
+into the enclosing block. Nodes that are not blocks are returned unchanged.
+"""
+function flatten_blocks(expr :: StarpuExpr)
+
+    function func_to_run(x :: StarpuExpr)
+
+        if !isa(x, StarpuExprBlock)
+            return x
+        end
+
+        instrs = StarpuExpr[]
+
+        for sub_expr in x.exprs
+
+            if isa(sub_expr, StarpuExprBlock)
+                # append! takes the collection itself: unlike splatting into
+                # push!, it also handles an empty nested block.
+                append!(instrs, sub_expr.exprs)
+            else
+                push!(instrs, sub_expr)
+            end
+        end
+
+        return StarpuExprBlock(instrs)
+    end
+
+    return apply(func_to_run, expr)
+end
+
+
+"""
+    substitute_argument_usage(expr, arg_index, buffer_name, arg_name, ptr_name)
+
+Run `apply` over `expr` with a visitor that:
+
+- replaces the variable `arg_name` by the variable `ptr_name`;
+- replaces a call to a function listed in `func_substitution` whose single
+  argument is the variable `ptr_name` by a call to the substituted function on
+  `buffer_name[arg_index]`.
+
+Throws an error when a function listed in `func_substitution` is called with a
+number of arguments other than one.
+"""
+function substitute_argument_usage(expr :: StarpuExpr, arg_index, buffer_name :: Symbol, arg_name :: Symbol, ptr_name :: Symbol)
+
+    original_var = StarpuExprVar(arg_name)
+    pointer_var = StarpuExprVar(ptr_name)
+
+    function rewrite(node :: StarpuExpr)
+
+        if node == original_var
+            return pointer_var
+        end
+
+        is_substituted_call = isa(node, StarpuExprCall) && haskey(func_substitution, node.func)
+
+        if !is_substituted_call
+            return node
+        end
+
+        if length(node.args) != 1
+            error("Invalid arity for function $(node.func)")
+        end
+
+        # Only calls made on this very argument are redirected to the buffer.
+        if node.args[1] != pointer_var
+            return node
+        end
+
+        buffer_ref = starpu_parse(:($buffer_name[$arg_index]))
+
+        return StarpuExprCall(func_substitution[node.func], [buffer_ref])
+    end
+
+    return apply(rewrite, expr)
+end
+
+
+
+"""
+    substitute_args(expr)
+
+Rewrite the function `expr` so that it takes two generated arguments
+(`buffers_<id>` and `cl_arg_<id>`) instead of its original ones.
+
+For each original argument, a `ptr_<id>` declaration reading the matching
+entry of the buffers argument is prepended to the body:
+
+- `Vector` arguments use `STARPU_VECTOR_GET_PTR`;
+- `Matrix` arguments use `STARPU_MATRIX_GET_PTR` and also get an `ld_<id>`
+  declaration read with `STARPU_MATRIX_GET_LD`;
+- `Float32` arguments use `STARPU_VARIABLE_GET_PTR` and also get a `scal_<id>`
+  declaration, which replaces the argument in the body.
+
+Uses of each argument in the body are rewritten with
+`substitute_argument_usage`. Throws an error for any other argument type.
+"""
+function substitute_args(expr :: StarpuExprFunction)
+
+    new_body = expr.body
+    func_id = rand_string()
+    buffer_arg_name = Symbol("buffers_", func_id)
+    cl_arg_name = Symbol("cl_arg_", func_id)
+    function_start_affectations = StarpuExpr[]
+
+    for (i, arg) in enumerate(expr.args)
+
+        var_id = rand_string()
+        ptr = Symbol(:ptr_, var_id)
+        var_name = ptr
+
+        # Extra declaration emitted after the pointer one. It is reset for each
+        # argument so that one argument never reuses the declaration of a
+        # previous one.
+        post_affect = nothing
+
+        if (arg.typ <: Vector)
+            func_interface = :STARPU_VECTOR_GET_PTR
+
+        elseif (arg.typ <: Matrix)
+            func_interface = :STARPU_MATRIX_GET_PTR
+            ld_name = Symbol("ld_", var_id)
+            post_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$i])) )
+
+        elseif (arg.typ <: Float32)
+            func_interface = :STARPU_VARIABLE_GET_PTR
+            var_name = Symbol("scal_", var_id)
+            # NOTE(review): index 0 is kept from the original code; confirm how
+            # substitute_indexing treats this reference.
+            post_affect = starpu_parse( :($var_name :: Float32 = ($ptr[0])) )
+
+        else
+            error("Task arguments must be a vector, a matrix or a Float32 (got $(arg.typ))") #TODO : cl_args ?
+        end
+
+        type_in_arg = eltype(arg.typ)
+        new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$i])) )
+        push!(function_start_affectations, new_affect)
+
+        if post_affect !== nothing
+            push!(function_start_affectations, post_affect)
+        end
+
+        new_body = substitute_argument_usage(new_body, i, buffer_arg_name, arg.name, var_name)
+
+    end
+
+
+    new_args = [
+                    starpu_parse(:($buffer_arg_name :: Matrix{Nothing})),
+                    starpu_parse(:($cl_arg_name :: Vector{Nothing}))
+                ]
+    new_body = StarpuExprBlock([function_start_affectations..., new_body.exprs...])
+
+    return StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
+end
+
+
+
+# Maps the size functions usable on task arguments to the interface macro
+# they are replaced with. Declared const so that the functions reading this
+# global see a binding of known type.
+const func_substitution = Dict(
+    :width => :STARPU_MATRIX_GET_NY,
+    :height => :STARPU_MATRIX_GET_NX,
+
+    :length => :STARPU_VECTOR_GET_NX
+)
+
+
+
+"""
+    substitute_func_calls(expr)
+
+Run `apply` over `expr` with a visitor that renames every call to a function
+listed in `func_substitution` to its substituted name, keeping the arguments.
+"""
+function substitute_func_calls(expr :: StarpuExpr)
+
+    function rename_call(node :: StarpuExpr)
+
+        if isa(node, StarpuExprCall) && haskey(func_substitution, node.func)
+            return StarpuExprCall(func_substitution[node.func], node.args)
+        end
+
+        return node
+    end
+
+    return apply(rename_call, expr)
+end
+
+
+"""
+    substitute_indexing(expr)
+
+Run `apply` over `expr` with a visitor that rewrites the indexes of every
+`StarpuExprRef`:
+
+- no index: the reference is returned unchanged;
+- one index `i`: it becomes `i - 1`;
+- two indexes `i, j` on a variable named `ptr_<id>`: they become the single
+  index `i + (j - 1) * ld_<id> - 1`.
+
+Throws an error for three or more indexes, and for two indexes on a variable
+whose name is not `ptr_` followed by a non-empty identifier.
+"""
+function substitute_indexing(expr :: StarpuExpr)
+
+    ptr_prefix = "ptr_"
+
+    function func_to_run(x :: StarpuExpr)
+
+        if !isa(x, StarpuExprRef)
+            return x
+        end
+
+        #if !isa(x.ref, StarpuExprVar)
+        #    error("Only variable indexing is allowed") #TODO allow more ?
+        #end
+
+
+        nb_indexes = length(x.indexes)
+
+        if (nb_indexes >= 3)
+            error("Indexing with more than 2 indexes is not allowed") # TODO : blocks
+        end
+
+        if (nb_indexes == 0)
+            return x
+
+        elseif nb_indexes == 1
+            new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)])  #TODO : add field "offset" from STARPU_VECTOR_GET interface
+                                                                            #TODO : detect when it is a matrix used with one index only
+            return StarpuExprRef(x.ref, [new_index])
+
+        elseif nb_indexes == 2
+
+            var_name = String(x.ref.name)
+
+            # The prefix must be at the start of the name: the identifier is
+            # taken from what follows it.
+            if !startswith(var_name, ptr_prefix) || ncodeunits(var_name) == ncodeunits(ptr_prefix)
+                error("Invalid variable ($var_name) for multiple index dereferencing")
+            end
+
+            var_id = var_name[ncodeunits(ptr_prefix) + 1 : end]
+            ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix)
+
+            new_index = x.indexes[2]
+            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
+            new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)])
+            new_index = StarpuExprCall(:(+), [x.indexes[1], new_index])
+            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
+
+            return StarpuExprRef(x.ref, [new_index])
+        end
+    end
+
+    return apply(func_to_run, expr)
+end

+ 349 - 0
julia/StarPU.jl/src/compiler/cuda.jl

@@ -0,0 +1,349 @@
+
+
+"""
+    is_indep_for_expr(x)
+
+Return `true` when `x` is a `StarpuExprFor` whose `is_independant` field is set.
+"""
+function is_indep_for_expr(x :: StarpuExpr)
+    if x isa StarpuExprFor
+        return x.is_independant
+    end
+
+    return false
+end
+
+
+"""
+    extract_init_indep_finish(expr)
+
+Split `expr` around its first independent for loop and return the triple
+`(init, indep, finish)`: the instructions before the loop, the loop wrapped in
+a `StarpuIndepFor` (or `nothing` when there is none), and the instructions
+after it.
+
+Only `expr` itself and, when it is a `StarpuExprBlock`, its direct instructions
+are inspected. Throws an error when another independent loop follows the first
+one in the same block.
+"""
+function extract_init_indep_finish(expr :: StarpuExpr) # TODO : it is not a correct extraction (example : if (cond) {@indep for ...} else {return} would not work)
+                                                            # better use apply() (NOTE :assert_no_indep_for already exists) to find recursively every for loops
+    if is_indep_for_expr(expr)
+        return StarpuExpr[], StarpuIndepFor(expr), StarpuExpr[]
+    end
+
+    if !(expr isa StarpuExprBlock)
+        return [expr], nothing, StarpuExpr[]
+    end
+
+    instrs = expr.exprs
+    pos = findfirst(is_indep_for_expr, instrs)
+
+    if pos === nothing
+        return instrs, nothing, StarpuExpr[]
+    end
+
+    before = instrs[1 : pos-1]
+    indep = StarpuIndepFor(instrs[pos])
+    after = instrs[pos+1 : end]
+
+    if any(is_indep_for_expr, after)
+        error("Sequence of several independant loops is not allowed") #same it may be tricked by a Block(Indep_for(...))
+    end
+
+    return before, indep, after
+end
+
+
+
+
+"""
+    analyse_variable_declarations(expr, already_defined = StarpuExprTypedVar[])
+
+Run `apply` over `expr` and collect the variables it defines and the ones it
+uses without defining.
+
+A variable is defined by an assignment whose target is a `StarpuExprTypedVar`,
+or by a `StarpuExprFor`, whose iterator is recorded with type `Int64`.
+
+Returns `(defined, undefined)`: `defined` is a `Vector{StarpuExprTypedVar}`
+starting with `already_defined`, and `undefined` is the `Vector{Symbol}` of
+names met before any definition.
+
+Throws an error when `expr` contains a function declaration or defines a
+variable that is already defined.
+"""
+function analyse_variable_declarations(expr :: StarpuExpr, already_defined :: Vector{StarpuExprTypedVar} = StarpuExprTypedVar[])
+
+    undefined_variables = Symbol[]
+
+    # The element types are given explicitly: the vectors are grown with
+    # push! below, so they must not depend on what map would infer from
+    # `already_defined`.
+    defined_variable_names = Symbol[x.name for x in already_defined]
+    defined_variable_types = Type[x.typ for x in already_defined]
+
+    function func_to_apply(x :: StarpuExpr)
+
+        if isa(x, StarpuExprFunction)
+            error("No function declaration allowed in this section")
+        end
+
+        if isa(x, StarpuExprVar) || isa(x, StarpuExprTypedVar)
+
+            if !(x.name in defined_variable_names) && !(x.name in undefined_variables)
+                push!(undefined_variables, x.name)
+            end
+
+            return x
+        end
+
+        if isa(x, StarpuExprAffect) || isa(x, StarpuExprFor)
+
+            if isa(x, StarpuExprAffect)
+
+                var = x.var
+
+                # An assignment to an untyped target does not define anything.
+                if !isa(var, StarpuExprTypedVar)
+                    return x
+                end
+
+                name = var.name
+                typ = var.typ
+
+            else
+                name = x.iter
+                typ = Int64
+            end
+
+            if name in defined_variable_names
+                error("Multiple definition of variable $name")
+            end
+
+            filter!((sym -> sym != name), undefined_variables)
+            push!(defined_variable_names, name)
+            push!(defined_variable_types, typ)
+
+            return x
+        end
+
+        return x
+    end
+
+    apply(func_to_apply, expr)
+
+    defined_variable = StarpuExprTypedVar[StarpuExprTypedVar(n, t) for (n, t) in zip(defined_variable_names, defined_variable_types)]
+
+    return defined_variable, undefined_variables
+end
+
+
+
+"""
+    find_variable(name, vars)
+
+Return the first element of `vars` whose `name` field equals `name`, or
+`nothing` when there is none.
+"""
+function find_variable(name :: Symbol, vars :: Vector{StarpuExprTypedVar})
+
+    pos = findfirst(v -> v.name == name, vars)
+
+    if pos === nothing
+        return nothing
+    end
+
+    return vars[pos]
+end
+
+
+
+"""
+    add_device_to_interval_call(expr)
+
+Run `apply` over `expr` with a visitor that renames every call to
+`jlstarpu_interval_size` into a call to `jlstarpu_interval_size__device`,
+keeping the arguments.
+"""
+function add_device_to_interval_call(expr :: StarpuExpr)
+
+    function to_device_call(node :: StarpuExpr)
+
+        if !(node isa StarpuExprCall) || node.func != :jlstarpu_interval_size
+            return node
+        end
+
+        return StarpuExprCall(:jlstarpu_interval_size__device, node.args)
+    end
+
+    return apply(to_device_call, expr)
+end
+
+
+
+"""
+    transform_to_cuda_kernel(func)
+
+Turn `func` into a pair `(prekernel, kernel)` of functions.
+
+`func` is first passed through `transform_to_cpu_kernel`, and its body is
+split around its independent for loop with `extract_init_indep_finish`.
+
+- `prekernel`, named `CUDA_<func>`, holds the instructions before the loop, the
+  instructions computed by `analyse_sets`, the call to the kernel, a
+  `cudaStreamSynchronize` call and the instructions after the loop.
+- `kernel`, named `KERNEL_<func>`, holds the index computations and the body of
+  the loop. Its arguments are those computed by `analyse_sets`, plus every
+  variable used in the kernel that is defined in the prekernel.
+
+Throws an error when no independent for loop is found.
+"""
+function transform_to_cuda_kernel(func :: StarpuExprFunction)
+
+    cpu_func = transform_to_cpu_kernel(func)
+
+    init, indep, finish = extract_init_indep_finish(cpu_func.body)
+
+    if indep === nothing
+        error("No independant for loop has been found") # TODO can fail because extraction is not correct yet
+    end
+
+    prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
+
+    prekernel_instr = vcat(init, prekernel_instr)
+    kernel_instr = vcat(kernel_instr, indep.body)
+
+    # Both analyses are still run in full, as they raise an error on invalid
+    # declarations; only part of each result is needed here.
+    _, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
+    prekernel_def, _ = analyse_variable_declarations(StarpuExprBlock(prekernel_instr), cpu_func.args)
+
+    # Variables used by the kernel but defined in the prekernel are passed as
+    # kernel arguments.
+    for undef_var in indep_for_undef
+
+        found_var = find_variable(undef_var, prekernel_def)
+
+        if found_var === nothing # TODO : error then ?
+            continue
+        end
+
+        push!(kernel_args, found_var)
+    end
+
+    call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
+    kernelname = Symbol("KERNEL_", func.func)
+    cuda_call = StarpuExprCudaCall(kernelname, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
+    push!(prekernel_instr, cuda_call)
+    push!(prekernel_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
+    prekernel_instr = vcat(prekernel_instr, finish)
+
+    prekernel_name = Symbol("CUDA_", func.func)
+    prekernel = StarpuExprFunction(Nothing, prekernel_name, cpu_func.args, StarpuExprBlock(prekernel_instr))
+    prekernel = flatten_blocks(prekernel)
+
+    kernel = StarpuExprFunction(Nothing, kernelname, kernel_args, StarpuExprBlock(kernel_instr))
+    kernel = add_device_to_interval_call(kernel)
+    kernel = flatten_blocks(kernel)
+
+    return prekernel, kernel
+end
+
+
+"""
+    StarpuIndepFor
+
+A nest of independent for loops gathered into one value: `iters[k]` and
+`sets[k]` are the iterator and the interval of the `k`-th loop of the nest,
+and `body` is the body of the innermost one.
+"""
+struct StarpuIndepFor
+    iters :: Vector{Symbol}
+    sets :: Vector{StarpuExprInterval}
+    body :: StarpuExpr
+end
+
+
+"""
+    assert_no_indep_for(expr)
+
+Run `apply` over `expr` with a visitor that throws an error on any independent
+for loop and returns every other node unchanged. Returns the result of `apply`.
+"""
+function assert_no_indep_for(expr :: StarpuExpr)
+
+    function reject_indep_for(node :: StarpuExpr)
+
+        is_indep_for_expr(node) && error("Invalid usage of intricated @indep for loops")
+
+        return node
+    end
+
+    return apply(reject_indep_for, expr)
+end
+
+
+"""
+    StarpuIndepFor(expr::StarpuExprFor)
+
+Build a `StarpuIndepFor` from the independent for loop `expr`.
+
+Directly nested independent loops are gathered into the same nest; blocks
+holding a single instruction are looked through. The remaining body must not
+contain any other independent for loop (see `assert_no_indep_for`).
+
+Throws an error when `expr` is not an independent loop.
+"""
+function StarpuIndepFor(expr :: StarpuExprFor)
+
+    if !expr.is_independant
+        error("For expression must be prefixed by @indep")
+    end
+
+    # Typed accumulators, matching the fields of the struct.
+    iters = Symbol[]
+    sets = StarpuExprInterval[]
+    for_loop = expr
+
+    while isa(for_loop, StarpuExprFor) && for_loop.is_independant
+
+        push!(iters, for_loop.iter)
+        push!(sets, for_loop.set)
+        for_loop = for_loop.body
+
+        # Look through blocks that only wrap a single instruction.
+        while (isa(for_loop, StarpuExprBlock) && length(for_loop.exprs) == 1)
+            for_loop = for_loop.exprs[1]
+        end
+    end
+
+    return StarpuIndepFor(iters, sets, assert_no_indep_for(for_loop))
+end
+
+
+"""
+    translate_index_code(dims)
+
+Build the instructions that compute, inside a kernel, `THREAD_ID` and one
+`kernel_ids__index_<k>` variable per entry of `dims`.
+
+The returned list starts with the declaration of `THREAD_ID`, together with an
+early return when it is not lower than the product of `dims`, followed by the
+index declarations from `kernel_ids__index_1` to the last one. Each index is
+`THREAD_ID` divided by the product of the following dimensions, modulo its own
+dimension.
+
+Throws an error when `dims` is empty.
+"""
+function translate_index_code(dims :: Vector{StarpuExprVar})
+
+    ndims = length(dims)
+
+    if ndims == 0
+        error("No dimension specified")
+    end
+
+    index_pattern = @parse € :: Int64 = (€ / €) % €
+    thread_id = @parse THREAD_ID
+
+    stride = StarpuExprValue(1)
+    instrs = StarpuExpr[]
+
+    # Walk the dimensions from the last one to the first one, accumulating the
+    # product of the dimensions already seen.
+    for k in ndims : -1 : 1
+        dim = dims[k]
+        index_var = StarpuExprVar(Symbol(:kernel_ids__index_, k))
+
+        push!(instrs, replace_pattern(index_pattern, index_var, thread_id, stride, dim))
+
+        stride = StarpuExprCall(:(*), [stride, dim])
+    end
+
+    thread_id_pattern = @parse begin
+
+        € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x
+
+        if (€ >= €)
+            return
+        end
+    end
+
+    # Pushed last and then reversed, so that it ends up first.
+    push!(instrs, replace_pattern(thread_id_pattern, thread_id, thread_id, stride))
+
+    return reverse(instrs)
+end
+
+
+
+
+
+
+
+"""
+    kernel_index_declarations(ind_for)
+
+Build the declarations tied to the intervals of `ind_for`.
+
+Returns `(dims, iter_decls, pre_kernel_instr, kernel_args, kernel_instr)`:
+
+- `dims`: the `kernel_ids__dim_<k>` variables, one per interval;
+- `iter_decls`: for each loop, the declaration of its iterator as
+  `start + index * step`;
+- `pre_kernel_instr`: the `kernel_ids__start_<k>`, `kernel_ids__step_<k>` and
+  `kernel_ids__dim_<k>` declarations;
+- `kernel_args`: the same three variables per interval, typed `Int64`;
+- `kernel_instr`: an empty instruction list.
+"""
+function kernel_index_declarations(ind_for :: StarpuIndepFor)
+
+    assign = @parse € :: Int64 = €
+    size_assign = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
+    iter_assign = @parse € :: Int64 = € + € * €
+
+    host_decls = StarpuExpr[]
+    args = StarpuExprTypedVar[]
+    device_instr = StarpuExpr[]
+
+    extents = StarpuExprVar[]
+    iter_decls = StarpuExpr[]
+
+    for (k, interval) in enumerate(ind_for.sets)
+
+        lower = starpu_parse(Symbol(:kernel_ids__start_, k))
+        lower_decl = replace_pattern(assign, lower, interval.start)
+
+        stride = starpu_parse(Symbol(:kernel_ids__step_, k))
+        stride_decl = replace_pattern(assign, stride, interval.step)
+
+        extent = starpu_parse(Symbol(:kernel_ids__dim_, k))
+        extent_decl = replace_pattern(size_assign, extent, lower, stride, interval.stop)
+
+        push!(extents, extent)
+
+        push!(host_decls, lower_decl, stride_decl, extent_decl)
+
+        for var in (lower, stride, extent)
+            push!(args, StarpuExprTypedVar(var.name, Int64))
+        end
+
+        loop_var = starpu_parse(ind_for.iters[k])
+        position = starpu_parse(Symbol(:kernel_ids__index_, k))
+
+        push!(iter_decls, replace_pattern(iter_assign, loop_var, lower, position, stride))
+    end
+
+
+    return extents, iter_decls, host_decls, args, device_instr
+end
+
+
+
+"""
+    analyse_sets(ind_for)
+
+Build what is needed to launch the kernel of `ind_for`.
+
+Returns `(pre_kernel_instr, kernel_args, kernel_instr)`:
+
+- `pre_kernel_instr`: the declarations from `kernel_index_declarations`,
+  followed by the declarations of `nthreads` (product of the dimensions) and
+  `nblocks` (computed from `nthreads` and `THREADS_PER_BLOCK`);
+- `kernel_args`: the kernel arguments from `kernel_index_declarations`;
+- `kernel_instr`: the instructions from `translate_index_code`, followed by
+  the iterator declarations.
+"""
+function analyse_sets(ind_for :: StarpuIndepFor)
+
+    assign = @parse € :: Int64 = €
+    nblocks_assign = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK
+
+    extents, iter_decls, host_instr, args, device_instr = kernel_index_declarations(ind_for)
+
+    # Total number of threads: product of every dimension.
+    total = @parse 1
+
+    for extent in extents
+        total = StarpuExprCall(:(*), [total, extent])
+    end
+
+    nthreads = @parse nthreads
+    push!(host_instr, replace_pattern(assign, nthreads, total))
+
+    nblocks = @parse nblocks
+    push!(host_instr, replace_pattern(nblocks_assign, nblocks, nthreads))
+
+    append!(device_instr, translate_index_code(extents))
+    append!(device_instr, iter_decls)
+
+    return host_instr, args, device_instr
+end

julia/src/Compiler/expression_manipulation.jl → julia/StarPU.jl/src/compiler/expression_manipulation.jl


+ 928 - 0
julia/StarPU.jl/src/compiler/expressions.jl

@@ -0,0 +1,928 @@
+
+#======================================================
+                AFFECTATION
+======================================================#
+abstract type StarpuExpr end # Root type of every node of the expression tree built by starpu_parse
+abstract type StarpuExprTyped <: StarpuExpr end # Nodes that carry a Julia type in their `typ` field
+
+
+struct StarpuExprTypedVar <: StarpuExprTyped # Variable with a type annotation; printed as "<C type> <name>"
+    name :: Symbol
+    typ :: Type
+end
+
+struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ?
+    expr :: StarpuExpr
+    typ :: Type
+end
+
+struct StarpuExprAffect <: StarpuExpr # Assignment, printed as "var = expr"
+    var :: StarpuExpr
+    expr :: StarpuExpr
+end
+
+struct StarpuExprBlock <: StarpuExpr # Sequence of instructions, each printed followed by ";"
+    exprs :: Vector{StarpuExpr}
+end
+
+struct StarpuExprCall <: StarpuExpr # Function call; printed infix when `func` is in starpu_infix_operators
+    func :: Symbol
+    args :: Vector{StarpuExpr}
+end
+struct StarpuExprCudaCall <: StarpuExpr # CUDA kernel launch, printed as ker_name<<<nblocks, threads_per_block, ...>>>(args)
+
+    ker_name :: Symbol
+
+    nblocks :: StarpuExpr
+    threads_per_block :: StarpuExpr
+
+    args :: Vector{StarpuExpr}
+
+end
+struct StarpuExprField <: StarpuExpr # Field access on `left`
+
+    left :: StarpuExpr
+    field :: Symbol
+
+    is_an_arrow :: Bool # printed with "->" when true, with "." otherwise
+end
+struct StarpuExprInterval <: StarpuExpr # Range start:step:stop
+    start :: StarpuExpr
+    step :: StarpuExpr
+    stop :: StarpuExpr
+
+    id :: String # suffix of the start_/stop_/step_/dim_/iter_ names used when printing a for loop
+
+    function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string()) # id defaults to a fresh random string
+        return new(start, step, stop, id)
+    end
+
+end
+struct StarpuExprFor <: StarpuExpr # For loop of `iter` over the interval `set`
+
+    iter :: Symbol
+    set:: StarpuExprInterval
+    body :: StarpuExpr
+
+    is_independant :: Bool # set to true by starpu_parse_for when asked by its caller
+    set_declarations :: Vector{StarpuExpr} # instructions printed just before the loop header
+
+end
+struct StarpuExprFunction <: StarpuExpr # Function definition: return type, name, typed arguments, body
+    ret_type :: Type
+    func :: Symbol
+    args :: Vector{StarpuExprTypedVar}
+    body :: StarpuExpr
+end
+struct StarpuExprIf <: StarpuExpr # Conditional without else branch
+    cond :: StarpuExpr
+    then_statement :: StarpuExpr
+end
+
+
+struct StarpuExprIfElse <: StarpuExpr # Conditional with an else branch
+    cond :: StarpuExpr
+    then_statement :: StarpuExpr
+    else_statement :: StarpuExpr
+end
+
+struct StarpuExprRef <: StarpuExpr # Indexing, printed as ref[i1][i2]...
+    ref :: StarpuExpr
+    indexes :: Vector{StarpuExpr}
+end
+struct StarpuExprReturn <: StarpuExpr # Return statement
+    value :: StarpuExpr
+end
+struct StarpuExprVar <: StarpuExpr # Untyped variable, printed as its name
+    name :: Symbol
+end
+struct StarpuExprInvalid <: StarpuExpr # Placeholder node, printed as "INVALID"
+end
+
+struct StarpuExprValue <: StarpuExpr # Literal value (anything starpu_parse does not handle otherwise)
+    value :: Any
+end
+
+struct StarpuExprWhile <: StarpuExpr # While loop
+    cond :: StarpuExpr
+    body :: StarpuExpr
+end
+
+
+# Convert a Julia assignment expression (head `:(=)`) into a StarpuExprAffect.
+# Raises an error for any other expression head.
+function starpu_parse_affect(x :: Expr)
+    x.head == :(=) || error("Invalid \"affectation\" expression")
+
+    target = starpu_parse(x.args[1])
+    value = starpu_parse(x.args[2])
+
+    return StarpuExprAffect(target, value)
+end
+
+
+# Structural equality of an assignment with another expression: true only
+# when `y` is an assignment too, with equal left and right hand sides.
+function equals(x :: StarpuExprAffect, y :: StarpuExpr)
+    isa(y, StarpuExprAffect) || return false
+    return equals(x.var, y.var) && equals(x.expr, y.expr)
+end
+
+
+# Print an assignment as "var = expr". When the left hand side is a typed
+# variable, the right hand side is wrapped in an explicit C cast to that
+# type, to avoid warnings (or errors for cuda) at compilation time.
+function print(io :: IO, x :: StarpuExprAffect ; indent = 0, restrict = false)
+    print(io, x.var, indent = indent)
+    print(io, " = ")
+
+    with_cast = x.var isa StarpuExprTypedVar
+    with_cast && print(io, "(", starpu_type_traduction(x.var.typ), ") (")
+
+    print(io, x.expr, indent = indent)
+
+    with_cast && print(io, ")")
+    return nothing
+end
+
+# Apply `func` bottom-up: first to both sides of the assignment, then to the
+# assignment rebuilt from the transformed sides.
+function apply(func :: Function, expr :: StarpuExprAffect)
+    return func(StarpuExprAffect(apply(func, expr.var), apply(func, expr.expr)))
+end
+
+#======================================================
+                BLOCK
+(series of instruction, not C variable scoping block)
+======================================================#
+
+
+
+
+# Tell whether an element of the `args` of a Julia block must be dropped
+# before parsing. Only line number nodes are dropped.
+# The untyped fallback keeps every other element (symbols, expressions, but
+# also literals or quote nodes), so that filtering a block never raises a
+# MethodError on an element type that was not anticipated.
+is_unwanted(x) = false
+
+is_unwanted(x :: LineNumberNode) = true
+
+# Convert a Julia `:block` expression into a StarpuExprBlock, parsing every
+# element that is not rejected by is_unwanted.
+function starpu_parse_block(x :: Expr)
+    x.head == :block || error("Invalid \"block\" expression")
+
+    kept = filter(!is_unwanted, x.args)
+    return StarpuExprBlock(map(starpu_parse, kept))
+end
+
+
+# Print every instruction of the block followed by ";", with a newline (at
+# the requested indentation) between instructions but not after the last.
+function print(io :: IO, x :: StarpuExprBlock ; indent = 0, restrict=false)
+    last_index = length(x.exprs)
+    for (i, instr) in enumerate(x.exprs)
+        print(io, instr, indent = indent)
+        print(io, ";")
+        i == last_index || print_newline(io, indent)
+    end
+end
+
+
+
+
+# Apply `func` bottom-up: to every instruction of the block, then to the
+# block rebuilt from the transformed instructions.
+function apply(func :: Function, expr :: StarpuExprBlock)
+    new_exprs = map(expr.exprs) do instr
+        apply(func, instr)
+    end
+    return func(StarpuExprBlock(new_exprs))
+end
+
+#======================================================
+                FUNCTION CALL
+======================================================#
+
+
+
+
+# Convert a Julia `:call` expression into a StarpuExprCall.
+# Calls to `:` (range syntax) are delegated to starpu_parse_interval.
+# The called function must parse to a plain variable.
+function starpu_parse_call(x :: Expr)
+    x.head == :call || error("Invalid \"call\" expression")
+
+    callee = starpu_parse(x.args[1])
+    x.args[1] == Symbol(":") && return starpu_parse_interval(x)
+
+    isa(callee, StarpuExprVar) || error("Invalid \"call\" expression : function must be a variable")
+
+    return StarpuExprCall(callee.name, map(starpu_parse, x.args[2:end]))
+end
+
+
+# Function names printed as C infix operators instead of prefix calls.
+# Julia parses `a == b` and `a != b` as calls, so they are listed here too:
+# printed as prefix calls they would give `==(a, b)`, which is not valid C.
+starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(%), :(==), :(!=))
+
+
+# Print a call in prefix form: func(arg1, arg2, ...)
+function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0, restrict=false)
+    print(io, x.func, "(")
+
+    for (i, arg) in enumerate(x.args)
+        i == 1 || print(io, ", ")
+        print(io, arg, indent = indent)
+    end
+
+    print(io, ")")
+end
+
+
+# Print a call in infix form: (arg1) func (arg2) func ...
+# Every operand is parenthesised.
+function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false)
+    for (i, operand) in enumerate(x.args)
+        i == 1 || print(io, " ", x.func, " ")
+        print(io, "(")
+        print(io, operand, indent = indent)
+        print(io, ")")
+    end
+end
+
+# Print a call, in infix form when it has at least two arguments and its
+# function is one of starpu_infix_operators, in prefix form otherwise.
+function print(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false)
+    is_infix = length(x.args) >= 2 && x.func in starpu_infix_operators
+    printer = is_infix ? print_infix : print_prefix
+    printer(io, x, indent = indent)
+end
+
+
+
+
+# Apply `func` bottom-up: to every argument of the call, then to the call
+# rebuilt with the transformed arguments.
+function apply(func :: Function, expr :: StarpuExprCall)
+    new_args = map(expr.args) do arg
+        apply(func, arg)
+    end
+    return func(StarpuExprCall(expr.func, new_args))
+end
+
+
+#======================================================
+                CUDA KERNEL CALL
+======================================================#
+
+
+
+
+
+# Print a CUDA kernel launch:
+#     ker_name
+#         <<< nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()
+#         >>> (arg1, arg2, ...);
+# A line break is inserted in the argument list before the 5th, 9th, ...
+# argument.
+function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0,restrict=false)
+    launch_indent = indent + starpu_indent_size
+    arg_indent = indent + 2 * starpu_indent_size
+
+    print_newline(io, indent)
+    print(io, expr.ker_name)
+    print_newline(io, launch_indent)
+    print(io, "<<< ")
+    print(io, expr.nblocks, indent = arg_indent)
+    print(io, ", ")
+    print(io, expr.threads_per_block, indent = arg_indent)
+    print(io, ", 0, starpu_cuda_get_local_stream()")
+    print_newline(io, launch_indent)
+    print(io, ">>> (")
+
+    for (i, arg) in enumerate(expr.args)
+        if i > 1
+            print(io, ", ")
+            i % 4 == 1 && print_newline(io, arg_indent + 1)
+        end
+        print(io, arg, indent = arg_indent)
+    end
+
+    print(io, ");")
+    print_newline(io, indent)
+end
+
+
+# Apply `func` bottom-up to a CUDA kernel launch.
+# The launch configuration (nblocks, threads_per_block) and the arguments are
+# traversed with `apply`, so that `func` also reaches their sub-expressions;
+# `func` is finally applied to the rebuilt launch itself, as done by the
+# `apply` methods of the other node types.
+function apply(func :: Function, expr :: StarpuExprCudaCall)
+
+    nblocks = apply(func, expr.nblocks)
+    threads_per_block = apply(func, expr.threads_per_block)
+    args = map((x -> apply(func, x)), expr.args)
+
+    return func(StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args))
+end
+
+
+#======================================================
+                STRUCTURE FIELDS
+======================================================#
+
+
+
+
+
+# Convert a Julia dot expression `left.field` into a StarpuExprField.
+# The expression must have exactly two arguments, the second one being a
+# quoted symbol. The result is never an arrow access.
+function starpu_parse_field(x :: Expr)
+    (x.head == :(.) && length(x.args) == 2) || error("Invalid parsing of dot expression")
+
+    left = starpu_parse(x.args[1])
+
+    quoted = x.args[2]
+    (isa(quoted, QuoteNode) && isa(quoted.value, Symbol)) || error("Invalid parsing of dot expression")
+
+    return StarpuExprField(left, quoted.value, false)
+end
+
+
+# Print a field access as "(left)->field" or "(left).field".
+function print(io :: IO, x :: StarpuExprField ; indent = 0,restrict=false)
+    accessor = x.is_an_arrow ? "->" : '.'
+
+    print(io, "(")
+    print(io, x.left, indent = indent)
+    print(io, ")", accessor, x.field)
+end
+
+
+
+# Apply `func` bottom-up to a field access: the left operand is traversed
+# with `apply` (so that `func` also reaches its sub-expressions), then `func`
+# is applied to the rebuilt field access.
+function apply(func :: Function, expr :: StarpuExprField)
+    return func(StarpuExprField(apply(func, expr.left), expr.field, expr.is_an_arrow))
+end
+
+#======================================================
+                FOR LOOPS
+======================================================#
+
+
+
+
+
+# Convert a Julia `for iter = set ... end` expression into a StarpuExprFor.
+# The iterator must be a plain variable and the set must parse to an
+# interval. `is_independant` is stored as is in the result, which starts
+# with no set declaration.
+function starpu_parse_for(x :: Expr; is_independant = false)
+    x.head == :for || error("Invalid \"for\" expression")
+
+    header = x.args[1]
+    header.head == :(=) || error("Invalid \"for\" iterator affectation")
+
+    iter = starpu_parse(header.args[1])
+    isa(iter, StarpuExprVar) || error("Invalid \"for\" iterator")
+
+    set = starpu_parse(header.args[2])
+    isa(set, StarpuExprInterval) || error("Set of values in \"for\" loop must be an interval")
+
+    body = starpu_parse(x.args[2])
+
+    return StarpuExprFor(iter.name, set, body, is_independant, StarpuExpr[])
+end
+
+
+
+
+
+# Print a for loop as C code: first the set declarations, then the loop.
+# The loop header refers to variables named start_<id>, stop_<id>,
+# step_<id>, dim_<id> and iter_<id>, where <id> is the id of the interval.
+#   - step known as a literal value: the loop variable goes from start to
+#     stop, compared with "<=" (step >= 0) or ">=" (step < 0);
+#   - otherwise: an auxiliary counter goes from 0 to dim - 1 while the loop
+#     variable is incremented by step.
+function print(io :: IO, x :: StarpuExprFor ; indent = 0,restrict=false)
+    body_indent = indent + starpu_indent_size
+
+    print_newline(io, indent)
+    print(io, StarpuExprBlock(x.set_declarations), indent = indent)
+
+    id = x.set.id
+    start = "start_$id"
+    stop = "stop_$id"
+    step = "step_$id"
+    dim = "dim_$id"
+    iter = "iter_$id"
+
+    print_newline(io, indent, 2)
+
+    if x.set.step isa StarpuExprValue
+        print(io, "for ($(x.iter) = $start ; ")
+        comparison_op = x.set.step.value >= 0 ? "<=" : ">="
+        print(io, "$(x.iter) $comparison_op $stop ; ")
+        print(io, "$(x.iter) += $(x.set.step.value))")
+    else
+        print(io, "for ($iter = 0, $(x.iter) = $start ; ")
+        print(io, "$iter < $dim ; ")
+        print(io, "$iter += 1, $(x.iter) += $step)")
+    end
+
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, body_indent)
+    print(io, x.body, indent = body_indent)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+end
+
+
+
+# Apply `func` bottom-up: to the set declarations, the interval and the body
+# (in that order), then to the loop rebuilt from the transformed parts.
+function apply(func :: Function, expr :: StarpuExprFor)
+    new_declarations = map(expr.set_declarations) do decl
+        apply(func, decl)
+    end
+    new_set = apply(func, expr.set)
+    new_body = apply(func, expr.body)
+
+    return func(StarpuExprFor(expr.iter, new_set, new_body, expr.is_independant, new_declarations))
+end
+
+
+#======================================================
+                FUNCTION DECLARATION
+======================================================#
+
+
+
+
+# Convert a Julia function definition into a StarpuExprFunction.
+# The prototype must carry an explicit return type (`f(args...) :: T`) and
+# every argument must be a typed variable (`name :: T`); an error is raised
+# otherwise.
+function starpu_parse_function(x :: Expr)
+
+    if (x.head != :function)
+        error("Invalid \"function\" expression")
+    end
+
+    typed_decl = starpu_parse(x.args[1])
+
+    if (!isa(typed_decl, StarpuExprTypedExpr))
+        error("Invalid \"function\" prototype : a return type must be explicit")
+    end
+
+    prototype = typed_decl.expr
+
+    if (!isa(prototype, StarpuExprCall))
+        error("Invalid \"function\" prototype")
+    end
+
+    arg_list = StarpuExprTypedVar[]
+
+    for type_arg in prototype.args
+        if (!isa(type_arg, StarpuExprTypedVar))
+            error("Invalid \"function\" argument list")
+        end
+        push!(arg_list, type_arg)
+    end
+
+    body = starpu_parse(x.args[2])
+    return StarpuExprFunction(typed_decl.typ, prototype.func, arg_list, body)
+end
+
+
+
+# Print a function definition as C code: return type, name, argument list
+# (with a line break before the 5th, 9th, ... argument) and braced body.
+# Arguments are printed with `restrict = true`.
+function print(io :: IO, x :: StarpuExprFunction ; indent = 0,restrict=false)
+    body_indent = indent + starpu_indent_size
+
+    print(io, starpu_type_traduction(x.ret_type), " ")
+    print(io, x.func, '(')
+
+    for (i, arg) in enumerate(x.args)
+        if i > 1
+            print(io, ", ")
+            if i % 4 == 1
+                print_newline(io, body_indent + length(String(x.func)) + 13)
+            end
+        end
+        print(io, arg, indent = body_indent, restrict = true)
+    end
+
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, body_indent)
+    print(io, x.body, indent = body_indent)
+    print_newline(io, indent)
+    print(io, "}\n\n")
+    print_newline(io, indent)
+end
+
+
+
+# Apply `func` bottom-up: to every argument, to the body, then to the
+# function definition rebuilt from the transformed parts.
+function apply(func :: Function, expr :: StarpuExprFunction)
+    new_args = map(expr.args) do arg
+        apply(func, arg)
+    end
+    new_body = apply(func, expr.body)
+
+    return func(StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body))
+end
+
+
+#======================================================
+                IF STATEMENT
+======================================================#
+
+
+
+
+
+# Convert a Julia `:if` expression into a StarpuExprIf (two arguments:
+# condition and then-branch) or a StarpuExprIfElse (when a third argument,
+# the else-branch, is present).
+function starpu_parse_if(x :: Expr)
+    x.head == :if || error("Invalid \"if\" expression")
+
+    nargs = length(x.args)
+    nargs >= 2 || error("Invalid \"if\" statement")
+
+    cond = starpu_parse(x.args[1])
+    then_statement = starpu_parse(x.args[2])
+
+    nargs == 2 && return StarpuExprIf(cond, then_statement)
+
+    return StarpuExprIfElse(cond, then_statement, starpu_parse(x.args[3]))
+end
+
+
+# Print a conditional as C code. The else-branch (and the newline that
+# follows it) is only printed for a StarpuExprIfElse.
+function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0,restrict=false)
+    branch_indent = indent + starpu_indent_size
+
+    print_newline(io, indent)
+    print(io, "if (")
+    print(io, x.cond, indent = branch_indent)
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, branch_indent)
+    print(io, x.then_statement, indent = branch_indent)
+    print_newline(io, indent)
+    print(io, "}")
+
+    if isa(x, StarpuExprIfElse)
+        print(io, " else")
+        print_newline(io, indent)
+        print(io, "{")
+        print_newline(io, branch_indent)
+        print(io, x.else_statement, indent = branch_indent)
+        print_newline(io, indent)
+        print(io, "}")
+        print_newline(io, indent)
+    end
+end
+
+
+
+# Apply `func` bottom-up: to the condition, to the then-branch, then to the
+# conditional rebuilt from the transformed parts.
+function apply(func :: Function, expr :: StarpuExprIf)
+    return func(StarpuExprIf(apply(func, expr.cond), apply(func, expr.then_statement)))
+end
+
+
+
+# Apply `func` bottom-up: to the condition, to both branches, then to the
+# conditional rebuilt from the transformed parts.
+function apply(func :: Function, expr :: StarpuExprIfElse)
+    new_cond = apply(func, expr.cond)
+    new_then = apply(func, expr.then_statement)
+    new_else = apply(func, expr.else_statement)
+
+    return func(StarpuExprIfElse(new_cond, new_then, new_else))
+end
+
+#======================================================
+                INTERVALS
+======================================================#
+
+
+
+
+# Convert a range expression (a `:call` expression) into a StarpuExprInterval.
+#   - three arguments (`start:stop`): the step is the literal value 1;
+#   - otherwise (`start:step:stop`): the third argument is the step and the
+#     fourth one is the stop.
+function starpu_parse_interval(x :: Expr)
+    x.head == :(call) || error("Invalid \"interval\" expression")
+
+    start = starpu_parse(x.args[2])
+    step_or_stop = starpu_parse(x.args[3])
+
+    length(x.args) == 3 && return StarpuExprInterval(start, StarpuExprValue(1), step_or_stop)
+
+    return StarpuExprInterval(start, step_or_stop, starpu_parse(x.args[4]))
+end
+
+
+
+# Apply `func` bottom-up: to start, step and stop, then to the interval
+# rebuilt from the transformed bounds. The id of the interval is kept.
+function apply(func :: Function, expr :: StarpuExprInterval)
+    new_start = apply(func, expr.start)
+    new_step = apply(func, expr.step)
+    new_stop = apply(func, expr.stop)
+
+    return func(StarpuExprInterval(new_start, new_step, new_stop, id = expr.id))
+end
+
+#======================================================
+                ARRAYS AND REFERENCES
+======================================================#
+
+
+
+
+# Convert a Julia indexing expression `ref[i1, i2, ...]` (head `:ref`) into
+# a StarpuExprRef.
+function starpu_parse_ref(x :: Expr)
+    x.head == :ref || error("Invalid \"reference\" expression")
+
+    indexed = starpu_parse(x.args[1])
+    indexes = map(starpu_parse, x.args[2:end])
+
+    return StarpuExprRef(indexed, indexes)
+end
+
+
+
+# Structural equality of an indexing expression with another expression:
+# true only when `y` is an indexing expression too, on an equal reference,
+# with the same number of indexes, all pairwise equal.
+function equals(x :: StarpuExprRef, y :: StarpuExpr)
+    isa(y, StarpuExprRef) || return false
+
+    comparable = equals(x.ref, y.ref) && length(x.indexes) == length(y.indexes)
+    comparable || return false
+
+    return all(map(equals, x.indexes, y.indexes))
+end
+
+
+
+
+# Print an indexing expression as ref[i1][i2]...
+function print(io :: IO, x :: StarpuExprRef ; indent = 0,restrict=false)
+    print(io, x.ref, indent = indent)
+
+    for index in x.indexes
+        print(io, "[")
+        print(io, index, indent = indent)
+        print(io, "]")
+    end
+end
+
+
+
+# Apply `func` bottom-up: to the reference, to every index, then to the
+# indexing expression rebuilt from the transformed parts.
+function apply(func :: Function, expr :: StarpuExprRef)
+    new_ref = apply(func, expr.ref)
+    new_indexes = map(expr.indexes) do index
+        apply(func, index)
+    end
+
+    return func(StarpuExprRef(new_ref, new_indexes))
+end
+
+#======================================================
+                RETURN EXPRESSION
+======================================================#
+
+
+
+# Convert a Julia `return` expression into a StarpuExprReturn.
+# A returned typed variable is replaced by the untyped variable of the same
+# name, which allows matching with a StarpuExprVar.
+function starpu_parse_return(x :: Expr)
+    x.head == :return || error("Invalid \"return\" expression")
+
+    parsed = starpu_parse(x.args[1])
+    value = isa(parsed, StarpuExprTypedVar) ? StarpuExprVar(parsed.name) : parsed
+
+    return StarpuExprReturn(value)
+end
+
+# Print a return statement as "return <value>" (no trailing semicolon).
+function print(io :: IO, x :: StarpuExprReturn ; indent = 0,restrict=false)
+    print(io, "return ")
+    return print(io, x.value, indent = indent)
+end
+
+# Apply `func` bottom-up: to the returned value, then to the return
+# statement rebuilt from the transformed value.
+function apply(func :: Function, expr :: StarpuExprReturn)
+    new_value = apply(func, expr.value)
+    return func(StarpuExprReturn(new_value))
+end
+
+# Fallback for the node types without a dedicated method: `func` is applied
+# to the node itself, without visiting any sub-expression.
+apply(func :: Function, expr :: StarpuExpr) = func(expr)
+
+# Print a variable as its name. `indent` and `restrict` are unused: they are
+# accepted so that this method takes the same keywords as the `print`
+# methods of the other node types.
+print(io :: IO, x :: StarpuExprVar ; indent = 0, restrict = false) = print(io, x.name)
+
+# Print a literal value:
+#   - `nothing` prints nothing at all;
+#   - strings are wrapped in double quotes, characters in single quotes
+#     (the content is not escaped);
+#   - any other value is printed as Julia prints it.
+function print(io :: IO, x :: StarpuExprValue ; indent = 0,restrict=false)
+
+    value = x.value
+
+    # `===` is used on purpose: `==` can be overloaded, and returns `missing`
+    # (which is not a valid condition) when the value is `missing`
+    if value === nothing
+        return
+    end
+
+    if isa(value, AbstractString)
+        print(io, '"', value, '"')
+        return
+    end
+
+    if isa(value, Char)
+        print(io, '\'', value, '\'')
+        return
+    end
+
+    print(io, value)
+end
+
+
+
+
+
+# Print the placeholder text of an invalid node. `indent` and `restrict` are
+# unused: they are accepted so that this method takes the same keywords as
+# the `print` methods of the other node types.
+print(io :: IO, x :: StarpuExprInvalid ; indent = 0, restrict = false) = print(io, "INVALID")
+
+
+
+# Fallback of starpu_parse: anything without a more specific method is
+# wrapped as a literal value.
+starpu_parse(raw_value :: Any) = StarpuExprValue(raw_value)
+
+# A bare symbol is parsed as an (untyped) variable.
+starpu_parse(sym :: Symbol) = StarpuExprVar(sym)
+
+#======================================================
+                TYPED EXPRESSION
+======================================================#
+
+
+
+# Convert a Julia type annotation `expr :: T` into a StarpuExprTypedVar
+# (when `expr` is a plain variable) or a StarpuExprTypedExpr (otherwise).
+# `T` is evaluated and must be a type; when it is not, the offending
+# annotation is printed and an error is raised.
+function starpu_parse_typed(x :: Expr)
+    x.head == :(::) || error("Invalid type assigned expression")
+
+    expr = starpu_parse(x.args[1])
+
+    typ = try
+        eval(x.args[2]) :: Type
+    catch
+        print(x.args[2])
+        error("Invalid type in type assigned expression")
+    end
+
+    return isa(expr, StarpuExprVar) ? StarpuExprTypedVar(expr.name, typ) : StarpuExprTypedExpr(expr, typ)
+end
+
+
+
+
+
+starpu_type_traduction_dict = Dict( # Julia type => name of the C type printed by starpu_type_traduction
+    Int32 => "int32_t",
+    UInt32 => "uint32_t",
+    Float32 => "float",
+    Int64 => "int64_t",
+    UInt64 => "uint64_t",
+    Float64 => "double",
+    Nothing => "void" # used e.g. as the return type of functions returning no value — presumably; confirm with callers
+)
+
+
+
+# Return the name of the C type corresponding to the Julia type `x`:
+#   - arrays are handled by starpu_type_traduction_array;
+#   - `Ptr{T}` gives the translation of `T` followed by "*";
+#   - any other type is looked up in starpu_type_traduction_dict.
+function starpu_type_traduction(x)
+    x <: Array && return starpu_type_traduction_array(x)
+    x <: Ptr && return starpu_type_traduction(eltype(x)) * "*"
+
+    return starpu_type_traduction_dict[x]
+end
+
+# Return the C type of an `Array{T,N}`: the translation of `T` followed by
+# one "*" per dimension.
+function starpu_type_traduction_array(x :: Type{Array{T,N}})  where {T,N}
+    return starpu_type_traduction(T) * "*" ^ N
+end
+
+# Print a typed node: a typed variable is printed as "<C type> <name>", a
+# typed expression is printed as its inner expression (the type is dropped).
+# `restrict` is accepted but no qualifier is emitted for it.
+function print(io :: IO, x :: StarpuExprTyped ; indent = 0,restrict=false)
+    if !isa(x, StarpuExprTypedVar)
+        return print(io, x.expr, indent = indent)
+    end
+
+    print(io,starpu_type_traduction(x.typ), " ")
+    return print(io, x.name)
+end
+
+
+
+# Apply `func` bottom-up: to the inner expression, then to the typed
+# expression rebuilt around it (the type is kept).
+function apply(func :: Function, expr :: StarpuExprTypedExpr)
+    return func(StarpuExprTypedExpr(apply(func, expr.expr), expr.typ))
+end
+
+#======================================================
+                While loop
+======================================================#
+
+
+# Convert a Julia `while` expression (condition and body) into a
+# StarpuExprWhile.
+function starpu_parse_while(x :: Expr)
+    x.head == :while || error("Invalid \"while\" loop")
+    length(x.args) >= 2 || error("Invalid \"while\" loop")
+
+    cond = starpu_parse(x.args[1])
+    body = starpu_parse(x.args[2])
+
+    return StarpuExprWhile(cond, body)
+end
+
+
+# Print a while loop as C code: "while (cond)" followed by the braced body.
+# `restrict` is unused: it is accepted so that this method takes the same
+# keywords as the `print` methods of the other node types.
+function print(io :: IO, x :: StarpuExprWhile ; indent = 0, restrict = false)
+    print_newline(io, indent)
+    print(io, "while (")
+    print(io, x.cond, indent = indent + starpu_indent_size)
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.body, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+end
+
+
+
+# Apply `func` bottom-up: to the condition, to the body, then to the loop
+# rebuilt from the transformed parts.
+function apply(func :: Function, expr :: StarpuExprWhile)
+    return func(StarpuExprWhile(apply(func, expr.cond), apply(func, expr.body)))
+end

+ 132 - 0
julia/StarPU.jl/src/compiler/file_generation.jl

@@ -0,0 +1,132 @@
+
+
+
+global generated_cuda_kernel_file_name = "PRINT TO STDOUT" # overwritten by starpu_new_cuda_kernel_file; NOTE(review): the initial text looks like a "no file selected" marker — confirm with its readers
+
+
+
+global generated_cpu_kernel_file_name = "PRINT TO STDOUT" # CPU counterpart of the name above
+
+# Prelude written at the top of every generated CPU kernel file.
+# jlstarpu_interval_size(start, step, stop) is the number of elements of the
+# range start:step:stop, i.e. floor((stop - start) / step) + 1 when the range
+# is not empty, and 0 when it is empty (or when step is 0). The previous
+# formula, (stop - start + 1) / step, lost elements as soon as |step| > 1
+# (e.g. 4 instead of 5 for 1:2:9).
+const cpu_kernel_file_start = "#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    if (step > 0 && stop >= start){
+            return (stop - start) / step + 1;
+    } else if (step < 0 && stop <= start){
+            return (start - stop) / (-step) + 1;
+    } else {
+            return 0;
+    }
+}
+
+"
+
+# Prelude written at the top of every generated CUDA kernel file.
+# jlstarpu_interval_size (host) and jlstarpu_interval_size__device (device)
+# return the number of elements of the range start:step:stop, i.e.
+# floor((stop - start) / step) + 1 when the range is not empty, and 0 when it
+# is empty (or when step is 0). The previous formula,
+# (stop - start + 1) / step, lost elements as soon as |step| > 1
+# (e.g. 4 instead of 5 for 1:2:9).
+const cuda_kernel_file_start = "#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+
+#define THREADS_PER_BLOCK 64
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    if (step > 0 && stop >= start){
+            return (stop - start) / step + 1;
+    } else if (step < 0 && stop <= start){
+            return (start - stop) / (-step) + 1;
+    } else {
+            return 0;
+    }
+}
+
+
+__device__ static inline long long jlstarpu_max__device(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+__device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
+{
+	if (step > 0 && stop >= start){
+		return (stop - start) / step + 1;
+	} else if (step < 0 && stop <= start){
+		return (start - stop) / (-step) + 1;
+	} else {
+		return 0;
+	}
+}
+
+
+"
+
+"""
+	Opens a new Cuda source file, where generated GPU kernels will be written
+"""
+function starpu_new_cuda_kernel_file(file_name :: String)
+
+    # remember the name of the file for later use
+    global generated_cuda_kernel_file_name = file_name
+
+    # "w" creates the file or truncates it; the do-block form closes it even
+    # when writing the prelude throws
+    open(file_name, "w") do kernel_file
+        print(kernel_file, cuda_kernel_file_start)
+    end
+
+    return nothing
+end
+
+export target
+macro target(x) # Select the targets read by @codelet (through the global starpu_target)
+    targets = eval(x) # `x` is evaluated once, at macro expansion time
+    return quote
+        starpu_target=$targets # the evaluated value is spliced into the expansion
+        global starpu_target
+    end
+end
+
+export CPU_CODELETS
+global CPU_CODELETS=Dict{String,String}() # filled by @codelet: codelet name => same name
+export CUDA_CODELETS
+global CUDA_CODELETS=Dict{String,String}() # filled by @codelet: codelet name => "CUDA_" * name
+
+"""
+	    Executes @cuda_kernel and @cpu_kernel
+        """
+macro codelet(x) # Translate the function definition `x` and expand to code writing the generated C / CUDA sources
+    parsed = starpu_parse(x)
+    name=string(x.args[1].args[1].args[1]); # function name, dug out of the prototype (expects `function name(args...) :: T`)
+    dump(name)
+    cpu_expr = transform_to_cpu_kernel(parsed) # both translations are done at macro expansion time
+    prekernel, kernel = transform_to_cuda_kernel(parsed)
+    generated_cpu_kernel_file_name=string("genc_",string(x.args[1].args[1].args[1]),".c") # locals: the globals of the same name are not modified here
+    generated_cuda_kernel_file_name=string("gencuda_",string(x.args[1].args[1].args[1]),".cu")
+    targets=starpu_target # value of starpu_target at macro expansion time
+    return quote
+        # CPU source file: prelude followed by the translated function
+        if ($targets&$STARPU_CPU!=0)
+            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "w")
+            @debugprint "generating " $(generated_cpu_kernel_file_name)
+            print(kernel_file, $(esc(cpu_kernel_file_start)))
+            print(kernel_file, $cpu_expr)
+            close(kernel_file)
+            CPU_CODELETS[$name]=$name
+        end
+        # CUDA source file: prelude, then the __global__ kernel, then the extern "C" host function launching it
+        if ($targets&$STARPU_CUDA!=0)
+            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "w")
+            @debugprint "generating " $(generated_cuda_kernel_file_name)
+            print(kernel_file, $(esc(cuda_kernel_file_start)))
+            print(kernel_file, "__global__ ", $kernel)
+            print(kernel_file, "\nextern \"C\" ", $prekernel)
+            close(kernel_file)
+            CUDA_CODELETS[$name]="CUDA_"*$name
+        end
+        print("end generation")
+        #starpu_task_library_name="generated_tasks"
+        #global starpu_task_library_name
+    end
+end

+ 13 - 0
julia/StarPU.jl/src/compiler/include.jl

@@ -0,0 +1,13 @@
+export starpu_new_cpu_kernel_file
+export starpu_new_cuda_kernel_file
+export @codelet
+export @target
+
+include("utils.jl")
+include("expressions.jl")
+include("parsing.jl")
+include("expression_manipulation.jl")
+include("c.jl")
+include("cuda.jl")
+include("file_generation.jl")
+

+ 5 - 8
julia/src/Compiler/parsing.jl

@@ -14,19 +14,16 @@ starpu_parse_key_word_parsing_function = Dict{Symbol, Function}()
 function starpu_parse(x :: Expr)
 function starpu_parse(x :: Expr)
 
 
     if (x.head == :macrocall)
     if (x.head == :macrocall)
-
+        if (x.args[1] != Symbol("@parallel"))
-        if (x.args[1] != Symbol("@indep"))
+            error("Only @parallel macro, used before a for loop, is allowed ($(x.args[1]) was found)")
-            error("Only @indep macro, used before a for loop, is allowed ($(x.args[1]) was found)")
         end
         end
 
 
-        if (length(x.args) != 2)
+        if (length(x.args) != 3)
-            error("Invalid usage of @indep macro")
+            error("Invalid usage of @parallel macro", length(x.args))
         end
         end
-
+        return starpu_parse_for(x.args[3], is_independant = true)
-        return starpu_parse_for(x.args[2], is_independant = true)
     end
     end
 
 
-
     if !(x.head in keys(starpu_parse_key_word_parsing_function))
     if !(x.head in keys(starpu_parse_key_word_parsing_function))
         return StarpuExprInvalid() #TODO error ?
         return StarpuExprInvalid() #TODO error ?
     end
     end

+ 0 - 12
julia/src/Compiler/utils.jl

@@ -1,9 +1,6 @@
-
 import Base.print
 import Base.print
 
 
-
 function print_newline(io :: IO, indent = 0, n_lines = 1)
 function print_newline(io :: IO, indent = 0, n_lines = 1)
-
     for i in (1 : n_lines)
     for i in (1 : n_lines)
         print(io, "\n")
         print(io, "\n")
     end
     end
@@ -15,11 +12,7 @@ end
 
 
 starpu_indent_size = 4
 starpu_indent_size = 4
 
 
-
-
-
 function rand_char()
 function rand_char()
-
     r = rand(UInt) % 62
     r = rand(UInt) % 62
 
 
     if (0 <= r < 10)
     if (0 <= r < 10)
@@ -32,19 +25,14 @@ function rand_char()
 end
 end
 
 
 function rand_string(size = 8)
 function rand_string(size = 8)
-
     output = ""
     output = ""
 
 
     for i in (1 : size)
     for i in (1 : size)
         output *= string(rand_char())
         output *= string(rand_char())
     end
     end
-
     return output
     return output
 end
 end
 
 
-
-
-
 function system(cmd :: String)
 function system(cmd :: String)
     ccall((:system, "libc"), Cint, (Cstring,), cmd)
     ccall((:system, "libc"), Cint, (Cstring,), cmd)
 end
 end

+ 1 - 7
julia/src/Wrapper/C/jlstarpu.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2018                                     Alexis Juven
- * Copyright (C) 2018       Alexis Juven
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,12 +28,7 @@
 #include <starpu.h>
 #include <starpu.h>
 #include <pthread.h>
 #include <pthread.h>
 
 
-
 #include "jlstarpu_utils.h"
 #include "jlstarpu_utils.h"
 #include "jlstarpu_task.h"
 #include "jlstarpu_task.h"
 
 
-
-
-
-
 #endif /* JLSTARPU_H_ */
 #endif /* JLSTARPU_H_ */

+ 21 - 66
julia/src/Wrapper/C/jlstarpu_data_handles.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2018                                     Alexis Juven
- * Copyright (C) 2018       Alexis Juven
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,31 +16,13 @@
 
 
 #include "jlstarpu.h"
 #include "jlstarpu.h"
 
 
-
-
-
-#if 0
-void print_vector_interface(struct starpu_vector_interface * i)
-{
-	printf("Vector interface at %p\n", i);
-	printf("\tdev_handle : %p\n", i->dev_handle);
-	printf("\telement_size : %u\n", i->elemsize);
-	printf("\tnx : %u\n", i->nx);
-	printf("\toffset : %u\n", i->offset);
-	printf("\tptr : %p\n", i->ptr);
-	printf("\tslide_base : %u\n", i->slice_base);
-}
-#endif
-
-
 enum jlstarpu_data_filter_func
 enum jlstarpu_data_filter_func
 {
 {
-    JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
+	JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
-    JLSTARPU_MATRIX_FILTER_BLOCK
+	JLSTARPU_MATRIX_FILTER_BLOCK,
+	JLSTARPU_VECTOR_FILTER_BLOCK,
 };
 };
 
 
-
-
 struct jlstarpu_data_filter
 struct jlstarpu_data_filter
 {
 {
 	enum jlstarpu_data_filter_func func;
 	enum jlstarpu_data_filter_func func;
@@ -52,63 +33,37 @@ struct jlstarpu_data_filter
 
 
 void * jlstarpu_translate_data_filter_func(enum jlstarpu_data_filter_func func)
 void * jlstarpu_translate_data_filter_func(enum jlstarpu_data_filter_func func)
 {
 {
-
 	switch (func){
 	switch (func){
-
 	case JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK:
 	case JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK:
 		return starpu_matrix_filter_vertical_block;
 		return starpu_matrix_filter_vertical_block;
-
 	case JLSTARPU_MATRIX_FILTER_BLOCK:
 	case JLSTARPU_MATRIX_FILTER_BLOCK:
 		return starpu_matrix_filter_block;
 		return starpu_matrix_filter_block;
-
+	case JLSTARPU_VECTOR_FILTER_BLOCK:
+		return starpu_vector_filter_block;
 	default:
 	default:
 		return NULL;
 		return NULL;
-
 	}
 	}
 
 
 }
 }
 
 
-
+void jlstarpu_translate_data_filter(const struct jlstarpu_data_filter * const input,struct starpu_data_filter * output)
-void jlstarpu_translate_data_filter
-(
-		const struct jlstarpu_data_filter * const input,
-		struct starpu_data_filter * output
-)
 {
 {
 	memset(output, 0, sizeof(struct starpu_data_filter));
 	memset(output, 0, sizeof(struct starpu_data_filter));
-
 	output->filter_func = jlstarpu_translate_data_filter_func(input->func);
 	output->filter_func = jlstarpu_translate_data_filter_func(input->func);
 	output->nchildren = input->nchildren;
 	output->nchildren = input->nchildren;
-
 }
 }
 
 
-
+void jlstarpu_data_partition(starpu_data_handle_t handle,const struct jlstarpu_data_filter * const jl_filter)
-
-
-
-
-
-
-
-void jlstarpu_data_partition
-(
-		starpu_data_handle_t handle,
-		const struct jlstarpu_data_filter * const jl_filter
-)
 {
 {
 	struct starpu_data_filter filter;
 	struct starpu_data_filter filter;
 	jlstarpu_translate_data_filter(jl_filter, &filter);
 	jlstarpu_translate_data_filter(jl_filter, &filter);
-
 	starpu_data_partition(handle, &filter);
 	starpu_data_partition(handle, &filter);
-
 }
 }
 
 
 
 
-void jlstarpu_data_map_filters_1_arg
+void jlstarpu_data_map_filters_1_arg(starpu_data_handle_t handle,
-(
+	const struct jlstarpu_data_filter * const jl_filter
-		starpu_data_handle_t handle,
+	)
-		const struct jlstarpu_data_filter * const jl_filter
-)
 {
 {
 	struct starpu_data_filter filter;
 	struct starpu_data_filter filter;
 	jlstarpu_translate_data_filter(jl_filter, &filter);
 	jlstarpu_translate_data_filter(jl_filter, &filter);
@@ -120,10 +75,10 @@ void jlstarpu_data_map_filters_1_arg
 
 
 void jlstarpu_data_map_filters_2_arg
 void jlstarpu_data_map_filters_2_arg
 (
 (
-		starpu_data_handle_t handle,
+	starpu_data_handle_t handle,
-		const struct jlstarpu_data_filter * const jl_filter_1,
+	const struct jlstarpu_data_filter * const jl_filter_1,
-		const struct jlstarpu_data_filter * const jl_filter_2
+	const struct jlstarpu_data_filter * const jl_filter_2
-)
+	)
 {
 {
 	struct starpu_data_filter filter_1;
 	struct starpu_data_filter filter_1;
 	jlstarpu_translate_data_filter(jl_filter_1, &filter_1);
 	jlstarpu_translate_data_filter(jl_filter_1, &filter_1);
@@ -139,12 +94,12 @@ void jlstarpu_data_map_filters_2_arg
 
 
 
 
 
 
-#define JLSTARPU_GET(interface, field, ret_type)\
+#define JLSTARPU_GET(interface, field, ret_type)			\
-	\
+									\
-	ret_type jlstarpu_##interface##_get_##field(const struct starpu_##interface##_interface * const x)\
+	ret_type jlstarpu_##interface##_get_##field(const struct starpu_##interface##_interface * const x) \
-	{\
+	{								\
-		return (ret_type) x->field;\
+		return (ret_type) x->field;				\
-	}\
+	}								\
 
 
 
 
 
 

+ 1 - 7
julia/src/Wrapper/C/jlstarpu_simple_functions.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2018                                     Alexis Juven
- * Copyright (C) 2018       Alexis Juven
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,16 +15,11 @@
  */
  */
 #include "jlstarpu.h"
 #include "jlstarpu.h"
 
 
-
-
-
 int jlstarpu_init(void)
 int jlstarpu_init(void)
 {
 {
 	return starpu_init(NULL);
 	return starpu_init(NULL);
 }
 }
 
 
-
-
 void jlstarpu_set_to_zero(void * ptr, unsigned int size)
 void jlstarpu_set_to_zero(void * ptr, unsigned int size)
 {
 {
 	memset(ptr, 0, size);
 	memset(ptr, 0, size);

+ 2 - 7
julia/src/Wrapper/C/jlstarpu_task.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2018                                     Alexis Juven
- * Copyright (C) 2018       Alexis Juven
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,11 +26,6 @@
 
 
 #include "jlstarpu.h"
 #include "jlstarpu.h"
 
 
-
-
-
-
-
 struct jlstarpu_codelet
 struct jlstarpu_codelet
 {
 {
 	uint32_t where;
 	uint32_t where;
@@ -40,6 +34,7 @@ struct jlstarpu_codelet
 	char * cpu_func_name;
 	char * cpu_func_name;
 
 
 	starpu_cuda_func_t cuda_func;
 	starpu_cuda_func_t cuda_func;
+	starpu_opencl_func_t opencl_func;
 
 
 	int nbuffer;
 	int nbuffer;
 	enum starpu_data_access_mode * modes;
 	enum starpu_data_access_mode * modes;

+ 9 - 9
julia/src/Wrapper/C/jlstarpu_task_submit.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2018                                     Alexis Juven
- * Copyright (C) 2018       Alexis Juven
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,14 +43,12 @@ struct starpu_codelet * jlstarpu_translate_codelet(struct jlstarpu_codelet * con
 
 
 	starpu_codelet_init(output);
 	starpu_codelet_init(output);
 
 
-
-
 	output->where = input->where;
 	output->where = input->where;
-
 	output->cpu_funcs[0] = input->cpu_func;
 	output->cpu_funcs[0] = input->cpu_func;
 	output->cpu_funcs_name[0] = input->cpu_func_name;
 	output->cpu_funcs_name[0] = input->cpu_func_name;
 
 
 	output->cuda_funcs[0] = input->cuda_func;
 	output->cuda_funcs[0] = input->cuda_func;
+	output->opencl_funcs[0] = input->opencl_func;
 
 
 	output->nbuffers = input->nbuffer;
 	output->nbuffers = input->nbuffer;
 	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
 	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
@@ -70,6 +67,7 @@ void jlstarpu_codelet_update(const struct jlstarpu_codelet * const input, struct
 	output->cpu_funcs_name[0] = input->cpu_func_name;
 	output->cpu_funcs_name[0] = input->cpu_func_name;
 
 
 	output->cuda_funcs[0] = input->cuda_func;
 	output->cuda_funcs[0] = input->cuda_func;
+	output->opencl_funcs[0] = input->opencl_func;
 
 
 	output->nbuffers = input->nbuffer;
 	output->nbuffers = input->nbuffer;
 	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
 	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
@@ -84,7 +82,9 @@ void jlstarpu_free_codelet(struct starpu_codelet * cl)
 }
 }
 #endif
 #endif
 
 
-
+void jlstarpu_hello() {
+	fprintf(stderr,"coucou !");
+}
 
 
 #if 0
 #if 0
 struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const input)
 struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const input)
@@ -104,7 +104,9 @@ struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const
 }
 }
 #endif
 #endif
 
 
-
+char *starpu_find_function(char *name, char *device) {
+	return NULL;
+}
 
 
 void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starpu_task * const output)
 void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starpu_task * const output)
 {
 {
@@ -115,8 +117,6 @@ void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starp
 	output->cl_arg_size = input->cl_arg_size;
 	output->cl_arg_size = input->cl_arg_size;
 }
 }
 
 
-
-
 /*
 /*
 
 
 void print_perfmodel(struct starpu_perfmodel * p)
 void print_perfmodel(struct starpu_perfmodel * p)

+ 1 - 2
julia/src/Wrapper/C/jlstarpu_utils.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2018                                     Alexis Juven
- * Copyright (C) 2018       Alexis Juven
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by

+ 23 - 26
julia/src/Wrapper/Julia/linked_list.jl

@@ -1,21 +1,18 @@
-
-
-
     export Link
     export Link
     mutable struct Link{T}
     mutable struct Link{T}
 
 
         data :: T
         data :: T
 
 
-        previous :: Union{Nullable{Link{T}}, Link{T}}
+        previous :: Union{Nothing, Link{T}}
-        next :: Union{Nullable{Link{T}}, Link{T}}
+        next :: Union{Nothing, Link{T}}
 
 
         list
         list
 
 
         function Link{T}(x :: T, l) where {T}
         function Link{T}(x :: T, l) where {T}
             output = new()
             output = new()
             output.data = x
             output.data = x
-            output.previous = Nullable{Link{T}}()
+            output.previous = Nothing()
-            output.next = Nullable{Link{T}}()
+            output.next = Nothing()
             output.list = l
             output.list = l
             return output
             return output
         end
         end
@@ -27,14 +24,14 @@
 
 
         nelement :: Int64
         nelement :: Int64
 
 
-        first :: Union{Nullable{Link{T}}, Link{T}}
+        first :: Union{Nothing, Link{T}}
-        last :: Union{Nullable{Link{T}}, Link{T}}
+        last :: Union{Nothing, Link{T}}
 
 
         function LinkedList{T}() where {T}
         function LinkedList{T}() where {T}
             output = new()
             output = new()
             output.nelement = 0
             output.nelement = 0
-            output.first = Nullable{Link{T}}()
+            output.first = Nothing()
-            output.last = Nullable{Link{T}}()
+            output.last = Nothing()
 
 
             return output
             return output
         end
         end
@@ -50,7 +47,7 @@
         l.first = new_first
         l.first = new_first
         new_first.next = old_first
         new_first.next = old_first
 
 
-        if (isnull(old_first))
+        if (isnothing(old_first))
             l.last = new_first
             l.last = new_first
         else
         else
             old_first.previous = new_first
             old_first.previous = new_first
@@ -71,7 +68,7 @@
         l.last = new_last
         l.last = new_last
         new_last.previous = old_last
         new_last.previous = old_last
 
 
-        if (isnull(old_last))
+        if (isnothing(old_last))
             l.first = new_last
             l.first = new_last
         else
         else
             old_last.next = new_last
             old_last.next = new_last
@@ -106,13 +103,13 @@
         next = lnk.next
         next = lnk.next
         previous = lnk.previous
         previous = lnk.previous
 
 
-        if (isnull(next))
+        if (isnothing(next))
             l.last = previous
             l.last = previous
         else
         else
             next.previous = previous
             next.previous = previous
         end
         end
 
 
-        if (isnull(previous))
+        if (isnothing(previous))
             l.first = next
             l.first = next
         else
         else
             previous.next = next
             previous.next = next
@@ -140,7 +137,7 @@
         quote
         quote
             $(esc(lnk_iterator)) = $(esc(list)).first
             $(esc(lnk_iterator)) = $(esc(list)).first
 
 
-            while (!isnull($(esc(lnk_iterator))))
+            while (!isnothing($(esc(lnk_iterator))))
                 __next_lnk_iterator = $(esc(lnk_iterator)).next
                 __next_lnk_iterator = $(esc(lnk_iterator)).next
                 $(esc(expression))
                 $(esc(expression))
                 $(esc(lnk_iterator)) = __next_lnk_iterator
                 $(esc(lnk_iterator)) = __next_lnk_iterator
@@ -155,7 +152,7 @@
         quote
         quote
             $(esc(lnk_iterator)) = $(esc(list)).last
             $(esc(lnk_iterator)) = $(esc(list)).last
 
 
-            while (!isnull($(esc(lnk_iterator))))
+            while (!isnothing($(esc(lnk_iterator))))
                 __next_lnk_iterator = $(esc(lnk_iterator)).previous
                 __next_lnk_iterator = $(esc(lnk_iterator)).previous
                 $(esc(expression))
                 $(esc(expression))
                 $(esc(lnk_iterator)) = __next_lnk_iterator
                 $(esc(lnk_iterator)) = __next_lnk_iterator
@@ -173,7 +170,7 @@
 
 
         print(io, " ; previous: ")
         print(io, " ; previous: ")
 
 
-        if (isnull(lnk.previous))
+        if (isnothing(lnk.previous))
             print(io, "NONE")
             print(io, "NONE")
         else
         else
             print(io, lnk.previous.data)
             print(io, lnk.previous.data)
@@ -181,7 +178,7 @@
 
 
         print(io, " ; next: ")
         print(io, " ; next: ")
 
 
-        if (isnull(lnk.next))
+        if (isnothing(lnk.next))
             print(io, "NONE")
             print(io, "NONE")
         else
         else
             print(io, lnk.next.data)
             print(io, lnk.next.data)
@@ -199,7 +196,7 @@
 
 
         @foreach_asc l lnk begin
         @foreach_asc l lnk begin
 
 
-            if (!isnull(lnk.previous))
+            if (!isnothing(lnk.previous))
                 print(io, ", ")
                 print(io, ", ")
             end
             end
 
 
@@ -213,24 +210,24 @@
 
 
 
 
 
 
-    import Base.start
+    #import Base.start
     function start(l :: LinkedList)
     function start(l :: LinkedList)
         return nothing
         return nothing
     end
     end
 
 
 
 
-    import Base.done
+    #import Base.done
     function done(l :: LinkedList, state)
     function done(l :: LinkedList, state)
 
 
         if (state == nothing)
         if (state == nothing)
-            return isnull(l.first)
+            return isnothing(l.first)
         end
         end
 
 
-        return isnull(state.next)
+        return isnothing(state.next)
     end
     end
 
 
 
 
-    import Base.next
+    #import Base.next
     function next(l :: LinkedList, state)
     function next(l :: LinkedList, state)
 
 
         if (state == nothing)
         if (state == nothing)
@@ -243,7 +240,7 @@
     end
     end
 
 
 
 
-    import Base.endof
+    #import Base.endof
     function endof(l :: LinkedList)
     function endof(l :: LinkedList)
         return l.nelement
         return l.nelement
     end
     end

julia/tst/black_scholes/black_scholes.c → julia/black_scholes/black_scholes.c


+ 83 - 13
julia/tst/black_scholes/cpu_cuda_black_scholes.jl

@@ -1,13 +1,8 @@
-include("../../src/Compiler/include.jl")
+import Libdl
+using StarPU
 
 
-starpu_new_cpu_kernel_file("../build/generated_cpu_black_scholes.c")
+@target STARPU_CPU+STARPU_CUDA
-starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
+@codelet function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) :: Float32
-
-
-
-
-
-@cpu_cuda_kernel function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) ::Void
     
     
     widthn ::Int64 = width(data)
     widthn ::Int64 = width(data)
         
         
@@ -25,7 +20,7 @@ starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
     b5 ::Float64 = 1.330274428
     b5 ::Float64 = 1.330274428
 
 
     
     
-    @indep for i = 1:widthn
+    @parallel for i = 1:widthn
         
         
 
 
         d1 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] + pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i]))
         d1 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] + pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i]))
@@ -117,8 +112,83 @@ starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
         res[2,i] = -data[1,i] * (normd1n) + data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2n) # -S * N(-d1) + r*exp(-r*T) * norm(-d2)
         res[2,i] = -data[1,i] * (normd1n) + data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2n) # -S * N(-d1) + r*exp(-r*T) * norm(-d2)
         
         
     end
     end
+    return 0
+end
+
+
+@debugprint "starpu_init"
+starpu_init()
+
+function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64)
+    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices)
+
+    @starpu_block let
+        dat_handle, res_handle = starpu_data_register(data, res)
+
+        starpu_data_partition(dat_handle, vert)
+        starpu_data_partition(res_handle, vert)
+        
+        #Compute the price of call and put option in the res matrix
+        @starpu_sync_tasks for task in (1:nslices)
+            @starpu_async_cl black_scholes(dat_handle[task], res_handle[task]) [STARPU_RW, STARPU_RW] 
+        end
+    end
+    return 0
 end
 end
 
 
-compile_cpu_kernels("../build/generated_cpu_black_scholes.so")
+
-compile_cuda_kernels("../build/generated_cuda_black_scholes.so")
+function init_data(data, data_nbr);
-combine_kernel_files("../build/generated_tasks_black_scholes.so", ["../build/generated_cpu_black_scholes.so", "../build/generated_cuda_black_scholes.so"])
+    for i in 1:data_nbr
+        data[1,i] = rand(Float64) * 100
+        data[2,i] = rand(Float64) * 100
+        data[3,i] = rand(Float64)
+        data[4,i] = rand(Float64) * 10
+        data[5,i] = rand(Float64) * 10
+    end
+    return data
+end
+        
+
+
+"""
+    median_times(data_nbr, nslices, nbr_tests)
+
+Run `black_scholes_starpu` `nbr_tests` times on freshly initialised input of
+`data_nbr` columns, split into `nslices` slices, and return the median
+execution time in seconds (the lower median when `nbr_tests` is even).
+"""
+function median_times(data_nbr, nslices, nbr_tests)
+    data = zeros(Float64, 5, data_nbr)
+    res = zeros(Float64, 2, data_nbr)
+    exec_times = zeros(Float64, nbr_tests)
+
+    for i in 1:nbr_tests
+        # Re-randomise the inputs before every measurement.
+        init_data(data, data_nbr)
+
+        # `tic()`/`toq()` no longer exist since Julia 1.0; `@elapsed` returns
+        # the elapsed wall-clock time of the expression in seconds.
+        exec_times[i] = @elapsed black_scholes_starpu(data, res, nslices)
+    end
+    sort!(exec_times)
+
+    return exec_times[1 + div(nbr_tests - 1, 2)]
+end
+
+"""
+    display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests)
+
+For every problem size in `start_nbr : step_nbr : stop_nbr`, measure the
+median execution time with `median_times`, print it, and append a
+`"<size> <time>"` line to `black_scholes_times.dat`.
+"""
+function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests)
+    open("black_scholes_times.dat", "w") do f
+        for data_nbr in (start_nbr : step_nbr : stop_nbr)
+            t = median_times(data_nbr, nslices, nbr_tests)
+            # NOTE(review): the previous message also interpolated `mtc[i]` and
+            # `mtcgen[i]`, which are not defined anywhere in this script and
+            # raised UndefVarError; only the measured Julia time is reported.
+            println("Number of data:\n$data_nbr\nTimes:\njl: $t")
+            write(f, "$data_nbr $(t)\n")
+        end
+    end
+end

+ 263 - 0
julia/mandelbrot/mandelbrot.c

@@ -0,0 +1,263 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019       Mael Keryell
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <starpu.h>
+#include "../display.h"
+
+void cpu_mandelbrot(void **, void *);
+void gpu_mandelbrot(void **, void *);
+
+struct Params
+{
+	float cr;
+	float ci;
+	unsigned taskx;
+	unsigned tasky;
+	unsigned width;
+	unsigned height;
+};
+
+
+
+struct starpu_codelet cl =
+{
+	.cpu_funcs = {cpu_mandelbrot},
+	.cuda_funcs = {gpu_mandelbrot},
+	.nbuffers = 1,
+	.modes = {STARPU_RW}
+};
+
+
+/*
+ * Compute the Mandelbrot image into `pixels` using StarPU tasks.
+ *
+ * `pixels` is registered as a StarPU matrix of int, partitioned with two
+ * filters (nslicesy vertical blocks, then nslicesx blocks), and one task
+ * running codelet `cl` is submitted per sub-block. Each task receives its own
+ * `struct Params` (cr, ci, task coordinates, full image size) through cl_arg.
+ * The function waits for all tasks, then unpartitions and unregisters the data.
+ */
+void mandelbrot_with_starpu(int *pixels, float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy)
+{
+	starpu_data_handle_t p_handle;
+	unsigned taskx, tasky;
+
+	/* One parameter record per task. Allocated before touching StarPU so
+	 * that an allocation failure leaves nothing registered. The size is
+	 * computed in size_t to avoid unsigned overflow. */
+	size_t ntasks = (size_t)nslicesx * nslicesy;
+	struct Params *params = malloc(ntasks * sizeof(struct Params));
+	if (params == NULL && ntasks > 0)
+	{
+		fprintf(stderr, "mandelbrot_with_starpu: cannot allocate task parameters\n");
+		return;
+	}
+
+	starpu_matrix_data_register(&p_handle, STARPU_MAIN_RAM, (uintptr_t)pixels, width, width, height, sizeof(int));
+
+	struct starpu_data_filter vert =
+	{
+		.filter_func = starpu_matrix_filter_vertical_block,
+		.nchildren = nslicesy
+	};
+
+	struct starpu_data_filter horiz =
+	{
+		.filter_func = starpu_matrix_filter_block,
+		.nchildren = nslicesx
+	};
+
+	starpu_data_map_filters(p_handle, 2, &vert, &horiz);
+
+	for (taskx = 0; taskx < nslicesx; taskx++){
+		for (tasky = 0; tasky < nslicesy; tasky++){
+			struct starpu_task *task = starpu_task_create();
+			struct Params *param = &params[taskx + tasky*nslicesx];
+			struct Params value = {cr, ci, taskx, tasky, width, height};
+			int ret;
+
+			/* The record must outlive the task, hence the heap array
+			 * rather than a loop-local variable. */
+			*param = value;
+
+			task->cl = &cl;
+			task->handles[0] = starpu_data_get_sub_data(p_handle, 2, tasky, taskx);
+			task->cl_arg = param;
+			task->cl_arg_size = sizeof(struct Params);
+
+			ret = starpu_task_submit(task);
+			if (ret != 0)
+				fprintf(stderr, "mandelbrot_with_starpu: starpu_task_submit failed (%d)\n", ret);
+		}
+	}
+	starpu_task_wait_for_all();
+
+	starpu_data_unpartition(p_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(p_handle);
+
+	free(params);
+}
+
+/* Set every element of the width x height image `pixels` to 0. */
+void init_zero(int * pixels, unsigned width, unsigned height)
+{
+	unsigned row;
+	for (row = 0; row < height; row++){
+		int *line = pixels + row*width;
+		unsigned col = 0;
+		while (col < width){
+			line[col] = 0;
+			col++;
+		}
+	}
+}
+
+/* qsort() comparator ordering two doubles in ascending order. */
+static int compare_doubles(const void *lhs, const void *rhs)
+{
+	const double a = *(const double *)lhs;
+	const double b = *(const double *)rhs;
+	return (a > b) - (a < b);
+}
+
+/*
+ * Sort the `nbr_tests` values of `arr` in ascending order, in place.
+ *
+ * Arrays with fewer than two elements are left untouched; the previous
+ * hand-written loop computed `nbr_tests - 1` on an unsigned value, which
+ * wrapped around for an empty array and read out of bounds.
+ */
+void sort(double *arr, unsigned nbr_tests)
+{
+	if (arr == NULL || nbr_tests < 2)
+		return;
+
+	qsort(arr, nbr_tests, sizeof(double), compare_doubles);
+}
+/*
+ * Run mandelbrot_with_starpu() `nbr_tests` times on a width x height image
+ * and return the median execution time in seconds.
+ *
+ * The image from the last run is written to "PPM/mandelbrot<width>.ppm"
+ * through mandelbrot_graph().
+ *
+ * Returns a negative value when no measurement can be made: `nbr_tests` is 0
+ * or a buffer cannot be allocated.
+ */
+double median_time(float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy, unsigned nbr_tests)
+{
+	int *Pixels;
+	double *exec_times;
+	double start, stop, median;
+	unsigned i;
+	char filename[64];
+
+	/* A zero-length VLA and a read of exec_times[0] were undefined. */
+	if (nbr_tests == 0)
+		return -1.;
+
+	/* Sizes are computed in size_t to avoid unsigned overflow; the timing
+	 * array lives on the heap so a large nbr_tests cannot blow the stack. */
+	Pixels = malloc((size_t)width * height * sizeof(int));
+	exec_times = malloc((size_t)nbr_tests * sizeof(double));
+	if (Pixels == NULL || exec_times == NULL)
+	{
+		fprintf(stderr, "median_time: out of memory\n");
+		free(Pixels);
+		free(exec_times);
+		return -1.;
+	}
+
+	for (i = 0; i < nbr_tests; i++){
+		init_zero(Pixels, width, height);
+
+		start = starpu_timing_now(); // starpu_timing_now() gives the time in microseconds.
+		mandelbrot_with_starpu(Pixels, cr, ci, width, height, nslicesx, nslicesy);
+		stop = starpu_timing_now();
+
+		exec_times[i] = (stop-start)/1.e6;
+	}
+
+	/* width is unsigned, so %u; snprintf bounds the write. */
+	snprintf(filename, sizeof(filename), "PPM/mandelbrot%u.ppm", width);
+	printf("%s\n", filename);
+
+	mandelbrot_graph(filename, Pixels, width, height);
+
+	free(Pixels);
+
+	sort(exec_times, nbr_tests);
+	median = exec_times[nbr_tests/2];
+
+	free(exec_times);
+
+	return median;
+}
+
+/*
+ * Run mandelbrot_with_starpu() `nbr_tests` times on a width x height image
+ * and store each execution time, in seconds, in exec_times[0..nbr_tests-1].
+ *
+ * If the image buffer cannot be allocated, an error is printed and every
+ * entry of `exec_times` is set to -1.
+ */
+void fluctuation_time(float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy, unsigned nbr_tests, double *exec_times)
+{
+	unsigned i;
+	double start, stop;
+
+	/* Size computed in size_t to avoid unsigned overflow. */
+	int *Pixels = malloc((size_t)width * height * sizeof(int));
+	if (Pixels == NULL)
+	{
+		fprintf(stderr, "fluctuation_time: out of memory\n");
+		for (i = 0; i < nbr_tests; i++)
+			exec_times[i] = -1.;
+		return;
+	}
+
+	for (i = 0; i < nbr_tests; i++){
+		init_zero(Pixels, width, height);
+
+		start = starpu_timing_now(); // starpu_timing_now() gives the time in microseconds.
+		mandelbrot_with_starpu(Pixels, cr, ci, width, height, nslicesx, nslicesy);
+		stop = starpu_timing_now();
+
+		exec_times[i] = (stop-start)/1.e6;
+
+		/* char filename[33]; */
+		/* sprintf(filename, "../PPM/mandelbrot%d.ppm", i + 1); */
+		/* printf("%s\n", filename); */
+		/* mandelbrot_graph(filename, Pixels, width, height); */
+	}
+
+	free(Pixels);
+}
+
+
+/*
+ * For each square dimension from start_dim to stop_dim (inclusive) in steps of
+ * step_dim, measure the median execution time with median_time(), print it,
+ * and write it as one line of "DAT/mandelbrot_c_struct_times.dat".
+ *
+ * Prints an error and returns without measuring when the output file cannot
+ * be opened, or when step_dim is 0 and the loop would never terminate.
+ */
+void display_times(float cr, float ci, unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nslices, unsigned nbr_tests)
+{
+	unsigned dim;
+	FILE *myfile;
+
+	if (step_dim == 0 && start_dim <= stop_dim)
+	{
+		fprintf(stderr, "display_times: step_dim must be non-zero\n");
+		return;
+	}
+
+	myfile = fopen("DAT/mandelbrot_c_struct_times.dat", "w");
+	if (myfile == NULL)
+	{
+		perror("DAT/mandelbrot_c_struct_times.dat");
+		return;
+	}
+
+	for (dim = start_dim; dim <= stop_dim; dim += step_dim){
+		printf("Dimension: %u...\n", dim);
+		double t = median_time(cr, ci, dim, dim, nslices, nslices, nbr_tests);
+
+		printf("w = %u ; h = %u ; t = %f\n", dim, dim, t);
+
+		fprintf(myfile, "%f\n", t);
+	}
+
+	fclose(myfile);
+}
+
+/*
+ * Measure `nbr_tests` individual execution times for a start_dim x start_dim
+ * image with fluctuation_time(), print them, and write "<index> <seconds>"
+ * lines to "DAT/mandelbrot_c_fluctuation.dat".
+ *
+ * step_dim and stop_dim are accepted for signature parity with
+ * display_times() but are not used. Prints an error and returns when the
+ * output file cannot be opened or the timing array cannot be allocated.
+ */
+void display_fluctuations(float cr, float ci, unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nslices, unsigned nbr_tests)
+{
+	unsigned i;
+	FILE *myfile;
+	double *exec_times;
+
+	(void) step_dim;
+	(void) stop_dim;
+
+	myfile = fopen("DAT/mandelbrot_c_fluctuation.dat", "w");
+	if (myfile == NULL)
+	{
+		perror("DAT/mandelbrot_c_fluctuation.dat");
+		return;
+	}
+
+	exec_times = malloc((size_t)nbr_tests * sizeof(double));
+	if (exec_times == NULL && nbr_tests > 0)
+	{
+		fprintf(stderr, "display_fluctuations: out of memory\n");
+		fclose(myfile);
+		return;
+	}
+
+	fluctuation_time(cr, ci, start_dim, start_dim, nslices, nslices, nbr_tests, exec_times);
+
+	/* printf("w = %u ; h = %u ; t = %f\n", dim, dim, t); */
+	for (i = 0; i < nbr_tests; i++){
+		printf("test %u: %f seconds\n", i, exec_times[i]);
+		fprintf(myfile, "%u %f\n", i, exec_times[i]);
+	}
+
+	fclose(myfile);
+	free(exec_times);
+}
+
+
+int main(int argc, char **argv)
+{
+
+	if (argc != 8){
+		printf("Usage: %s cr ci start_dim step_dim stop_dim nslices(must divide dims) nbr_tests\n", argv[0]);
+		return 1;
+	}
+	if (starpu_init(NULL) != EXIT_SUCCESS){
+		fprintf(stderr, "ERROR\n");
+		return 77;
+	}
+
+
+	
+	float cr = (float) atof(argv[1]);
+	float ci = (float) atof(argv[2]);
+	unsigned start_dim = (unsigned) atoi(argv[3]);
+	unsigned step_dim = (unsigned) atoi(argv[4]);	
+	unsigned stop_dim = (unsigned) atoi(argv[5]);
+	unsigned nslices = (unsigned) atoi(argv[6]);
+	unsigned nbr_tests = (unsigned) atoi(argv[7]);
+
+	display_times(cr, ci, start_dim, step_dim, stop_dim, nslices, nbr_tests);
+	
+	
+	/* display_fluctuations(cr, ci, start_dim, step_dim, stop_dim, nslices, nbr_tests); */
+
+
+	starpu_shutdown();
+
+
+	return 0;
+}

+ 30 - 0
julia/mandelbrot/mandelbrot.jl

@@ -0,0 +1,30 @@
+"""
+    mandelbrotjl(pixels, centerr, centeri)
+
+Fill `pixels` in place with a Mandelbrot rendering centred on
+`centerr + centeri*im`. The zoom factor and the iteration limit are derived
+from the image width. A point that escapes the radius `2.0` before the
+iteration limit gets `round(255 * n / max_iterations)`, where `n` is the number
+of iterations performed; every other point gets `0`.
+"""
+function mandelbrotjl(pixels ::Matrix{Int64}, centerr ::Float64, centeri ::Float64)
+    height, width = size(pixels)
+    zoom = width * 0.25296875
+    val_diverge = 2.0
+    max_iterations = (width/2) * 0.049715909 * log10(zoom)
+
+    # Julia matrices are column-major: keeping the row index `y` in the inner
+    # loop walks memory contiguously. Each pixel is independent, so the
+    # result is identical to the row-by-row traversal.
+    for x = 1:width
+        cr = centerr + (x - (width / 2))/zoom
+        for y = 1:height
+            ci = centeri + (y - (height / 2))/zoom
+            zr = cr
+            zi = ci
+
+            n = 0
+            while ((n < max_iterations) && (zr*zr + zi*zi < val_diverge*val_diverge))
+                tmp = zr*zr - zi*zi + cr
+                zi = 2*zr*zi + ci
+                zr = tmp
+                n = n+1
+            end
+
+            if (n < max_iterations)
+                pixels[y,x] = round(255 * n / max_iterations)
+            else
+                pixels[y,x] = 0
+            end
+        end
+    end
+end

+ 20 - 0
julia/mult/README

@@ -0,0 +1,20 @@
+First, compile StarPU.jl.
+Then type:
+> make
+> make test
+> gnuplot mult.plot
+
+During the test, several versions are compared:
+- mult.c: this is the original C+starpu code
+- mult_native.jl: this is the native julia way of multiplying 2 matrices. There is a call to openBLAS
+- mult_generatedc.jl: generates a C file, genc_matrix_mult.c. This file is compiled into a library, that is loaded and a function is executed.
+- mult_calllib.jl: the env variable JULIA_TASK_LIB should point to a library containing the function matrix_mult. This function is called.
+
+Examples of how to launch mult.jl are given in the test rules of the makefile.
+	 
+A CUDA version is generated too. Untested so far (no nvidia gpu on my laptop!)
+
+
+
+
+

+ 55 - 0
julia/mult/makefile

@@ -0,0 +1,55 @@
+# tile size. Should be changed in mult.jl as well
+STRIDE=72
+
+# ICC compiler
+#CC =icc
+#CFLAGS=-restrict -unroll4 -ipo -falign-loops=256 -O3 -DSTRIDE=${STRIDE} -march=native $(shell pkg-config --cflags starpu-1.3)
+# GCC compiler
+CC=gcc-9
+CFLAGS += -O3 -DSTRIDE=${STRIDE} -mavx -mfma -fomit-frame-pointer -march=native -ffast-math $(shell pkg-config --cflags starpu-1.3)
+
+LDFLAGS +=$(shell pkg-config --libs starpu-1.3)
+EXTERNLIB=extern_tasks.dylib
+GENERATEDLIB=generated_tasks.dylib
+OBJECTS=$(patsubst %.c,%.o,$(wildcard gen*.c))
+LIBPATH=${PWD}/../StarPU.jl/lib
+
+# Prepend LIBPATH to the library search path of the julia runs. A plain
+# "LD_LIBRARY_PATH+=..." glues the new entry onto the old value without a ':'
+# separator and is not valid in a POSIX /bin/sh.
+RUNENV=LD_LIBRARY_PATH="${LIBPATH}$${LD_LIBRARY_PATH:+:$${LD_LIBRARY_PATH}}" STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1
+
+.PHONY: all clean test
+
+all: ${EXTERNLIB} 
+
+mult: mult.c cpu_mult.o #gpu_mult.o 
+	$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)	
+
+gpu_mult.o: gpu_mult.cu
+	nvcc -c $(CFLAGS) $^ -o $@
+
+%.o: %.c
+	$(CC) -c $(CFLAGS) $^ -o $@
+
+${EXTERNLIB}: cpu_mult.o
+	$(CC) -shared -fPIC $(LDFLAGS) $^ -o $@  
+
+gpu_mult.so: gpu_mult.o
+	nvcc $(CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS)
+
+cpu_mult_sa: cpu_mult_sa.o
+	$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)
+
+${GENERATEDLIB}: ${OBJECTS}
+	$(CC) -shared -fPIC $(LDFLAGS) $^ -o $@
+
+# -f: do not fail when some of the patterns match nothing
+clean:
+	rm -f *.so *.o *.dylib c_*.genc gencuda_*.cu *.dat
+
+# Performance Tests
+cstarpu.dat: mult
+	STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 ./mult > $@
+julia_generatedc.dat:
+	${RUNENV} julia mult.jl $@
+julia_native.dat:
+	${RUNENV} julia mult_native.jl $@
+julia_calllib.dat: ${EXTERNLIB}
+	JULIA_TASK_LIB="${EXTERNLIB}" ${RUNENV} julia mult.jl julia_calllib.dat
+
+test: cstarpu.dat julia_generatedc.dat julia_native.dat julia_calllib.dat
+
+

+ 237 - 0
julia/mult/mult.c

@@ -0,0 +1,237 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                     Alexis Juven
+ * Copyright (C) 2012,2013                                Inria
+ * Copyright (C) 2009-2011,2013-2015                      Université de Bordeaux
+ * Copyright (C) 2010                                     Mehdi Juhoor
+ * Copyright (C) 2010-2013,2015,2017                      CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example shows a simple implementation of a blocked matrix
+ * multiplication. Note that this is NOT intended to be an efficient
+ * implementation of sgemm! In this example, we show:
+ *  - how to declare dense matrices (starpu_matrix_data_register)
+ *  - how to manipulate matrices within codelets (eg. descr[0].blas.ld)
+ *  - how to use filters to partition the matrices into blocks
+ *    (starpu_data_partition and starpu_data_map_filters)
+ *  - how to unpartition data (starpu_data_unpartition) and how to stop
+ *    monitoring data (starpu_data_unregister)
+ *  - how to manipulate subsets of data (starpu_data_get_sub_data)
+ *  - how to construct an autocalibrated performance model (starpu_perfmodel)
+ *  - how to submit asynchronous tasks
+ */
+
+#include <string.h>
+#include <math.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include <starpu.h>
+
+
+
+/*
+ * That program should compute C = A * B
+ *
+ *   A of size (z,y)
+ *   B of size (x,z)
+ *   C of size (x,y)
+
+              |---------------|
+            z |       B       |
+              |---------------|
+       z              x
+     |----|   |---------------|
+     |    |   |               |
+     |    |   |               |
+     | A  | y |       C       |
+     |    |   |               |
+     |    |   |               |
+     |----|   |---------------|
+
+ */
+
+
+
+
+
+//void gpu_mult(void **, void *);
+void cpu_mult(void **, void *);
+
+
+static struct starpu_perfmodel model =
+{
+		.type = STARPU_HISTORY_BASED,
+		.symbol = "history_perf"
+};
+
+static struct starpu_codelet cl =
+{
+		.cpu_funcs = {cpu_mult},
+		.cpu_funcs_name = {"cpu_mult"},
+		//.cuda_funcs = {gpu_mult},
+		.nbuffers = 3,
+		.modes = {STARPU_R, STARPU_R, STARPU_W},
+		.model = &model
+};
+
+
+void multiply_with_starpu(float *A, float *B, float *C,  unsigned xdim,  unsigned ydim,  unsigned zdim, unsigned nslicesx, unsigned nslicesy)
+{
+	starpu_data_handle_t A_handle, B_handle, C_handle;
+
+
+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A,
+			ydim, ydim, zdim, sizeof(float));
+	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B,
+			zdim, zdim, xdim, sizeof(float));
+	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C,
+			ydim, ydim, xdim, sizeof(float));
+
+
+	struct starpu_data_filter vert =
+	{
+			.filter_func = starpu_matrix_filter_vertical_block,
+			.nchildren = nslicesx
+	};
+
+	struct starpu_data_filter horiz =
+	{
+			.filter_func = starpu_matrix_filter_block,
+			.nchildren = nslicesy
+	};
+
+
+	starpu_data_partition(B_handle, &vert);
+	starpu_data_partition(A_handle, &horiz);
+	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
+
+	unsigned taskx, tasky;
+
+	for (taskx = 0; taskx < nslicesx; taskx++){
+		for (tasky = 0; tasky < nslicesy; tasky++){
+
+			struct starpu_task *task = starpu_task_create();
+
+			task->cl = &cl;
+			task->handles[0] = starpu_data_get_sub_data(A_handle, 1, tasky);
+			task->handles[1] = starpu_data_get_sub_data(B_handle, 1, taskx);
+			task->handles[2] = starpu_data_get_sub_data(C_handle, 2, taskx, tasky);
+
+			if (starpu_task_submit(task)!=0) fprintf(stderr,"submit task error\n");
+
+		}
+	}
+
+	starpu_task_wait_for_all();
+
+
+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(A_handle);
+	starpu_data_unregister(B_handle);
+	starpu_data_unregister(C_handle);
+
+}
+
+
+
+void init_rand(float * m, unsigned width, unsigned height)
+{
+	unsigned i,j;
+
+	for (j = 0 ; j < height ; j++){
+		for (i = 0 ; i < width ; i++){
+			m[j+i*height] = (float)(starpu_drand48());
+		}
+	}
+}
+
+
+void init_zero(float * m, unsigned width, unsigned height)
+{
+	memset(m, 0, sizeof(float) * width * height);
+}
+
+
+
+/*
+ * Multiply random matrices of the given dimensions `nb_test` times with
+ * multiply_with_starpu() and return the shortest execution time in
+ * nanoseconds.
+ *
+ * Returns -1 when no measurement was made: `nb_test` is 0 or a matrix
+ * cannot be allocated.
+ */
+double min_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nsclicesx, unsigned nsclicesy)
+{
+	unsigned i;
+	double exec_times=-1;
+
+	/* Sizes are computed in size_t to avoid unsigned overflow. */
+	float * A = (float *) malloc((size_t)zdim*ydim*sizeof(float));
+	float * B = (float *) malloc((size_t)xdim*zdim*sizeof(float));
+	float * C = (float *) malloc((size_t)xdim*ydim*sizeof(float));
+
+	if (A == NULL || B == NULL || C == NULL)
+	{
+		fprintf(stderr, "min_time: out of memory\n");
+		free(A);
+		free(B);
+		free(C);
+		return -1;
+	}
+
+	for (i = 0 ; i < nb_test ; i++){
+
+		double start, stop, exec_t;
+
+		init_rand(A, zdim, ydim);
+		init_rand(B, xdim, zdim);
+		init_zero(C, xdim, ydim);
+
+		start = starpu_timing_now();
+		multiply_with_starpu(A, B, C, xdim, ydim, zdim, nsclicesx, nsclicesy);
+		stop = starpu_timing_now();
+
+		exec_t = (stop - start)*1.e3; // Put in ns instead of us
+		/* Keep the minimum; a negative value means "nothing recorded yet". */
+		if (exec_times<0 || exec_times>exec_t) exec_times= exec_t;
+	}
+
+	free(A);
+	free(B);
+	free(C);
+	return exec_times;
+}
+
+
+void display_times(unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nb_tests, unsigned nsclicesx, unsigned nsclicesy)
+{
+	unsigned dim;
+
+	for (dim = start_dim ; dim <= stop_dim ; dim += step_dim){
+		double t = min_time(nb_tests, dim, dim, dim, nsclicesx, nsclicesy);
+		printf("%f %f\n", dim*dim*4.*3./1024./1024, (2.*dim-1.)*dim*dim/t);
+	}
+
+}
+
+
+int main(int argc, char * argv[])
+{
+	if (starpu_init(NULL) != EXIT_SUCCESS){
+		fprintf(stderr, "ERROR\n");
+		return 77;
+	}
+
+	unsigned start_dim = 16*STRIDE;
+	unsigned step_dim = 4*STRIDE;
+	unsigned stop_dim = 4096;
+	unsigned nb_tests = 10;
+	unsigned nsclicesx = 2;
+	unsigned nsclicesy = 2;
+
+	display_times(start_dim, step_dim, stop_dim, nb_tests, nsclicesx, nsclicesy);
+
+	starpu_shutdown();
+
+	return 0;
+}
+

+ 146 - 0
julia/mult/mult.jl

@@ -0,0 +1,146 @@
+import Libdl
+using StarPU
+using LinearAlgebra
+
+# Should be the same as in the makefile
+const STRIDE = 72
+
+# Codelet computing m3 = m1 * m2 on Float32 matrices, declared for the
+# STARPU_CPU+STARPU_CUDA targets.  m3 is cleared first and then accumulated
+# tile by tile.
+# NOTE(review): the tile loops always run to i+STRIDE-1 / k+STRIDE-1 /
+# j+STRIDE-1 and read kk+3 and jj+1 without any clamp, so this appears to
+# require matrix dimensions that are multiples of STRIDE (and STRIDE to be a
+# multiple of 4) -- confirm against the callers.
+@target STARPU_CPU+STARPU_CUDA
+@codelet function matrix_mult(m1 :: Matrix{Float32}, m2 :: Matrix{Float32}, m3 :: Matrix{Float32}) :: Float32
+
+    width_m2 :: Int32 = width(m2)
+    height_m1 :: Int32 = height(m1)
+    width_m1 :: Int32 = width(m1)
+    # Naive version
+    #@parallel for j in (1 : width_m2)
+    #    @parallel for i in (1 : height_m1)
+    #
+    #          sum :: Float32 = 0.
+
+    #          for k in (1 : width_m1)
+    #              sum = sum + m1[i, k] * m2[k, j]
+    #          end
+    
+    #          m3[i, j] = sum
+    #      end
+    #  end
+    ##### Tiled and unrolled version 
+    # Clear the output before accumulating into it.
+    for l in (1 : width_m2)
+        for m in (1 : height_m1)
+            m3[m,l] = 0
+        end
+    end
+    # i, k and j walk the matrices in steps of STRIDE; only the i loop is
+    # marked @parallel.
+    @parallel for i in (1 : STRIDE : height_m1)
+        for k in (1 : STRIDE : width_m1 )
+            for j in (1 : STRIDE : width_m2  )
+                # Inside a tile, kk advances by 4 and jj by 2.
+                for kk in (k : 4 : k+STRIDE-1)
+                    for jj in (j : 2 : j+STRIDE-1)
+                        # Load the 4x2 block m2[kk:kk+3, jj:jj+1] into scalars
+                        # (alphaXY = m2[kk+X, jj+Y]) before the ii loop.
+                        alpha00 :: Float32 =m2[kk,jj]
+                        alpha01 :: Float32 =m2[kk,jj+1]
+                        alpha10 :: Float32 =m2[kk+1,jj]
+                        alpha11 :: Float32 =m2[kk+1,jj+1]
+                        alpha20 :: Float32 =m2[kk+2,jj]
+                        alpha21 :: Float32 =m2[kk+2,jj+1]
+                        alpha30 :: Float32 =m2[kk+3,jj]
+                        alpha31 :: Float32 =m2[kk+3,jj+1]
+                        # Each ii updates two output columns (jj and jj+1)
+                        # with four products apiece.
+                        for ii in (i : 1 : i+STRIDE-1) 
+                            m3[ii, jj] = m3[ii, jj] + m1[ii, kk] * alpha00 + m1[ii, kk+1] * alpha10 + m1[ii, kk+2] * alpha20 + m1[ii,kk+3]*alpha30
+                            m3[ii, jj+1] = m3[ii, jj+1] + m1[ii, kk] * alpha01 + m1[ii, kk+1] * alpha11 + m1[ii, kk+2]*alpha21 + m1[ii,kk+3]*alpha31 
+                        end
+                    end
+                end
+            end
+        end
+    end
+
+    return 0. :: Float32
+end
+
+
+# Start StarPU before any data is registered or any task is submitted; the
+# matching starpu_shutdown() call is at the end of this script.
+@debugprint "starpu_init"
+starpu_init()
+
+# Multiply A by B into C through StarPU and return the shortest wall-clock
+# time, in nanoseconds (time_ns), over 10 submissions of the whole task set.
+#
+# hB is partitioned with a STARPU_MATRIX_FILTER_VERTICAL_BLOCK filter into
+# nslicesx parts, hA with a STARPU_MATRIX_FILTER_BLOCK filter into nslicesy
+# parts, and both filters are mapped onto hC.  One task running the
+# "matrix_mult" CPU codelet is submitted per (taskx, tasky) pair.
+function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
+    tmin=0
+    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx)
+    horiz = StarpuDataFilter(STARPU_MATRIX_FILTER_BLOCK, nslicesy)
+    @starpu_block let
+        hA,hB,hC = starpu_data_register(A, B, C)
+        starpu_data_partition(hB, vert)
+        starpu_data_partition(hA, horiz)
+        starpu_data_map_filters(hC, vert, horiz)
+        # NOTE(review): repeated on purpose -- how @starpu_block scopes the
+        # variables of its body is not visible here, so the assignment is kept.
+        tmin=0
+        perfmodel = StarpuPerfmodel(
+            perf_type = STARPU_HISTORY_BASED,
+            symbol = "history_perf"
+        )
+        cl = StarpuCodelet(
+            cpu_func = CPU_CODELETS["matrix_mult"],
+            #cuda_func = "matrix_mult",
+            #opencl_func="ocl_matrix_mult",
+            modes = [STARPU_R, STARPU_R, STARPU_W],
+            perfmodel = perfmodel
+        )
+
+        for i in (1 : 10 )
+            t=time_ns()
+            # Everything submitted inside this block is part of the timed section.
+            @starpu_sync_tasks begin
+                for taskx in (1 : nslicesx)
+                    for tasky in (1 : nslicesy)
+                        handles = [hA[tasky], hB[taskx], hC[taskx, tasky]]
+                        task = StarpuTask(cl = cl, handles = handles)
+                        starpu_task_submit(task)
+                        #@starpu_async_cl matrix_mult(hA[tasky], hB[taskx], hC[taskx, tasky])
+                    end
+                end
+            end
+            t=time_ns()-t
+            # Keep the minimum; 0 means "nothing measured yet".
+            if (tmin==0 || tmin>t)
+                tmin=t
+            end
+        end
+    end
+    return tmin
+end
+
+
+# Return true when A and B have the same size and every pair of elements
+# differs by at most eps times the larger magnitude of the two.  The first
+# offending pair (or the mismatching sizes) is printed before returning false.
+function approximately_equals(
+    A :: Matrix{Cfloat},
+    B :: Matrix{Cfloat},
+    eps = 1e-2
+)
+    # Matrices of different shapes cannot match; checking here also keeps the
+    # loop below from indexing B out of bounds.
+    if size(A) != size(B)
+        println("size(A) : $(size(A)), size(B) : $(size(B))")
+        return false
+    end
+
+    # Column by column, with the row index varying fastest.
+    for j in axes(A, 2)
+        for i in axes(A, 1)
+            if (abs(A[i,j] - B[i,j]) > eps * max(abs(B[i,j]), abs(A[i,j])))
+                println("A[$i,$j] : $(A[i,j]), B[$i,$j] : $(B[i,j])")
+                return false
+            end
+        end
+    end
+
+    return true
+end
+
+# For every dim in start_dim:step_dim:stop_dim, multiply two random dim x dim
+# Float32 matrices with multiply_with_starpu and write "<size> <rate>" to io
+# and to stdout.  size is dim*dim*4*3/1024/1024 and rate is (2*dim-1)*dim*dim
+# divided by the time returned by multiply_with_starpu.
+function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy)
+    for dim in (start_dim : step_dim : stop_dim)
+        # rand already returns a fresh Matrix{Cfloat}; no extra Array() copy.
+        A = rand(Cfloat, dim, dim)
+        B = rand(Cfloat, dim, dim)
+        C = zeros(Float32, dim, dim)
+        mt = multiply_with_starpu(A, B, C, nslicesx, nslicesy)
+        flops = (2*dim-1)*dim*dim/mt
+        # Named so that Base.size is not shadowed inside this function.
+        footprint = dim*dim*4*3/1024/1024
+        line = "$footprint $flops"
+        println(io, line)
+        println(line)
+    end
+end
+
+
+# Write the measurements to the file named by the first command-line argument.
+# The do-block form closes the file even if compute_times throws.
+open(ARGS[1], "w") do io
+    compute_times(io,16*STRIDE,4*STRIDE,4096,2,2)
+end
+@debugprint "starpu_shutdown"
+starpu_shutdown()
+

+ 4 - 0
julia/mult/mult.plot

@@ -0,0 +1,4 @@
+# Plot the four measurement files as lines into comparison.pdf.
+# The terminal is selected before the output file, as the gnuplot manual recommends.
+set term pdf
+set output "comparison.pdf"
+plot "julia_native.dat" w l,"cstarpu.dat" w l,"julia_generatedc.dat" w l,"julia_calllib.dat" w l
+quit

+ 44 - 0
julia/mult/mult_native.jl

@@ -0,0 +1,44 @@
+import Libdl
+using StarPU
+using LinearAlgebra
+
+# Should be the same as in the makefile
+const STRIDE = 72
+
+# StarPU is started here and shut down at the end of the script, although
+# multiply_without_starpu itself makes no StarPU call.
+@debugprint "starpu_init"
+starpu_init()
+
+# Time the product of A and B with Julia's own matrix multiplication and
+# return the shortest of 10 runs, in nanoseconds (time_ns).  The result is
+# written into C.  nslicesx and nslicesy are accepted but not used.
+function multiply_without_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
+    # time_ns() yields UInt64, so start from a UInt64 zero to keep tmin's type
+    # fixed; 0 means "nothing measured yet".
+    tmin = zero(UInt64)
+    for i in (1 : 10 )
+        t=time_ns()
+        # In-place product: fills the caller's C instead of rebinding the local
+        # name to a freshly allocated matrix on every run.
+        mul!(C, A, B)
+        t=time_ns() - t
+        if (tmin==0 || tmin>t)
+            tmin=t
+        end
+    end
+    return tmin
+end
+
+
+# For every dim in start_dim:step_dim:stop_dim, multiply two random dim x dim
+# Float32 matrices with multiply_without_starpu and write "<size> <rate>" to
+# io and to stdout.  size is dim*dim*4*3/1024/1024 and rate is
+# (2*dim-1)*dim*dim divided by the time returned by multiply_without_starpu.
+function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy)
+    for dim in (start_dim : step_dim : stop_dim)
+        # rand already returns a fresh Matrix{Cfloat}; no extra Array() copy.
+        A = rand(Cfloat, dim, dim)
+        B = rand(Cfloat, dim, dim)
+        C = zeros(Float32, dim, dim)
+        mt = multiply_without_starpu(A, B, C, nslicesx, nslicesy)
+        flops = (2*dim-1)*dim*dim/mt
+        # Named so that Base.size is not shadowed inside this function.
+        footprint = dim*dim*4*3/1024/1024
+        line = "$footprint $flops"
+        println(io, line)
+        println(line)
+    end
+end
+
+
+# Write the measurements to the file named by the first command-line argument.
+# The do-block form closes the file even if compute_times throws.
+open(ARGS[1], "w") do io
+    compute_times(io,16*STRIDE,4*STRIDE,4096,2,2)
+end
+@debugprint "starpu_shutdown"
+starpu_shutdown()
+

+ 11 - 0
julia/mult/res/mult_cstarpu_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.187500 62.469505
+23.730469 65.145783
+34.171875 64.252904
+46.511719 64.856261
+60.750000 61.807355
+76.886719 64.395970
+94.921875 63.789615
+114.855469 64.782028
+136.687500 61.598611
+160.417969 63.266065
+186.046875 62.610491

+ 29 - 0
julia/mult/res/mult_gen_gcc9_1x4.dat

@@ -0,0 +1,29 @@
+3.0 35.095536494941854
+4.6875 41.989376626414035
+6.75 38.862414203751754
+9.1875 42.8149332353783
+12.0 27.869356812008537
+15.1875 40.90318016042991
+18.75 35.225143587305226
+22.6875 40.2855672797496
+27.0 30.428316600694394
+31.6875 39.31411328357
+36.75 32.661610849261855
+42.1875 37.556932447413935
+48.0 23.52181133492872
+54.1875 38.220688331426885
+60.75 30.30144442185247
+67.6875 35.7931848447804
+75.0 27.503675217337065
+82.6875 34.26004394537202
+90.75 30.28248032967485
+99.1875 34.80402996973707
+108.0 23.410338740869793
+117.1875 33.816995170067365
+126.75 28.575495002257274
+136.6875 31.938845714722646
+147.0 26.579028306224597
+157.6875 31.257291229640458
+168.75 29.40371998523363
+180.1875 31.20791496656832
+192.0 22.27977322138876

+ 29 - 0
julia/mult/res/mult_gen_gcc9_4x1.dat

@@ -0,0 +1,29 @@
+3.0 38.576241402554224
+4.6875 45.31099640039931
+6.75 41.33116523247586
+9.1875 41.36870326327288
+12.0 30.726913279900813
+15.1875 40.97070044185806
+18.75 38.00711660927796
+22.6875 41.41500237348445
+27.0 35.60049383550015
+31.6875 39.3274866273195
+36.75 35.89547776075037
+42.1875 40.148418048157694
+48.0 25.328919716005114
+54.1875 38.31273888508681
+60.75 33.40185421821584
+67.6875 36.933804237397716
+75.0 30.424047628770715
+82.6875 35.555311026464885
+90.75 33.520292128428736
+99.1875 36.15390985285586
+108.0 24.69723232685782
+117.1875 35.10082819198454
+126.75 31.92453002780272
+136.6875 35.218675088153915
+147.0 28.57496209979794
+157.6875 34.30832202391309
+168.75 32.28312311135674
+180.1875 33.95765540289391
+192.0 21.87223921165241

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s100_4x1.dat

@@ -0,0 +1,7 @@
+29.296875 40.03209610690769
+45.7763671875 39.7782462738071
+65.91796875 40.919412460071406
+89.7216796875 39.86310075239485
+117.1875 38.57853081024218
+148.3154296875 38.365551586369726
+183.10546875 36.36952308316503

+ 17 - 0
julia/mult/res/mult_gen_gcc9_s50_4x1.dat

@@ -0,0 +1,17 @@
+7.32421875 40.71065910284983
+11.444091796875 38.65943468999069
+16.4794921875 38.47969086769767
+22.430419921875 36.651013048059674
+29.296875 36.83417153820277
+37.078857421875 36.39675792194737
+45.7763671875 35.95397397739203
+55.389404296875 35.01271643062472
+65.91796875 36.615975568007045
+77.362060546875 35.37884613184124
+89.7216796875 35.47361924291173
+102.996826171875 35.19272426462016
+117.1875 33.20635359174189
+132.293701171875 33.85292024547706
+148.3154296875 34.259771254715574
+165.252685546875 33.696634276143286
+183.10546875 32.65563984287101

+ 4 - 0
julia/mult/res/mult_gen_gcc9_s64_16x16_b4x2.dat

@@ -0,0 +1,4 @@
+12.0 36.97738316279322
+48.0 41.92478434152502
+108.0 44.782019377342586
+192.0 43.31174213073912

+ 13 - 0
julia/mult/res/mult_gen_gcc9_s64_4x4_b4x2.dat

@@ -0,0 +1,13 @@
+12.0 46.97244046946638
+18.75 58.871349999359005
+27.0 53.82855830537721
+36.75 56.32008723976445
+48.0 44.67174556090053
+60.75 56.37242731373687
+75.0 50.7326915732461
+90.75 55.4718476922033
+108.0 42.2282159789089
+126.75 53.75672849956793
+147.0 49.58574052835205
+168.75 54.37042916069095
+192.0 41.03783426797551

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s64_8x1_b4x2.dat

@@ -0,0 +1,7 @@
+12.0 44.99985202955367
+27.0 52.4356051129035
+48.0 43.98871775598096
+75.0 49.80617536221089
+108.0 41.49118338442519
+147.0 48.56364996087375
+192.0 40.62654517348534

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s64_8x8_b4x2.dat

@@ -0,0 +1,7 @@
+12.0 42.39120888947927
+27.0 54.550913222094984
+48.0 45.49392373655385
+75.0 51.36208021061481
+108.0 43.63013488867403
+147.0 49.68419805912138
+192.0 42.5457713890938

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_16x18_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 47.91450640786182
+23.73046875 70.18415000835407
+34.171875 130.5930513278052
+46.51171875 175.40249695912559
+60.75 55.633587692486856
+76.88671875 69.98450438218035
+94.921875 84.29515755163186
+114.85546875 105.0701279194339
+136.6875 59.010534668180654
+160.41796875 62.33164645892831
+186.046875 71.89788996838325

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_16x8_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 58.34035046809785
+23.73046875 90.94876643488134
+34.171875 93.57888433733667
+46.51171875 115.24941633539966
+60.75 57.91620969415693
+76.88671875 69.01285319590752
+94.921875 80.1731339374351
+114.85546875 81.88695877919552
+136.6875 59.81485837027026
+160.41796875 62.627542548656216
+186.046875 70.92430858573593

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2.dat

@@ -0,0 +1,11 @@
+15.1875 49.541265548365025
+23.73046875 52.23451345494309
+34.171875 50.51716648682395
+46.51171875 50.85574646979497
+60.75 42.70677762521372
+76.88671875 47.844317018396175
+94.921875 46.812449783935435
+114.85546875 47.63133362143659
+136.6875 42.58453806625135
+160.41796875 43.38142416008992
+186.046875 42.10597070167543

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 62.85531352216577
+23.73046875 64.18226106386828
+34.171875 63.154717935466564
+46.51171875 63.785224988311604
+60.75 56.23769206133714
+76.88671875 59.67937135482873
+94.921875 58.723057009657836
+114.85546875 59.64876377318175
+136.6875 56.179455752285136
+160.41796875 56.25355249047638
+186.046875 55.86750451373739

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b4x4.dat

@@ -0,0 +1,11 @@
+15.1875 13.064278868379871
+23.73046875 14.756026074641067
+34.171875 13.83982386947793
+46.51171875 14.613968952238817
+60.75 11.783711558597256
+76.88671875 13.931105963251506
+94.921875 12.855632000014735
+114.85546875 13.249002597722258
+136.6875 12.572965056671936
+160.41796875 14.480218135848462
+186.046875 14.040858387720379

+ 5 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b8x2.dat

@@ -0,0 +1,5 @@
+15.1875 14.099898524592977
+23.73046875 14.550933539697333
+34.171875 14.349410926140271
+46.51171875 14.415904334550417
+60.75 13.503051187017753

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_4x1.dat

@@ -0,0 +1,11 @@
+15.1875 45.92147955991763
+23.73046875 51.36107050546614
+34.171875 49.61930926641356
+46.51171875 49.45181909243622
+60.75 45.15796272895741
+76.88671875 48.36073354687536
+94.921875 44.24184559337953
+114.85546875 47.18178855961836
+136.6875 43.159720253646896
+160.41796875 42.67165627290124
+186.046875 38.537576648017776

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_4x4_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 58.120354593587074
+23.73046875 64.57108971168358
+34.171875 63.38082696595577
+46.51171875 62.249708571678504
+60.75 54.032191569244006
+76.88671875 58.202057221774425
+94.921875 61.440907627769874
+114.85546875 59.41214640096639
+136.6875 56.7554113833207
+160.41796875 59.19922367910147
+186.046875 58.85342461418237

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_8x8_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 60.852193218742244
+23.73046875 90.34368571742002
+34.171875 64.88259115865708
+46.51171875 75.93301804962653
+60.75 56.570334556680905
+76.88671875 69.56856256378995
+94.921875 61.32231327890172
+114.85546875 65.01978117265061
+136.6875 59.34455850319351
+160.41796875 62.64459678873513
+186.046875 60.73581790609317

+ 9 - 0
julia/mult/res/mult_gen_gcc9_s80_4x1.dat

@@ -0,0 +1,9 @@
+18.75 33.04933833660189
+29.296875 36.73797053791907
+42.1875 38.63857483141544
+57.421875 37.41446097176627
+75.0 37.29798074973393
+94.921875 47.78484008020491
+117.1875 46.920043928079984
+141.796875 44.932797054801505
+168.75 44.2566320371221

+ 3 - 0
julia/mult/res/mult_gen_icc_s72_2x1_b4x2.dat

@@ -0,0 +1,3 @@
+15.1875 32.90035486674719
+23.73046875 33.41217720558453
+34.171875 32.65678472416923

+ 11 - 0
julia/mult/res/mult_gen_icc_s72_4x4_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 62.840292498287134
+23.73046875 65.60736049798648
+34.171875 63.71211730973094
+46.51171875 64.52944091137051
+60.75 57.21012623951768
+76.88671875 64.30553029147484
+94.921875 63.023853024696905
+114.85546875 61.571602782198134
+136.6875 58.78441267918305
+160.41796875 60.1701272099038
+186.046875 58.911378284423975

+ 62 - 0
julia/mult/res/mult_native.dat

@@ -0,0 +1,62 @@
+0.046875 62.74933655006031
+0.1875 81.76128691363823
+0.421875 56.62036966486339
+0.75 61.59670394349617
+1.171875 78.30779265074588
+1.6875 75.18164244785089
+2.296875 86.16277240336612
+3.0 88.7175896320434
+3.796875 89.94675752869234
+4.6875 91.81258734297542
+5.671875 91.07763358860382
+6.75 89.73871714190392
+7.921875 93.94105347752168
+9.1875 96.0986938110649
+10.546875 86.81427208540494
+12.0 90.48478005441814
+13.546875 87.99680611063411
+15.1875 84.31631390960176
+16.921875 91.48325697666799
+18.75 89.95690606810193
+20.671875 81.49676622230874
+22.6875 81.45233686971325
+24.796875 85.66539502122194
+27.0 87.78572232818597
+29.296875 83.52321886514454
+31.6875 84.0009753910359
+34.171875 86.80205838654649
+36.75 86.99073757030344
+39.421875 81.17955694509793
+42.1875 80.44760372225159
+45.046875 80.56510057628367
+48.0 81.65642955552222
+51.046875 82.11316716249475
+54.1875 82.5995656333009
+57.421875 82.17046140404119
+60.75 82.15768363601998
+64.171875 81.85343346990497
+67.6875 83.50739871185147
+71.296875 81.53545433745259
+75.0 81.48249578388855
+78.796875 83.7802422875762
+82.6875 82.91995474794902
+86.671875 84.37396962418724
+90.75 83.27206282643111
+94.921875 84.42083617832927
+99.1875 84.41380826745248
+103.546875 83.76666519036874
+108.0 83.15578835025194
+112.546875 82.68544528819217
+117.1875 82.44413764522827
+121.921875 75.20336455312317
+126.75 78.74206622501798
+131.671875 78.30429756099845
+136.6875 77.07009004287404
+141.796875 76.79451754647009
+147.0 77.188762171773
+152.296875 83.71882981918472
+157.6875 81.25155945091102
+163.171875 82.77719540968279
+168.75 82.091876711701
+174.421875 80.13244240187723
+180.1875 83.15669561921781

+ 11 - 0
julia/mult/res/mult_nogen_gcc9_s72_2x2_b2x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.12811008957249
+23.73046875 60.33779880359854
+34.171875 58.65251355824629
+46.51171875 59.39753610863093
+60.75 51.71969005913254
+76.88671875 60.41770576441039
+94.921875 55.244712365625155
+114.85546875 59.08425492245039
+136.6875 50.57086001690063
+160.41796875 54.04522394983553
+186.046875 52.97679192814275

+ 11 - 0
julia/mult/res/mult_nogen_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 54.13411070454958
+23.73046875 61.69285331765543
+34.171875 59.001001016663984
+46.51171875 60.24465674394535
+60.75 52.541458398044604
+76.88671875 62.0774861675393
+94.921875 56.20749571716199
+114.85546875 53.17550437757629
+136.6875 47.70744414255635
+160.41796875 49.931147163084646
+186.046875 47.38560406825662

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72-36_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.39992483045233
+23.73046875 61.25439146348626
+34.171875 58.97664123460709
+46.51171875 58.949173678252
+60.75 52.78987632022571
+76.88671875 61.569889782842495
+94.921875 56.0234167726132
+114.85546875 59.25765213366246
+136.6875 50.44001190797859
+160.41796875 55.423350523540556
+186.046875 54.219079530491165

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.58835344153732
+23.73046875 61.299696065186076
+34.171875 59.10944402276785
+46.51171875 60.240571021372645
+60.75 52.66550013928241
+76.88671875 61.30385865566083
+94.921875 58.08324611362879
+114.85546875 62.11036688036624
+136.6875 51.309289298267664
+160.41796875 54.50754182628601
+186.046875 52.16594203007848

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72x2_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 54.225066705273136
+23.73046875 62.03919972007252
+34.171875 57.31552099956256
+46.51171875 59.10290100154662
+60.75 52.5567601559296
+76.88671875 62.05760119221998
+94.921875 56.85760151243153
+114.85546875 59.990114344500874
+136.6875 51.0662836859927
+160.41796875 54.41003890332101
+186.046875 53.02269691247011

+ 0 - 57
julia/src/Compiler/C/add_for_loop_declarations.jl

@@ -1,57 +0,0 @@
-
-
-"""
-    interval_evaluation_declarations(set, for_index_var)
-
-Return the list of instructions that will be added before a for loop of shape
-`for for_index_var in set ...`.
-
-When `set.step` is a `StarpuExprValue`, the list holds the `start_<id>` and
-`stop_<id>` assignments followed by the index declaration.  Otherwise it
-holds the `start_<id>` and `step_<id>` assignments, a `dim_<id>` assignment
-built on `jlstarpu_interval_size`, and the `iter_<id>` and index declarations.
-"""
-function interval_evaluation_declarations(set :: StarpuExprInterval, for_index_var :: Symbol)
-
-    # Plain locals: `const` is not accepted on local variables by Julia >= 1.0.
-    decl_pattern = @parse € :: Int64
-    affect_pattern = @parse € :: Int64 = €
-    interv_size_affect_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
-
-    id = set.id
-
-    start_var = starpu_parse(Symbol(:start_, id))
-    start_decl = replace_pattern(affect_pattern, start_var, set.start)
-
-    index_var = starpu_parse(for_index_var)
-    index_decl = replace_pattern(decl_pattern, index_var)
-
-    # Literal step: only the bounds need to be evaluated ahead of the loop.
-    if isa(set.step, StarpuExprValue)
-
-        stop_var = starpu_parse(Symbol(:stop_, id))
-        stop_decl = replace_pattern(affect_pattern, stop_var, set.stop)
-
-        return StarpuExpr[start_decl, stop_decl, index_decl]
-    end
-
-    step_var = starpu_parse(Symbol(:step_, id))
-    step_decl = replace_pattern(affect_pattern, step_var, set.step)
-
-    dim_var = starpu_parse(Symbol(:dim_, id))
-    dim_decl = replace_pattern(interv_size_affect_pattern, dim_var, start_var, step_var, set.stop)
-
-    iter_var = starpu_parse(Symbol(:iter_, id))
-    iter_decl = replace_pattern(decl_pattern, iter_var)
-
-
-    return StarpuExpr[start_decl, step_decl, dim_decl, iter_decl, index_decl]
-end
-
-
-"""
-    add_for_loop_declarations(expr)
-
-Run `apply` over `expr`, rebuilding every `StarpuExprFor` with the
-declarations returned by `interval_evaluation_declarations` for its set and
-iterator.  Any other node is returned unchanged.
-"""
-function add_for_loop_declarations(expr :: StarpuExpr)
-
-    return apply(expr) do node :: StarpuExpr
-
-        if isa(node, StarpuExprFor)
-            decls = interval_evaluation_declarations(node.set, node.iter)
-            return StarpuExprFor(node.iter, node.set, node.body, node.is_independant, decls)
-        end
-
-        return node
-    end
-end

+ 0 - 15
julia/src/Compiler/C/create_cpu_kernel.jl

@@ -1,15 +0,0 @@
-
-
-
-
-
-"""
-    transform_to_cpu_kernel(expr)
-
-Apply, in this order, `add_for_loop_declarations`, `substitute_args`,
-`substitute_func_calls`, `substitute_indexing` and `flatten_blocks` to `expr`
-and return the result of the last pass.
-"""
-function transform_to_cpu_kernel(expr :: StarpuExprFunction)
-
-    passes = (
-        add_for_loop_declarations,
-        substitute_args,
-        substitute_func_calls,
-        substitute_indexing,
-        flatten_blocks,
-    )
-
-    output = expr
-
-    # Each pass consumes the output of the previous one.
-    for pass in passes
-        output = pass(output)
-    end
-
-    return output
-end

+ 0 - 27
julia/src/Compiler/C/flatten_blocks.jl

@@ -1,27 +0,0 @@
-
-
-
-"""
-    flatten_blocks(expr)
-
-Run `apply` over `expr`; in every `StarpuExprBlock`, replace each directly
-nested `StarpuExprBlock` by its own instructions (one level per visited
-block).  Other nodes are returned unchanged.
-"""
-function flatten_blocks(expr :: StarpuExpr)
-
-    function func_to_run(x :: StarpuExpr)
-
-        if !isa(x, StarpuExprBlock)
-            return x
-        end
-
-        instrs = StarpuExpr[]
-
-        for sub_expr in x.exprs
-
-            if isa(sub_expr, StarpuExprBlock)
-                # append! takes the collection itself: no splatting of an
-                # arbitrarily long list, and an empty nested block does not
-                # depend on how a zero-item push! behaves.
-                append!(instrs, sub_expr.exprs)
-            else
-                push!(instrs, sub_expr)
-            end
-        end
-
-        return StarpuExprBlock(instrs)
-    end
-
-    return apply(func_to_run, expr)
-end

+ 0 - 76
julia/src/Compiler/C/substitute_args.jl

@@ -1,76 +0,0 @@
-
-
-"""
-    substitute_argument_usage(expr, arg_index, buffer_name, arg_name, ptr_name)
-
-Run `apply` over `expr`.  Every variable named `arg_name` becomes a variable
-named `ptr_name`.  Every call to a function listed in `func_substitution`
-whose single argument is the `ptr_name` variable becomes a call to the
-substituted function on `buffer_name[arg_index]`.  A listed function called
-with a number of arguments other than one raises an error.
-"""
-function substitute_argument_usage(expr :: StarpuExpr, arg_index, buffer_name :: Symbol, arg_name :: Symbol, ptr_name :: Symbol)
-
-    arg_var = StarpuExprVar(arg_name)
-    ptr_var = StarpuExprVar(ptr_name)
-
-    return apply(expr) do node :: StarpuExpr
-
-        if node == arg_var
-            return StarpuExprVar(ptr_name)
-        end
-
-        is_listed_call = isa(node, StarpuExprCall) && node.func in keys(func_substitution)
-
-        if !is_listed_call
-            return node
-        end
-
-        if length(node.args) != 1
-            error("Invalid arrity for function $(node.func)")
-        end
-
-        if node.args[1] == ptr_var
-            buffer_slot = starpu_parse(:($buffer_name[$arg_index]))
-            return StarpuExprCall(func_substitution[node.func], [buffer_slot])
-        end
-
-        return node
-    end
-end
-
-
-
-"""
-    substitute_args(expr)
-
-Return a copy of the function `expr` whose arguments are replaced by two
-arguments named `buffers_<id>` and `cl_arg_<id>` (the id comes from
-`rand_string()`).  For each original argument, a `ptr_<id>` local is declared
-at the start of the body from `STARPU_VECTOR_GET_PTR` or
-`STARPU_MATRIX_GET_PTR` applied to `buffers_<id>[i]`, preceded for matrices
-by an `ld_<id>` local read with `STARPU_MATRIX_GET_LD`.  Usages of the
-argument in the body are rewritten by `substitute_argument_usage`.  An
-argument whose type is neither a `Vector` nor a `Matrix` raises an error.
-"""
-function substitute_args(expr :: StarpuExprFunction)
-
-    new_body = expr.body
-    func_id = rand_string()
-    buffer_arg_name = Symbol("buffers_", func_id)
-    cl_arg_name = Symbol("cl_arg_", func_id)
-
-    # Declarations inserted in front of the original body.
-    function_start_affectations = StarpuExpr[]
-
-    for i in (1 : length(expr.args))
-
-        # One fresh id per argument, shared by its ptr_ and ld_ locals.
-        var_id = rand_string()
-        ptr = Symbol(:ptr_, var_id)
-
-        if (expr.args[i].typ <: Vector)
-            func_interface = :STARPU_VECTOR_GET_PTR
-
-        elseif (expr.args[i].typ <: Matrix)
-            func_interface = :STARPU_MATRIX_GET_PTR
-            ld_name = Symbol("ld_", var_id)
-            new_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$i])) )
-            push!(function_start_affectations, new_affect)
-
-        else
-            error("Task arguments must be either vector or matrix (got $(expr.args[i].typ))") #TODO : cl_args, variable ?
-        end
-
-        type_in_arg = eltype(expr.args[i].typ)
-        new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$i])) )
-        push!(function_start_affectations, new_affect)
-
-        new_body = substitute_argument_usage(new_body, i, buffer_arg_name, expr.args[i].name, ptr)
-    end
-
-
-    new_args = [
-                    starpu_parse(:($buffer_arg_name :: Matrix{Void})),
-                    starpu_parse(:($cl_arg_name :: Vector{Void}))
-                ]
-    new_body = StarpuExprBlock([function_start_affectations..., new_body.exprs...])
-
-    return StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
-end

+ 0 - 25
julia/src/Compiler/C/substitute_func_calls.jl

@@ -1,25 +0,0 @@
-
-
-
-# Names of the functions usable on codelet arguments, mapped to the name that
-# replaces them in the rewritten code.
-func_substitution = Dict(
-    :width => :STARPU_MATRIX_GET_NY,
-    :height => :STARPU_MATRIX_GET_NX,
-
-    :length => :STARPU_VECTOR_GET_NX
-)
-
-
-
-"""
-    substitute_func_calls(expr)
-
-Run `apply` over `expr`, renaming every call whose function is a key of
-`func_substitution` to the associated value.  Arguments are kept as they are
-and any other node is returned unchanged.
-"""
-function substitute_func_calls(expr :: StarpuExpr)
-
-    return apply(expr) do node :: StarpuExpr
-
-        if isa(node, StarpuExprCall) && haskey(func_substitution, node.func)
-            return StarpuExprCall(func_substitution[node.func], node.args)
-        end
-
-        return node
-    end
-end

+ 0 - 52
julia/src/Compiler/C/substitute_indexing.jl

@@ -1,52 +0,0 @@
-
-
-"""
-    substitute_indexing(expr)
-
-Run `apply` over `expr`, rewriting every `StarpuExprRef` into a reference
-with a single index:
-
-- one index `i` becomes `i - 1`;
-- two indexes `(i, j)` on a variable named `ptr_<id>` become
-  `i + (j - 1) * ld_<id> - 1`.
-
-References with no index and all other nodes are returned unchanged.  An
-error is raised when the indexed expression is not a variable, when there are
-three or more indexes, or when a two-index reference is not made on a
-`ptr_<id>` variable.
-"""
-function substitute_indexing(expr :: StarpuExpr)
-
-    function func_to_run(x :: StarpuExpr)
-
-        if !isa(x, StarpuExprRef)
-            return x
-        end
-
-        if !isa(x.ref, StarpuExprVar)
-            error("Only variable indexing is allowed") #TODO allow more ?
-        end
-
-
-        nb_indexes = length(x.indexes)
-
-        if (nb_indexes >= 3)
-            error("Indexing with more than 2 indexes is not allowed") # TODO : blocks
-        end
-
-        if (nb_indexes == 0)
-            return x
-
-        elseif nb_indexes == 1
-            new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)])  #TODO : add field "offset" from STARPU_VECTOR_GET interface
-                                                                            #TODO : detect when it is a matrix used with one index only
-            return StarpuExprRef(x.ref, [new_index])
-
-        elseif nb_indexes == 2
-
-            var_name = String(x.ref.name)
-
-            # The id is taken from position 5 onwards, so "ptr_" has to be a
-            # prefix of the name, not merely appear somewhere inside it.
-            if !startswith(var_name, "ptr_") || isempty(var_name[5:end])
-                error("Invalid variable ($var_name) for multiple index dereferencing")
-            end
-
-            var_id = var_name[5:end]
-            ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix)
-
-            # Built step by step: (j - 1), then * ld, then i + ..., then - 1.
-            new_index = x.indexes[2]
-            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
-            new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)])
-            new_index = StarpuExprCall(:(+), [x.indexes[1], new_index])
-            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
-
-            return StarpuExprRef(x.ref, [new_index])
-        end
-    end
-
-    return apply(func_to_run, expr)
-end

+ 0 - 179
julia/src/Compiler/Cuda/create_cuda_kernel.jl

@@ -1,179 +0,0 @@
-
-
-# True when x is a for loop whose is_independant flag is set.
-is_indep_for_expr(x :: StarpuExpr) = isa(x, StarpuExprFor) && x.is_independant
-
-
-"""
-    extract_init_indep_finish(expr)
-
-Split `expr` into `(init, indep, finish)`: the instructions before the first
-independent for loop, that loop wrapped in a `StarpuIndepFor`, and the
-instructions after it.  `indep` is `nothing` when no such loop is found
-directly in `expr`.  An error is raised when another independent loop
-follows the first one.
-
-TODO : it is not a correct extraction (example : if (cond) {@indep for ...}
-else {return} would not work); better use apply() (NOTE : assert_no_indep_for
-already exists) to find recursively every for loops.
-"""
-function extract_init_indep_finish(expr :: StarpuExpr)
-
-    # The expression is itself the independent loop.
-    if is_indep_for_expr(expr)
-        return StarpuExpr[], StarpuIndepFor(expr), StarpuExpr[]
-    end
-
-    # A single instruction that is not a block is all "init".
-    if !isa(expr, StarpuExprBlock)
-        return [expr], nothing, StarpuExpr[]
-    end
-
-    for (pos, instr) in enumerate(expr.exprs)
-
-        if is_indep_for_expr(instr)
-
-            before = expr.exprs[1 : pos-1]
-            indep = StarpuIndepFor(instr)
-            after = expr.exprs[pos+1 : end]
-
-            if any(is_indep_for_expr, after)
-                error("Sequence of several independant loops is not allowed") #same it may be tricked by a Block(Indep_for(...))
-            end
-
-            return before, indep, after
-        end
-    end
-
-    return expr.exprs, nothing, StarpuExpr[]
-end
-
-
-
-
-"""
-    analyse_variable_declarations(expr, already_defined = StarpuExprTypedVar[])
-
-Walk `expr` with `apply` and return `(defined, undefined)`.
-
-`defined` holds `already_defined` followed by one typed variable per
-typed-variable assignment and per for-loop iterator met during the walk
-(iterators are given the type `Int64`).  `undefined` holds the names of the
-variables that were met while not in the defined list and that no later
-definition removed.
-
-An error is raised on a function declaration, and when a name that is already
-defined is defined again.
-"""
-function analyse_variable_declarations(expr :: StarpuExpr, already_defined :: Vector{StarpuExprTypedVar} = StarpuExprTypedVar[])
-
-    undefined_variables = Symbol[]
-    # map builds new arrays, so already_defined itself is not modified below.
-    defined_variable_names = map((x -> x.name), already_defined)
-    defined_variable_types = map((x -> x.typ), already_defined)
-
-    function func_to_apply(x :: StarpuExpr)
-
-        if isa(x, StarpuExprFunction)
-            error("No function declaration allowed in this section")
-        end
-
-        # A variable node: record the name unless it is already known.
-        if isa(x, StarpuExprVar) || isa(x, StarpuExprTypedVar)
-
-            if !(x.name in defined_variable_names) && !(x.name in undefined_variables)
-                push!(undefined_variables, x.name)
-            end
-
-            return x
-        end
-
-        # A definition: typed assignment target or for-loop iterator.
-        if isa(x, StarpuExprAffect) || isa(x, StarpuExprFor)
-
-            if isa(x, StarpuExprAffect)
-
-                var = x.var
-
-                # Untyped assignments do not declare anything.
-                if !isa(var, StarpuExprTypedVar)
-                    return x
-                end
-
-                name = var.name
-                typ = var.typ
-
-            else
-                name = x.iter
-                typ = Int64
-            end
-
-            if name in defined_variable_names
-                error("Multiple definition of variable $name")
-            end
-
-            # The name may have been recorded as undefined earlier in the walk.
-            filter!((sym -> sym != name), undefined_variables)
-            push!(defined_variable_names, name)
-            push!(defined_variable_types, typ)
-
-            return x
-        end
-
-        return x
-    end
-
-    # Only the side effects on the lists above are used; the result of apply is dropped.
-    apply(func_to_apply, expr)
-    defined_variable = map(StarpuExprTypedVar, defined_variable_names, defined_variable_types)
-
-    return defined_variable, undefined_variables
-end
-
-
-
-"""
-    find_variable(name, vars)
-
-Return the first element of `vars` whose `name` field equals `name`, or
-`nothing` when there is none.
-"""
-function find_variable(name :: Symbol, vars :: Vector{StarpuExprTypedVar})
-
-    for candidate in vars
-        candidate.name == name && return candidate
-    end
-
-    return nothing
-end
-
-
-
-"""
-    add_device_to_interval_call(expr)
-
-Run `apply` over `expr`, renaming every call to `jlstarpu_interval_size` into
-a call to `jlstarpu_interval_size__device` with the same arguments.
-"""
-function add_device_to_interval_call(expr :: StarpuExpr)
-
-    return apply(expr) do node :: StarpuExpr
-
-        is_interval_call = isa(node, StarpuExprCall) && node.func == :jlstarpu_interval_size
-
-        return is_interval_call ? StarpuExprCall(:jlstarpu_interval_size__device, node.args) : node
-    end
-end
-
-
-
-"""
-    transform_to_cuda_kernel(func)
-
-Return `(prekernel, kernel)` built from `func`.
-
-`func` is first turned into its CPU form with `transform_to_cpu_kernel` and
-its body is split around the independent for loop.  `kernel` keeps the name
-of `func` and contains the instructions returned by `analyse_sets` followed
-by the loop body.  `prekernel` is named `CUDA_<func>` and contains the
-instructions before the loop, the pre-kernel instructions from
-`analyse_sets`, the CUDA call to the kernel, a `cudaStreamSynchronize` on
-the local StarPU stream, and the instructions after the loop.
-
-An error is raised when no independent for loop is found.
-"""
-function transform_to_cuda_kernel(func :: StarpuExprFunction)
-
-    cpu_func = transform_to_cpu_kernel(func)
-
-    init, indep, finish = extract_init_indep_finish(cpu_func.body)
-
-    if indep === nothing
-        error("No independant for loop has been found") # TODO can fail because extraction is not correct yet
-    end
-
-    prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
-
-    prekernel_instr = vcat(init, prekernel_instr)
-    kernel_instr = vcat(kernel_instr, indep.body)
-
-    indep_for_def, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
-    prekernel_def, prekernel_undef = analyse_variable_declarations(StarpuExprBlock(prekernel_instr), cpu_func.args)
-
-    # Variables the kernel uses without defining them are passed as kernel
-    # arguments when the pre-kernel code defines them.
-    for undef_var in indep_for_undef
-
-        found_var = find_variable(undef_var, prekernel_def)
-
-        if found_var === nothing # TODO : error then ?
-            continue
-        end
-
-        push!(kernel_args, found_var)
-    end
-
-    call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
-    cuda_call = StarpuExprCudaCall(func.func, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
-    push!(prekernel_instr, cuda_call)
-    push!(prekernel_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
-    prekernel_instr = vcat(prekernel_instr, finish)
-
-    prekernel_name = Symbol("CUDA_", func.func)
-    prekernel = StarpuExprFunction(Void, prekernel_name, cpu_func.args, StarpuExprBlock(prekernel_instr))
-    prekernel = flatten_blocks(prekernel)
-
-    kernel = StarpuExprFunction(Void, func.func, kernel_args, StarpuExprBlock(kernel_instr))
-    kernel = add_device_to_interval_call(kernel)
-    kernel = flatten_blocks(kernel)
-
-    return prekernel, kernel
-end

+ 0 - 49
julia/src/Compiler/Cuda/indep_for.jl

@@ -1,49 +0,0 @@
-
-
-# A nest of independent for loops collapsed into one record.
-struct StarpuIndepFor
-
-    # Iterator name and interval of each collapsed loop, in matching order.
-    iters :: Vector{Symbol}
-    sets :: Vector{StarpuExprInterval}
-
-    # What remains once the nested loops have been stripped.
-    body :: StarpuExpr
-end
-
-
-"""
-    assert_no_indep_for(expr)
-
-Run `apply` over `expr` and raise an error on the first for loop whose
-`is_independant` flag is set.  Nodes are returned unchanged.
-"""
-function assert_no_indep_for(expr :: StarpuExpr)
-
-    return apply(expr) do node :: StarpuExpr
-
-        if isa(node, StarpuExprFor) && node.is_independant
-            error("Invalid usage of intricated @indep for loops")
-        end
-
-        return node
-    end
-end
-
-
-"""
-    StarpuIndepFor(expr)
-
-Build a `StarpuIndepFor` from the independent for loop `expr`.  Directly
-nested independent loops are collapsed: their iterators and sets are
-collected in nesting order, and single-instruction blocks found between two
-loops are unwrapped.  The remaining body must not contain any other
-independent loop (checked with `assert_no_indep_for`).
-
-An error is raised when `expr` is not flagged as independent.
-"""
-function StarpuIndepFor(expr :: StarpuExprFor)
-
-    if !expr.is_independant
-        error("For expression must be prefixed by @indep")
-    end
-
-    # Typed to match the struct fields instead of starting from Vector{Any}.
-    iters = Symbol[]
-    sets = StarpuExprInterval[]
-    for_loop = expr
-
-    while isa(for_loop, StarpuExprFor) && for_loop.is_independant
-
-        push!(iters, for_loop.iter)
-        push!(sets, for_loop.set)
-        for_loop = for_loop.body
-
-        # Look through blocks that merely wrap one instruction.
-        while (isa(for_loop, StarpuExprBlock) && length(for_loop.exprs) == 1)
-            for_loop = for_loop.exprs[1]
-        end
-    end
-
-    return StarpuIndepFor(iters, sets, assert_no_indep_for(for_loop))
-end

+ 0 - 121
julia/src/Compiler/Cuda/indep_for_kernel_ids.jl

@@ -1,121 +0,0 @@
-
-
-# Build the kernel instructions that turn the flat CUDA thread id into one
-# index per loop dimension.
-#
-# `dims` holds the variables carrying the size of each dimension. The returned
-# vector starts with the thread id declaration and its bound check, followed by
-# one `kernel_ids__index_<k>` declaration per dimension.
-# Raises an error when `dims` is empty.
-# NOTE(review): assumes replace_pattern fills the `€` placeholders in argument
-# order; confirm against its definition.
-function translate_index_code(dims :: Vector{StarpuExprVar})
-
-    ndims = length(dims)
-
-    if ndims == 0
-        error("No dimension specified")
-    end
-
-    prod = StarpuExprValue(1)
-    output = StarpuExpr[]
-    reversed_dim = reverse(dims)
-    thread_index_patern = @parse € :: Int64 = (€ / €) % €
-    thread_id = @parse THREAD_ID
-
-    # Walk the dimensions from last to first; `prod` accumulates the product of
-    # the sizes already handled and serves as the divisor for the next index.
-    for i in (1 : ndims)
-        index_lvalue = StarpuExprVar(Symbol(:kernel_ids__index_, ndims - i + 1))
-        expr = replace_pattern(thread_index_patern, index_lvalue, thread_id, prod, reversed_dim[i])
-        push!(output, expr)
-
-        prod = StarpuExprCall(:(*), [prod, reversed_dim[i]])
-    end
-
-    thread_id_pattern = @parse begin
-
-        € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x
-
-        if (€ >= €)
-            return
-        end
-    end
-
-    # After the loop `prod` is the product of all dimensions, i.e. the bound the
-    # thread id is checked against.
-    bound_verif = replace_pattern(thread_id_pattern, thread_id, thread_id, prod)
-    push!(output, bound_verif)
-
-    # The thread id declaration was pushed last, so reversing puts it first.
-    return reverse(output)
-end
-
-
-
-
-
-
-
-# Produce, for every interval of `ind_for`, the declarations needed to iterate
-# over it from a CUDA kernel.
-#
-# Returns a 5-tuple:
-#   dims                      - the `kernel_ids__dim_<k>` variables, one per interval
-#   ker_instr_to_add_later_on - kernel-side declarations of the user iterators
-#   pre_kernel_instr          - declarations of start, step and dim for each interval
-#   kernel_args               - the same start/step/dim variables as Int64 typed arguments
-#   kernel_instr              - created here and returned empty
-# NOTE(review): assumes replace_pattern fills the `€` placeholders in argument
-# order; confirm against its definition.
-function kernel_index_declarations(ind_for :: StarpuIndepFor)
-
-    pre_kernel_instr = StarpuExpr[]
-    kernel_args = StarpuExprTypedVar[]
-    kernel_instr = StarpuExpr[]
-
-    decl_pattern = @parse € :: Int64 = €
-    interv_size_decl_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
-    iter_pattern = @parse € :: Int64 = € + € * €
-
-    dims = StarpuExprVar[]
-    ker_instr_to_add_later_on = StarpuExpr[]
-
-    for k in (1 : length(ind_for.sets))
-
-        set = ind_for.sets[k]
-
-        start_var = starpu_parse(Symbol(:kernel_ids__start_, k))
-        start_decl = replace_pattern(decl_pattern, start_var, set.start)
-
-        step_var = starpu_parse(Symbol(:kernel_ids__step_, k))
-        step_decl = replace_pattern(decl_pattern, step_var, set.step)
-
-        # The dimension size is computed from the start/step variables declared
-        # just above and the interval's stop expression.
-        dim_var = starpu_parse(Symbol(:kernel_ids__dim_, k))
-        dim_decl = replace_pattern(interv_size_decl_pattern, dim_var, start_var, step_var, set.stop)
-
-        push!(dims, dim_var)
-
-        push!(pre_kernel_instr, start_decl, step_decl, dim_decl)
-        push!(kernel_args, StarpuExprTypedVar(start_var.name, Int64))
-        push!(kernel_args, StarpuExprTypedVar(step_var.name, Int64))
-        push!(kernel_args, StarpuExprTypedVar(dim_var.name, Int64))
-
-        # User iterator rebuilt in the kernel from start, index and step.
-        iter_var = starpu_parse(ind_for.iters[k])
-        index_var = starpu_parse(Symbol(:kernel_ids__index_, k))
-        iter_decl = replace_pattern(iter_pattern, iter_var, start_var, index_var, step_var)
-
-        push!(ker_instr_to_add_later_on, iter_decl)
-    end
-
-
-    return dims, ker_instr_to_add_later_on, pre_kernel_instr , kernel_args, kernel_instr
-end
-
-
-
-# Assemble the index bookkeeping for a nest of @indep loops.
-#
-# Returns (pre_kernel_instr, kernel_args, kernel_instr): the instructions that
-# come from kernel_index_declarations extended with the `nthreads` and `nblocks`
-# declarations, the kernel arguments, and the kernel instructions made of the
-# thread index decomposition followed by the iterator declarations.
-function analyse_sets(ind_for :: StarpuIndepFor)
-
-    dims, deferred_kernel_instr, pre_kernel_instr, kernel_args, kernel_instr = kernel_index_declarations(ind_for)
-
-    # nthreads is the product of every dimension size, seeded with 1.
-    total_size = @parse 1
-    for dim in dims
-        total_size = StarpuExprCall(:(*), [total_size, dim])
-    end
-
-    assign_pattern = @parse € :: Int64 = €
-    nthreads_var = @parse nthreads
-    push!(pre_kernel_instr, replace_pattern(assign_pattern, nthreads_var, total_size))
-
-    # nblocks is nthreads divided by THREADS_PER_BLOCK, rounded up.
-    ceil_div_pattern = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK
-    nblocks_var = @parse nblocks
-    push!(pre_kernel_instr, replace_pattern(ceil_div_pattern, nblocks_var, nthreads_var))
-
-    append!(kernel_instr, translate_index_code(dims))
-    append!(kernel_instr, deferred_kernel_instr)
-
-    return pre_kernel_instr, kernel_args, kernel_instr
-end

+ 0 - 60
julia/src/Compiler/Expressions/affect.jl

@@ -1,60 +0,0 @@
-
-#======================================================
-                AFFECTATION
-======================================================#
-
-
-# Assignment node `var = expr`.
-struct StarpuExprAffect <: StarpuExpr
-    var :: StarpuExpr     # left-hand side (parsed from the first argument of `=`)
-    expr :: StarpuExpr    # right-hand side (parsed from the second argument of `=`)
-end
-
-# Turn a Julia `=` expression into a StarpuExprAffect; any other head is an error.
-function starpu_parse_affect(x :: Expr)
-
-    x.head == :(=) || error("Invalid \"affectation\" expression")
-
-    lhs = starpu_parse(x.args[1])
-    rhs = starpu_parse(x.args[2])
-
-    return StarpuExprAffect(lhs, rhs)
-end
-
-
-# Structural equality: `y` must also be an assignment whose target and value
-# are both `equals` to those of `x`.
-function equals(x :: StarpuExprAffect, y :: StarpuExpr)
-
-    isa(y, StarpuExprAffect) || return false
-
-    return equals(x.var, y.var) && equals(x.expr, y.expr)
-end
-
-
-# Print `var = expr`. When the target is a typed variable the value is wrapped
-# in an explicit cast, `var = (type) (expr)`, to avoid compiler warnings (or
-# errors with CUDA) on implicit conversions.
-function print(io :: IO, x :: StarpuExprAffect ; indent = 0)
-
-    print(io, x.var, indent = indent)
-    print(io, " = ")
-
-    if isa(x.var, StarpuExprTypedVar)
-        print(io, "(", starpu_type_traduction(x.var.typ), ") (")
-        print(io, x.expr, indent = indent)
-        print(io, ")")
-    else
-        print(io, x.expr, indent = indent)
-    end
-
-    return nothing
-end
-
-# Rebuild the assignment from its transformed target and value (in that order),
-# then hand the rebuilt node to `func`.
-function apply(func :: Function, expr :: StarpuExprAffect)
-
-    new_var = apply(func, expr.var)
-    new_value = apply(func, expr.expr)
-    rebuilt = StarpuExprAffect(new_var, new_value)
-
-    return func(rebuilt)
-end

+ 0 - 68
julia/src/Compiler/Expressions/block.jl

@@ -1,68 +0,0 @@
-
-#======================================================
-                BLOCK
-(series of instruction, not C variable scoping block)
-======================================================#
-
-
-# Sequence of instructions, printed one after the other and each followed by ";".
-struct StarpuExprBlock <: StarpuExpr
-    exprs :: Vector{StarpuExpr}    # instructions, in source order
-end
-
-
-# Tell whether an element of a Julia `:block` must be dropped before parsing.
-# Only line-number annotations are dropped, whether they appear as
-# `Expr(:line, ...)` or as `LineNumberNode`. Everything else is kept, including
-# symbols, literals and other expressions.
-
-# Fallback: literals and any other kind of node are kept. Without it a literal
-# inside a block raised a MethodError.
-function is_unwanted(x)
-    return false
-end
-
-function is_unwanted(x :: Symbol)
-    return false
-end
-
-function is_unwanted(x :: LineNumberNode)
-    return true
-end
-
-function is_unwanted(x :: Expr)
-    return x.head == :line
-end
-
-
-# Turn a Julia `:block` expression into a StarpuExprBlock, parsing every
-# element that is_unwanted does not reject. Any other head is an error.
-# TODO : let 1 instruction blocks be a thing ? (return the single instruction
-# instead of a block wrapping it)
-function starpu_parse_block(x :: Expr)
-
-    x.head == :block || error("Invalid \"block\" expression")
-
-    kept = filter(!is_unwanted, x.args)
-
-    return StarpuExprBlock(map(starpu_parse, kept))
-end
-
-
-# Print every instruction followed by ";", separating consecutive instructions
-# with an indented newline (none after the last one).
-function print(io :: IO, x :: StarpuExprBlock ; indent = 0)
-    last_index = length(x.exprs)
-    for (i, instr) in enumerate(x.exprs)
-        print(io, instr, indent = indent)
-        print(io, ";")
-        i == last_index || print_newline(io, indent)
-    end
-end
-
-
-
-
-# Transform every instruction of the block, rebuild it and pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprBlock)
-
-    new_exprs = map(expr.exprs) do instr
-        apply(func, instr)
-    end
-
-    return func(StarpuExprBlock(new_exprs))
-end

+ 0 - 75
julia/src/Compiler/Expressions/call.jl

@@ -1,75 +0,0 @@
-
-#======================================================
-                FUNCTION CALL
-======================================================#
-
-
-# Function call node; also used for operators, which are printed infix when
-# listed in starpu_infix_operators and given at least two arguments.
-struct StarpuExprCall <: StarpuExpr
-    func :: Symbol                # name of the called function or operator
-    args :: Vector{StarpuExpr}    # arguments, in call order
-end
-
-
-# Turn a Julia `:call` expression into a StarpuExprCall. The callee must parse
-# to a plain variable; the remaining arguments are parsed in order.
-function starpu_parse_call(x :: Expr)
-
-    x.head == :call || error("Invalid \"call\" expression")
-
-    callee = starpu_parse(x.args[1])
-    isa(callee, StarpuExprVar) || error("Invalid \"call\" expression : function must be a variable")
-
-    call_args = map(starpu_parse, x.args[2:end])
-
-    return StarpuExprCall(callee.name, call_args)
-end
-
-
-# Operators that print(::StarpuExprCall) writes in infix form when the call has
-# at least two arguments.
-starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(%))
-
-
-# Print the call as `name(arg1, arg2, ...)`.
-function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0)
-
-    print(io, x.func, "(")
-
-    for (i, arg) in enumerate(x.args)
-        i == 1 || print(io, ", ")
-        print(io, arg, indent = indent)
-    end
-
-    print(io, ")")
-end
-
-
-# Print the call as `(arg1) op (arg2) op ...`, each operand in parentheses.
-function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0)
-    for (i, operand) in enumerate(x.args)
-        i == 1 || print(io, " ", x.func, " ")
-        print(io, "(")
-        print(io, operand, indent = indent)
-        print(io, ")")
-    end
-end
-
-# Choose the infix form for known operators with at least two arguments, and
-# the prefix (function call) form otherwise.
-function print(io :: IO, x :: StarpuExprCall ; indent = 0)
-
-    is_infix = length(x.args) >= 2 && x.func in starpu_infix_operators
-    printer = is_infix ? print_infix : print_prefix
-
-    printer(io, x, indent = indent)
-end
-
-
-
-
-# Transform every argument, rebuild the call with the same function name and
-# pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprCall)
-
-    new_args = map(expr.args) do arg
-        apply(func, arg)
-    end
-
-    return func(StarpuExprCall(expr.func, new_args))
-end

+ 0 - 60
julia/src/Compiler/Expressions/cuda_call.jl

@@ -1,60 +0,0 @@
-
-
-#======================================================
-                CUDA KERNEL CALL
-======================================================#
-
-
-
-# CUDA kernel launch, printed as `name <<< nblocks, threads, 0, stream >>> (args);`.
-struct StarpuExprCudaCall <: StarpuExpr
-
-    ker_name :: Symbol                 # name of the kernel to launch
-
-    nblocks :: StarpuExpr              # first launch parameter
-    threads_per_block :: StarpuExpr    # second launch parameter
-
-    args :: Vector{StarpuExpr}         # arguments passed to the kernel
-
-end
-
-
-# Print the kernel launch over several lines: the kernel name, then the
-# `<<< nblocks, threads_per_block, 0, starpu_cuda_get_local_stream() >>>`
-# launch configuration, then the argument list terminated by ");".
-function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0)
-
-    print_newline(io, indent)
-    print(io, expr.ker_name)
-    print_newline(io, indent + starpu_indent_size)
-    print(io, "<<< ")
-    print(io, expr.nblocks, indent = indent + 2 * starpu_indent_size)
-    print(io, ", ")
-    print(io, expr.threads_per_block, indent = indent + 2 * starpu_indent_size)
-    print(io, ", 0, starpu_cuda_get_local_stream()")
-    print_newline(io, indent + starpu_indent_size)
-    print(io, ">>> (")
-
-    for i in (1 : length(expr.args))
-
-        if (i != 1)
-            print(io, ", ")
-            # Start a new line before arguments 5, 9, 13, ... (four per line).
-            if (i % 4 == 1)
-                print_newline(io, indent + 2 * starpu_indent_size + 1)
-            end
-        end
-
-        print(io, expr.args[i], indent = indent + 2 * starpu_indent_size)
-
-    end
-
-    print(io, ");")
-    print_newline(io, indent)
-
-end
-
-
-# Transform the launch parameters and the kernel arguments, rebuild the CUDA
-# call with the same kernel name and pass the rebuilt node to `func`.
-#
-# The launch parameters are traversed with `apply`, like the arguments, so that
-# `func` also reaches their sub-expressions, and the rebuilt node itself goes
-# through `func`, as in the `apply` methods of the other expression types.
-function apply(func :: Function, expr :: StarpuExprCudaCall)
-
-    nblocks = apply(func, expr.nblocks)
-    threads_per_block = apply(func, expr.threads_per_block)
-    args = map((x -> apply(func, x)), expr.args)
-
-    return func(StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args))
-end

+ 0 - 44
julia/src/Compiler/Expressions/field.jl

@@ -1,44 +0,0 @@
-
-
-#======================================================
-                STRUCTURE FIELDS
-======================================================#
-
-
-
-# Field access node, printed as `(left).field` or `(left)->field`.
-struct StarpuExprField <: StarpuExpr
-
-    left :: StarpuExpr      # expression whose field is accessed
-    field :: Symbol         # name of the accessed field
-
-    is_an_arrow :: Bool     # true: print "->", false: print "."
-end
-
-
-# Turn a Julia dot expression `left.field` into a StarpuExprField printed with
-# "." (is_an_arrow = false). The field must be a quoted symbol.
-function starpu_parse_field(x :: Expr)
-
-    (x.head == :(.) && length(x.args) == 2) || error("Invalid parsing of dot expression")
-
-    left = starpu_parse(x.args[1])
-
-    field_node = x.args[2]
-    (isa(field_node, QuoteNode) && isa(field_node.value, Symbol)) || error("Invalid parsing of dot expression")
-
-    return StarpuExprField(left, field_node.value, false)
-end
-
-
-# Print `(left)` followed by "->" or "." and the field name.
-function print(io :: IO, x :: StarpuExprField ; indent = 0)
-    accessor = x.is_an_arrow ? "->" : '.'
-
-    print(io, "(")
-    print(io, x.left, indent = indent)
-    print(io, ")", accessor, x.field)
-end
-
-
-
-# Transform the left operand, rebuild the field access and pass it to `func`.
-# The left operand is traversed with `apply` (not a bare `func` call) so that
-# `func` also reaches its sub-expressions.
-function apply(func :: Function, expr :: StarpuExprField)
-    return func(StarpuExprField(apply(func, expr.left), expr.field, expr.is_an_arrow))
-end

+ 0 - 100
julia/src/Compiler/Expressions/for.jl

@@ -1,100 +0,0 @@
-
-#======================================================
-                FOR LOOPS
-======================================================#
-
-
-# `for` loop over an interval.
-struct StarpuExprFor <: StarpuExpr
-
-    iter :: Symbol                          # name of the loop variable
-    set:: StarpuExprInterval                # interval the loop variable runs over
-    body :: StarpuExpr                      # loop body
-
-    is_independant :: Bool                  # true when the loop was marked @indep
-    set_declarations :: Vector{StarpuExpr}  # instructions printed before the loop header (empty right after parsing)
-
-end
-
-
-
-# Turn a Julia `for var = interval ... end` expression into a StarpuExprFor
-# with no set declaration. The iterator must be a plain variable and the
-# iterated set must parse to an interval.
-function starpu_parse_for(x :: Expr; is_independant = false)
-
-    x.head == :for || error("Invalid \"for\" expression")
-
-    header = x.args[1]
-    header.head == :(=) || error("Invalid \"for\" iterator affectation")
-
-    loop_var = starpu_parse(header.args[1])
-    isa(loop_var, StarpuExprVar) || error("Invalid \"for\" iterator")
-
-    interval = starpu_parse(header.args[2])
-    isa(interval, StarpuExprInterval) || error("Set of values in \"for\" loop must be an interval")
-
-    loop_body = starpu_parse(x.args[2])
-
-    return StarpuExprFor(loop_var.name, interval, loop_body, is_independant, StarpuExpr[])
-end
-
-
-
-
-
-# Print the loop as a C `for` statement, preceded by its set declarations.
-#
-# The loop header refers to variables named after the interval id
-# (`start_<id>`, `stop_<id>`, `step_<id>`, `dim_<id>`, `iter_<id>`).
-# NOTE(review): presumably these are declared by `set_declarations`; confirm
-# where those declarations are generated.
-function print(io :: IO, x :: StarpuExprFor ; indent = 0)
-
-    print_newline(io, indent)
-    print(io, StarpuExprBlock(x.set_declarations), indent = indent)
-
-    id = x.set.id
-
-    start = "start_" * id
-    stop = "stop_" * id
-    step = "step_" * id
-    dim = "dim_" * id
-    iter = "iter_" * id
-
-    print_newline(io, indent, 2)
-
-    if isa(x.set.step, StarpuExprValue)
-        # Literal step: compare the loop variable against the stop bound, with
-        # the comparison direction chosen from the sign of the step.
-        print(io, "for ($(x.iter) = $start ; ")
-        comparison_op = (x.set.step.value >= 0) ? "<=" : ">="
-        print(io, "$(x.iter) $comparison_op $stop ; ")
-        print(io, "$(x.iter) += $(x.set.step.value))")
-
-    else
-        # Non-literal step: count iterations with a separate counter up to the
-        # interval size and advance the loop variable by the step variable.
-        print(io, "for ($iter = 0, $(x.iter) = $start ; ")
-        print(io, "$iter < $dim ; ")
-        print(io, "$iter += 1, $(x.iter) += $step)")
-
-    end
-
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, indent + starpu_indent_size)
-    print(io, x.body, indent = indent + starpu_indent_size)
-    print_newline(io, indent)
-    print(io, "}")
-    print_newline(io, indent)
-
-end
-
-
-
-# Transform the set declarations, the interval and the body (in that order),
-# rebuild the loop with the same iterator and @indep flag, and pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprFor)
-
-    new_declarations = map(expr.set_declarations) do decl
-        apply(func, decl)
-    end
-    new_set = apply(func, expr.set)
-    new_body = apply(func, expr.body)
-
-    rebuilt = StarpuExprFor(expr.iter, new_set, new_body, expr.is_independant, new_declarations)
-
-    return func(rebuilt)
-end

+ 0 - 85
julia/src/Compiler/Expressions/function.jl

@@ -1,85 +0,0 @@
-
-
-#======================================================
-                FUNCTION DECLARATION
-======================================================#
-
-
-# Function definition node.
-struct StarpuExprFunction <: StarpuExpr
-    ret_type :: Type                      # Julia type of the returned value
-    func :: Symbol                        # function name
-    args :: Vector{StarpuExprTypedVar}    # typed parameters, in declaration order
-    body :: StarpuExpr                    # function body
-end
-
-
-# Turn a Julia `function name(args...) :: T ... end` expression into a
-# StarpuExprFunction. The prototype must carry an explicit return type and
-# every argument must be a typed variable.
-function starpu_parse_function(x :: Expr)
-
-    x.head == :function || error("Invalid \"function\" expression")
-
-    signature = starpu_parse(x.args[1])
-    isa(signature, StarpuExprTypedExpr) || error("Invalid \"function\" prototype : a return type must me explicited")
-
-    prototype = signature.expr
-    isa(prototype, StarpuExprCall) || error("Invalid \"function\" prototype")
-
-    params = StarpuExprTypedVar[]
-    for param in prototype.args
-        isa(param, StarpuExprTypedVar) || error("Invalid \"function\" argument list")
-        push!(params, param)
-    end
-
-    parsed_body = starpu_parse(x.args[2])
-
-    return StarpuExprFunction(signature.typ, prototype.func, params, parsed_body)
-end
-
-
-
-# Print the function as a C definition: translated return type, name, typed
-# parameter list, then the body between braces followed by blank lines.
-function print(io :: IO, x :: StarpuExprFunction ; indent = 0)
-
-    print(io, starpu_type_traduction(x.ret_type), " ")
-    print(io, x.func, '(')
-
-    for i in (1 : length(x.args))
-
-        if (i != 1)
-            print(io, ", ")
-            # Start a new line before parameters 5, 9, 13, ... (four per line).
-            # NOTE(review): the "+ 13" offset presumably aligns continuation
-            # lines with the opening parenthesis; confirm the intent.
-            if (i % 4 == 1)
-                print_newline(io, indent + starpu_indent_size + length(String(x.func)) + 13)
-            end
-        end
-
-        print(io, x.args[i], indent = indent + starpu_indent_size)
-    end
-
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, indent + starpu_indent_size)
-    print(io, x.body, indent = indent + starpu_indent_size)
-    print_newline(io, indent)
-    print(io, "}\n\n")
-    print_newline(io, indent)
-end
-
-
-
-# Transform the parameters and the body, rebuild the function with the same
-# name and return type, and pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprFunction)
-
-    new_args = map(expr.args) do arg
-        apply(func, arg)
-    end
-    new_body = apply(func, expr.body)
-
-    return func(StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body))
-end

+ 0 - 94
julia/src/Compiler/Expressions/if.jl

@@ -1,94 +0,0 @@
-
-
-#======================================================
-                IF STATEMENT
-======================================================#
-
-
-
-# `if` statement without an `else` branch.
-struct StarpuExprIf <: StarpuExpr
-    cond :: StarpuExpr              # condition
-    then_statement :: StarpuExpr    # executed when the condition holds
-end
-
-
-# `if` statement with an `else` branch.
-struct StarpuExprIfElse <: StarpuExpr
-    cond :: StarpuExpr              # condition
-    then_statement :: StarpuExpr    # executed when the condition holds
-    else_statement :: StarpuExpr    # executed otherwise
-end
-
-
-# Turn a Julia `if` expression into a StarpuExprIf (two arguments) or a
-# StarpuExprIfElse (the third argument is parsed as the else branch).
-function starpu_parse_if(x :: Expr)
-
-    x.head == :if || error("Invalid \"if\" expression")
-
-    nargs = length(x.args)
-    nargs >= 2 || error("Invalid \"if\" statement")
-
-    cond = starpu_parse(x.args[1])
-    then_branch = starpu_parse(x.args[2])
-
-    nargs == 2 && return StarpuExprIf(cond, then_branch)
-
-    else_branch = starpu_parse(x.args[3])
-
-    return StarpuExprIfElse(cond, then_branch, else_branch)
-end
-
-
-# Print a C `if (cond) { ... }` statement, followed by ` else { ... }` when `x`
-# is a StarpuExprIfElse. Note that the trailing newline is only printed in the
-# if/else case: the plain `if` returns right after its closing brace.
-function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0)
-
-    print_newline(io, indent)
-    print(io, "if (")
-    print(io, x.cond, indent = indent + starpu_indent_size)
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, indent + starpu_indent_size)
-    print(io, x.then_statement, indent = indent + starpu_indent_size)
-    print_newline(io, indent)
-    print(io, "}")
-
-    # No else branch to print for a plain `if`.
-    if (!isa(x, StarpuExprIfElse))
-        return
-    end
-
-    print(io, " else")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, indent + starpu_indent_size)
-    print(io, x.else_statement, indent = indent + starpu_indent_size)
-    print_newline(io, indent)
-    print(io, "}")
-    print_newline(io, indent)
-
-end
-
-
-
-# Transform the condition and the then branch, rebuild the `if` and pass it to
-# `func`.
-function apply(func :: Function, expr :: StarpuExprIf)
-
-    new_cond = apply(func, expr.cond)
-    new_then = apply(func, expr.then_statement)
-    rebuilt = StarpuExprIf(new_cond, new_then)
-
-    return func(rebuilt)
-end
-
-
-
-# Transform the condition and both branches (condition, then, else), rebuild
-# the `if`/`else` and pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprIfElse)
-
-    new_cond = apply(func, expr.cond)
-    new_then = apply(func, expr.then_statement)
-    new_else = apply(func, expr.else_statement)
-    rebuilt = StarpuExprIfElse(new_cond, new_then, new_else)
-
-    return func(rebuilt)
-end

+ 0 - 48
julia/src/Compiler/Expressions/interval.jl

@@ -1,48 +0,0 @@
-
-#======================================================
-                INTERVALS
-======================================================#
-
-
-# Interval `start : step : stop`.
-struct StarpuExprInterval <: StarpuExpr
-    start :: StarpuExpr    # first bound
-    step :: StarpuExpr     # increment
-    stop :: StarpuExpr     # last bound
-
-    id :: String           # suffix used to name the C variables attached to this interval
-
-    # `id` defaults to a fresh rand_string(); pass it explicitly to rebuild an
-    # interval that keeps the identity of an existing one (as `apply` does).
-    function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string())
-        return new(start, step, stop, id)
-    end
-
-end
-
-
-# Turn a Julia range expression into a StarpuExprInterval.
-# `a : b` gives start = a, step = 1, stop = b;
-# `a : s : b` gives start = a, step = s, stop = b.
-function starpu_parse_interval(x :: Expr)
-
-    x.head == :(:) || error("Invalid \"interval\" expression")
-
-    first_bound = starpu_parse(x.args[1])
-    # Second argument: the stop bound in the two-argument form, the step otherwise.
-    second = starpu_parse(x.args[2])
-
-    if length(x.args) == 2
-        return StarpuExprInterval(first_bound, StarpuExprValue(1), second)
-    end
-
-    last_bound = starpu_parse(x.args[3])
-
-    return StarpuExprInterval(first_bound, second, last_bound)
-end
-
-
-
-# Transform start, step and stop (in that order), rebuild the interval with
-# the same id and pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprInterval)
-
-    new_start = apply(func, expr.start)
-    new_step = apply(func, expr.step)
-    new_stop = apply(func, expr.stop)
-    rebuilt = StarpuExprInterval(new_start, new_step, new_stop, id = expr.id)
-
-    return func(rebuilt)
-end

+ 0 - 70
julia/src/Compiler/Expressions/ref.jl

@@ -1,70 +0,0 @@
-
-#======================================================
-                ARRAYS AND REFERENCES
-======================================================#
-
-
-# Indexing node, printed as `ref[i1][i2]...`.
-struct StarpuExprRef <: StarpuExpr
-    ref :: StarpuExpr                # indexed expression
-    indexes :: Vector{StarpuExpr}    # indexes, each printed in its own pair of brackets
-end
-
-
-# Turn a Julia `a[i, j, ...]` expression into a StarpuExprRef: the first
-# argument is the indexed expression, the others are the indexes.
-function starpu_parse_ref(x :: Expr)
-
-    x.head == :ref || error("Invalid \"reference\" expression")
-
-    target = starpu_parse(x.args[1])
-    parsed_indexes = map(starpu_parse, x.args[2:end])
-
-    return StarpuExprRef(target, parsed_indexes)
-end
-
-
-
-# Structural equality: `y` must also be a reference, on an `equals` expression,
-# with the same number of indexes, all pairwise `equals`.
-function equals(x :: StarpuExprRef, y :: StarpuExpr)
-
-    isa(y, StarpuExprRef) || return false
-
-    same_target_and_rank = equals(x.ref, y.ref) && length(x.indexes) == length(y.indexes)
-    same_target_and_rank || return false
-
-    return all(map(equals, x.indexes, y.indexes))
-end
-
-
-
-
-# Print the indexed expression followed by each index between brackets.
-function print(io :: IO, x :: StarpuExprRef ; indent = 0)
-
-    print(io, x.ref, indent = indent)
-
-    for index in x.indexes
-        print(io, "[")
-        print(io, index, indent = indent)
-        print(io, "]")
-    end
-
-end
-
-
-
-# Transform the indexed expression and every index, rebuild the reference and
-# pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprRef)
-
-    new_ref = apply(func, expr.ref)
-    new_indexes = map(expr.indexes) do index
-        apply(func, index)
-    end
-
-    return func(StarpuExprRef(new_ref, new_indexes))
-end

+ 0 - 33
julia/src/Compiler/Expressions/return.jl

@@ -1,33 +0,0 @@
-
-#======================================================
-                RETURN EXPRESSION
-======================================================#
-
-
-# `return` statement.
-struct StarpuExprReturn <: StarpuExpr
-    value :: StarpuExpr    # returned expression
-end
-
-# Turn a Julia `return` expression into a StarpuExprReturn wrapping its parsed
-# first argument.
-function starpu_parse_return(x :: Expr)
-
-    x.head == :return || error("Invalid \"return\" expression")
-
-    return StarpuExprReturn(starpu_parse(x.args[1]))
-end
-
-
-# Print "return " followed by the returned expression.
-function print(io :: IO, x :: StarpuExprReturn ; indent = 0)
-    returned = x.value
-    print(io, "return ")
-    print(io, returned, indent = indent)
-end
-
-
-
-# Transform the returned expression, rebuild the `return` and pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprReturn)
-
-    new_value = apply(func, expr.value)
-
-    return func(StarpuExprReturn(new_value))
-end

+ 0 - 63
julia/src/Compiler/Expressions/simple_expressions.jl

@@ -1,63 +0,0 @@
-
-
-# Root of the expression tree hierarchy: every node type is a subtype of it.
-abstract type StarpuExpr end
-
-
-# Default traversal, used by node types without a dedicated method: the node is
-# handed to `func` as is and the result is returned.
-apply(func :: Function, expr :: StarpuExpr) = func(expr)
-
-
-
-
-# Variable (identifier) node.
-struct StarpuExprVar <: StarpuExpr
-    name :: Symbol    # identifier, printed verbatim
-end
-
-# A variable is printed as its bare name; `indent` is accepted and ignored.
-print(io :: IO, x :: StarpuExprVar ; indent = 0) = print(io, x.name)
-
-
-
-# Literal value node: wraps whatever raw Julia value the parser met.
-struct StarpuExprValue <: StarpuExpr
-    value :: Any    # raw value; `nothing` prints as an empty string
-end
-
-
-# Print a literal:
-#   nothing  -> prints nothing at all
-#   strings  -> between double quotes
-#   chars    -> between single quotes
-#   anything else is printed with the default `print`.
-# `indent` is accepted and ignored.
-function print(io :: IO, x :: StarpuExprValue ; indent = 0)
-
-    value = x.value
-
-    # Identity test: `==` can be overloaded by the wrapped value's type and
-    # yields `missing` (not a Bool) when the value is `missing`.
-    if value === nothing
-        return
-    end
-
-    if isa(value, AbstractString)
-        print(io, '"', value, '"')
-        return
-    end
-
-    if isa(value, Char)
-        print(io, '\'', value, '\'')
-        return
-    end
-
-    print(io, value)
-end
-
-
-
-
-# Placeholder node without any field.
-struct StarpuExprInvalid <: StarpuExpr
-end
-
-# Printed as the literal text INVALID; `indent` is accepted and ignored.
-print(io :: IO, x :: StarpuExprInvalid ; indent = 0) = print(io, "INVALID")
-
-
-
-# Fallback: anything without a more specific method is wrapped as a literal.
-starpu_parse(raw_value :: Any) = StarpuExprValue(raw_value)
-
-# A bare symbol is a variable.
-starpu_parse(sym :: Symbol) = StarpuExprVar(sym)

+ 0 - 109
julia/src/Compiler/Expressions/typed.jl

@@ -1,109 +0,0 @@
-
-#======================================================
-                TYPED EXPRESSION
-======================================================#
-
-
-
-
-
-
-
-# Common supertype of the nodes carrying a Julia type annotation.
-abstract type StarpuExprTyped <: StarpuExpr end
-
-
-# Typed variable `name :: typ`, printed as a C declaration `<type> name`.
-struct StarpuExprTypedVar <: StarpuExprTyped
-    name :: Symbol    # variable name
-    typ :: Type       # annotated Julia type
-end
-
-# Any other annotated expression `expr :: typ`; only `expr` is printed.
-struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ?
-    expr :: StarpuExpr    # annotated expression
-    typ :: Type           # annotated Julia type
-end
-
-
-# Turn a Julia `expr :: T` expression into a StarpuExprTypedVar (when `expr`
-# is a plain variable) or a StarpuExprTypedExpr (otherwise).
-# The annotation is evaluated with `eval` and must yield a Type; any failure is
-# reported as an invalid type.
-function starpu_parse_typed(x :: Expr)
-
-    x.head == :(::) || error("Invalid type assigned expression")
-
-    annotated = starpu_parse(x.args[1])
-
-    typ = try
-        eval(x.args[2]) :: Type
-    catch
-        error("Invalid type in type assigned expression")
-    end
-
-    isa(annotated, StarpuExprVar) && return StarpuExprTypedVar(annotated.name, typ)
-
-    return StarpuExprTypedExpr(annotated, typ)
-end
-
-
-
-
-
-# Julia scalar type => C type name used in the generated sources.
-# starpu_type_traduction looks types up here once arrays and pointers have been
-# handled, so a type missing from this table raises a KeyError there.
-starpu_type_traduction_dict = Dict(
-    Void => "void",
-    Int32 => "int32_t",
-    UInt32 => "uint32_t",
-    Float32 => "float",
-    Int64 => "int64_t",
-    UInt64 => "uint64_t",
-    Float64 => "double"
-)
-
-
-
-# Translate a Julia type into the matching C type name:
-# arrays go through starpu_type_traduction_array, `Ptr{T}` becomes the
-# translation of T followed by "*", everything else is looked up in
-# starpu_type_traduction_dict.
-function starpu_type_traduction(x)
-
-    x <: Array && return starpu_type_traduction_array(x)
-
-    x <: Ptr && return starpu_type_traduction(eltype(x)) * "*"
-
-    return starpu_type_traduction_dict[x]
-end
-
-
-# Translate `Array{T,N}` into the translation of T followed by N "*"
-# (one pointer level per array dimension).
-function starpu_type_traduction_array(x :: Type{Array{T,N}}) where {T,N}
-
-    element_type = starpu_type_traduction(T)
-
-    return element_type * ("*" ^ N)
-end
-
-
-
-# A typed variable is printed as a C declaration `<type> name`; any other
-# typed node only prints the expression it wraps.
-function print(io :: IO, x :: StarpuExprTyped ; indent = 0)
-
-    if !isa(x, StarpuExprTypedVar)
-        print(io, x.expr, indent = indent)
-        return
-    end
-
-    print(io, starpu_type_traduction(x.typ), " ")
-    print(io, x.name)
-end
-
-
-
-# Transform the wrapped expression, rebuild the typed expression with the same
-# type and pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprTypedExpr)
-
-    rebuilt = StarpuExprTypedExpr(apply(func, expr.expr), expr.typ)
-
-    return func(rebuilt)
-end

+ 0 - 53
julia/src/Compiler/Expressions/while.jl

@@ -1,53 +0,0 @@
-
-#======================================================
-                While loop
-======================================================#
-
-
-# `while` loop.
-struct StarpuExprWhile <: StarpuExpr
-    cond :: StarpuExpr    # loop condition
-    body :: StarpuExpr    # loop body
-end
-
-# Turn a Julia `while` expression into a StarpuExprWhile built from its first
-# two arguments (condition, body).
-function starpu_parse_while(x :: Expr)
-
-    x.head == :while || error("Invalid \"while\" loop")
-    length(x.args) >= 2 || error("Invalid \"while\" loop")
-
-    loop_cond = starpu_parse(x.args[1])
-    loop_body = starpu_parse(x.args[2])
-
-    return StarpuExprWhile(loop_cond, loop_body)
-end
-
-
-# Print a C `while (cond) { ... }` statement, with the braces on their own
-# lines, the body one indentation level deeper, and a trailing newline.
-function print(io :: IO, x :: StarpuExprWhile ; indent = 0)
-    print_newline(io, indent)
-    print(io, "while (")
-    print(io, x.cond, indent = indent + starpu_indent_size)
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, indent + starpu_indent_size)
-    print(io, x.body, indent = indent + starpu_indent_size)
-    print_newline(io, indent)
-    print(io, "}")
-    print_newline(io, indent)
-end
-
-
-
-# Transform the condition and the body, rebuild the loop and pass it to `func`.
-function apply(func :: Function, expr :: StarpuExprWhile)
-
-    new_cond = apply(func, expr.cond)
-    new_body = apply(func, expr.body)
-    rebuilt = StarpuExprWhile(new_cond, new_body)
-
-    return func(rebuilt)
-end

+ 0 - 69
julia/src/Compiler/Generate_files/c_files.jl

@@ -1,69 +0,0 @@
-
-
-
-# Name of the C file the generated CPU kernels are appended to. The sentinel
-# value "PRINT TO STDOUT" means no file has been opened yet.
-global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
-
-# Prelude written at the top of every generated CPU kernel file: the includes
-# and the C helpers jlstarpu_max and jlstarpu_interval_size.
-const cpu_kernel_file_start = "#include <stdio.h>
-#include <stdint.h>
-#include <starpu.h>
-
-static inline long long jlstarpu_max(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
-{
-    if (stop >= start){
-            return jlstarpu_max(0, (stop - start + 1) / step);
-    } else {
-            return jlstarpu_max(0, (stop - start - 1) / step);
-    }
-}
-
-"
-
-
-"""
-    starpu_new_cpu_kernel_file(file_name :: String)
-
-Create (or truncate) the C source file `file_name`, write the common prelude
-`cpu_kernel_file_start` into it, and record `file_name` in the global
-`generated_cpu_kernel_file_name` as the file generated CPU kernels are written to.
-Returns `nothing`.
-"""
-function starpu_new_cpu_kernel_file(file_name :: String)
-
-    global generated_cpu_kernel_file_name = file_name
-
-    # The do-block form closes the file even when the write throws.
-    open(file_name, "w") do kernel_file
-        print(kernel_file, cpu_kernel_file_start)
-    end
-
-    return nothing
-end
-
-
-"""
-	Runs the StarPU C compiler on the following function declaration.
-	If no call to starpu_new_cpu_kernel_file has been made before, it only
-	prints the resulting function. Otherwise, it writes into the source file
-	specified when starpu_new_cpu_kernel_file was called.
-"""
-macro cpu_kernel(x)
-
-    # The translation to a CPU kernel happens while the macro is expanded.
-    starpu_expr = transform_to_cpu_kernel(starpu_parse(x))
-
-    # `generated_cpu_kernel_file_name` is interpolated below, so the generated
-    # code uses the file name that was current when the macro was expanded.
-    return quote
-
-        to_stdout = ($(esc(generated_cpu_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-			println("\nNo specified CPU kernel file to write into : writting to STDOUT instead\n")
-            kernel_file = STDOUT
-        else
-            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, $starpu_expr)
-
-        # Only close what was opened here, never STDOUT.
-        if (!to_stdout)
-            close(kernel_file)
-        end
-    end
-end

+ 0 - 134
julia/src/Compiler/Generate_files/cuda_files.jl

@@ -1,134 +0,0 @@
-
-
-
-# Name of the CUDA file the generated GPU kernels are appended to. The sentinel
-# value "PRINT TO STDOUT" means no file has been opened yet.
-global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
-
-# Prelude written at the top of every generated CUDA kernel file: the includes,
-# the THREADS_PER_BLOCK constant, and the jlstarpu_max / jlstarpu_interval_size
-# helpers in a host version and a `__device__` version.
-const cuda_kernel_file_start = "#include <stdio.h>
-#include <stdint.h>
-#include <starpu.h>
-
-#define THREADS_PER_BLOCK 64
-
-static inline long long jlstarpu_max(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
-{
-    if (stop >= start){
-            return jlstarpu_max(0, (stop - start + 1) / step);
-    } else {
-            return jlstarpu_max(0, (stop - start - 1) / step);
-    }
-}
-
-
-__device__ static inline long long jlstarpu_max__device(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-__device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
-{
-	if (stop >= start){
-		return jlstarpu_max__device(0, (stop - start + 1) / step);
-	} else {
-		return jlstarpu_max__device(0, (stop - start - 1) / step);
-	}
-}
-
-
-"
-
-"""
-    starpu_new_cuda_kernel_file(file_name :: String)
-
-Create (or truncate) the CUDA source file `file_name`, write the common prelude
-`cuda_kernel_file_start` into it, and record `file_name` in the global
-`generated_cuda_kernel_file_name` as the file generated GPU kernels are written to.
-Returns `nothing`.
-"""
-function starpu_new_cuda_kernel_file(file_name :: String)
-
-    global generated_cuda_kernel_file_name = file_name
-
-    # The do-block form closes the file even when the write throws.
-    open(file_name, "w") do kernel_file
-        print(kernel_file, cuda_kernel_file_start)
-    end
-
-    return nothing
-end
-
-
-"""
-	Runs the StarPU CUDA compiler on the following function declaration.
-	If no call to starpu_new_cuda_kernel_file has been made before, it only
-	prints the resulting functions. Otherwise, it writes into the source file
-	specified when starpu_new_cuda_kernel_file was called.
-"""
-macro cuda_kernel(x)
-
-    # The translation happens while the macro is expanded and yields two
-    # functions: the prekernel and the kernel itself.
-    prekernel, kernel = transform_to_cuda_kernel(starpu_parse(x))
-
-    # `generated_cuda_kernel_file_name` is interpolated below, so the generated
-    # code uses the file name that was current when the macro was expanded.
-    return quote
-
-        to_stdout = ($(esc(generated_cuda_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-			println("\nNo specified CUDA kernel file to write into : writting to STDOUT instead\n")
-            kernel_file = STDOUT
-        else
-            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "a+")
-        end
-
-        # The kernel is written first, prefixed with `__global__`, then the
-        # prekernel, prefixed with `extern "C"`.
-        print(kernel_file, "__global__ ", $kernel)
-        print(kernel_file, "\nextern \"C\" ", $prekernel)
-
-        # Only close what was opened here, never STDOUT.
-        if (!to_stdout)
-            close(kernel_file)
-        end
-    end
-end
-
-
-
-"""
-	Does the work of both @cpu_kernel and @cuda_kernel on the same function
-	declaration: the declaration is parsed once, then the CPU version and the
-	CUDA version are each written to their own kernel file (or to STDOUT when
-	no such file has been specified).
-"""
-macro cpu_cuda_kernel(x)
-
-	# Parse once and derive both versions while the macro is expanded.
-	parsed = starpu_parse(x)
-	cpu_expr = transform_to_cpu_kernel(parsed)
-	prekernel, kernel = transform_to_cuda_kernel(parsed)
-
-	return quote
-
-		# --- CPU kernel ---
-		to_stdout = ($(esc(generated_cpu_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-            kernel_file = STDOUT
-			println("\nNo specified CPU kernel file to write into : writting to STDOUT instead\n")
-        else
-            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, $cpu_expr)
-
-        if (!to_stdout)
-            close(kernel_file)
-        end
-
-
-		# --- CUDA kernel and prekernel ---
-		to_stdout = ($(esc(generated_cuda_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-            kernel_file = STDOUT
-			println("\nNo specified CUDA kernel file to write into : writting to STDOUT instead\n")
-        else
-            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, "__global__ ", $kernel)
-        print(kernel_file, "\nextern \"C\" ", $prekernel)
-
-        if (!to_stdout)
-            close(kernel_file)
-        end
-	end
-end

+ 0 - 54
julia/src/Compiler/Generate_files/so_files.jl

@@ -1,54 +0,0 @@
-
-
-"""
-    Compiles the C source file opened by starpu_new_cpu_kernel_file
-    and filled by @cpu_kernel declarations.
-    The output file is a shared library which can be provided to starpu_init() in
-    order to find kernels.
-    Compiler flags come from `pkg-config starpu-1.3`. Once the compiler has been
-    invoked, the current CPU kernel file name is reset to "PRINT TO STDOUT".
-"""
-function compile_cpu_kernels(output_file :: String)
-
-    # chomp removes only a trailing newline, so the flags stay intact even
-    # when the pkg-config output does not end with one
-    starpu_cflags = chomp(readstring(`pkg-config --cflags starpu-1.3`))
-    starpu_libs = chomp(readstring(`pkg-config --libs starpu-1.3`))
-    options = "-O3 -shared -fPIC"
-
-    system("gcc $generated_cpu_kernel_file_name $options $starpu_cflags $starpu_libs -o $output_file")
-
-    global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
-
-    return nothing
-end
-
-
-"""
-    Compiles the Cuda source file opened by starpu_new_cuda_kernel_file
-    and filled by @cuda_kernel declarations.
-    The output file is a shared library which can be provided to starpu_init() in
-    order to find kernels.
-    Compiler flags come from `pkg-config starpu-1.3`. Once the compiler has been
-    invoked, the current CUDA kernel file name is reset to "PRINT TO STDOUT".
-"""
-function compile_cuda_kernels(output_file :: String)
-
-    # chomp removes only a trailing newline, so the flags stay intact even
-    # when the pkg-config output does not end with one
-    starpu_cflags = chomp(readstring(`pkg-config --cflags starpu-1.3`))
-    starpu_libs = chomp(readstring(`pkg-config --libs starpu-1.3`))
-    options = " -O3 --shared --compiler-options \'-fPIC\' "
-
-    system("nvcc $generated_cuda_kernel_file_name $options $starpu_cflags $starpu_libs -o $output_file")
-
-    global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
-
-    return nothing
-end
-
-
-"""
-    Combines several shared libraries into a new one.
-    Can be used to have both CPU and Cuda kernels (from compile_cpu_kernels and
-    compile_cuda_kernels) accessible from the same library.
-    `input_files` are the libraries to combine, `output_file` is the library
-    written by gcc.
-"""
-function combine_kernel_files(output_file :: String, input_files :: Vector{String})
-
-    # join accepts an empty list and does not splat the whole vector into a
-    # call; each file name is still followed by one space
-    input_str = join(map((x -> x * " "), input_files))
-
-    return system("gcc -shared -fPIC $input_str -o $output_file")
-
-end

+ 0 - 39
julia/src/Compiler/include.jl

@@ -1,39 +0,0 @@
-
-
-
-include("utils.jl")
-
-include("Expressions/simple_expressions.jl")
-include("Expressions/affect.jl")
-include("Expressions/block.jl")
-include("Expressions/call.jl")
-include("Expressions/cuda_call.jl")
-include("Expressions/field.jl")
-include("Expressions/interval.jl")
-include("Expressions/for.jl")
-include("Expressions/typed.jl")
-include("Expressions/function.jl")
-include("Expressions/if.jl")
-include("Expressions/ref.jl")
-include("Expressions/return.jl")
-include("Expressions/while.jl")
-
-include("parsing.jl")
-
-include("expression_manipulation.jl")
-
-include("C/substitute_args.jl")
-include("C/substitute_func_calls.jl")
-include("C/substitute_indexing.jl")
-include("C/add_for_loop_declarations.jl")
-include("C/flatten_blocks.jl")
-include("C/create_cpu_kernel.jl")
-
-include("Cuda/indep_for.jl")
-include("Cuda/indep_for_kernel_ids.jl")
-include("Cuda/create_cuda_kernel.jl")
-
-
-include("Generate_files/c_files.jl")
-include("Generate_files/cuda_files.jl")
-include("Generate_files/so_files.jl")

+ 0 - 146
julia/src/Wrapper/Julia/starpu_codelet.jl

@@ -1,146 +0,0 @@
-
-
-export StarpuDataAccessMode
-export STARPU_NONE,STARPU_R,STARPU_W,STARPU_RW, STARPU_SCRATCH
-export STARPU_REDUX,STARPU_COMMUTE, STARPU_SSEND, STARPU_LOCALITY
-export STARPU_ACCESS_MODE_MAX
-
-@enum(StarpuDataAccessMode,
-    # Element type of the `modes` vector of a StarpuCodelet. Every value except STARPU_NONE and STARPU_RW is a single bit.
-    STARPU_NONE = 0,
-    STARPU_R = (1 << 0),
-    STARPU_W = (1 << 1),
-    STARPU_RW = ((1 << 0) | (1 << 1)), # same bits as STARPU_R | STARPU_W
-    STARPU_SCRATCH = (1 << 2),
-    STARPU_REDUX = (1 << 3),
-    STARPU_COMMUTE = (1 << 4),
-    STARPU_SSEND = (1 << 5),
-    STARPU_LOCALITY = (1 << 6),
-    STARPU_ACCESS_MODE_MAX = (1 << 7)
-
-)
-
-
-export StarpuCodelet
-"""
-    Julia-side description of a codelet.
-    `cpu_func` and `gpu_func` are the names of the symbols to load from the
-    tasks library ("" when there is no such implementation), `modes` gives one
-    access mode per buffer (at most STARPU_NMAXBUFS) and `c_codelet` points to
-    the C structure allocated by jlstarpu_new_codelet.
-    When `where_to_execute` is not given, it is deduced from the function names
-    which are not empty.
-"""
-struct StarpuCodelet
-
-    where_to_execute :: UInt32
-
-    cpu_func :: String
-    gpu_func :: String
-
-    modes :: Vector{StarpuDataAccessMode}
-
-    perfmodel :: StarpuPerfmodel
-
-    c_codelet :: Ptr{Void}
-
-
-    function StarpuCodelet(;
-        cpu_func :: String = "",
-        gpu_func :: String = "",
-        modes :: Vector{StarpuDataAccessMode} = StarpuDataAccessMode[],
-        perfmodel :: StarpuPerfmodel = StarpuPerfmodel(),
-        where_to_execute :: Union{Void, UInt32} = nothing
-    )
-
-        if (length(modes) > STARPU_NMAXBUFS)
-            error("Codelet has too much buffers ($(length(modes)) but only $STARPU_NMAXBUFS are allowed)")
-        end
-
-        real_c_codelet_ptr = @starpucall jlstarpu_new_codelet Ptr{Void} ()
-        push!(jlstarpu_allocated_structures, real_c_codelet_ptr)
-
-        # identity comparison is the reliable way to test for `nothing`
-        if (where_to_execute === nothing)
-            # one flag per implementation whose function name was provided
-            real_where = (!isempty(cpu_func) * STARPU_CPU) | (!isempty(gpu_func) * STARPU_CUDA)
-        else
-            real_where = where_to_execute
-        end
-
-        output = new(real_where, cpu_func, gpu_func, modes, perfmodel, real_c_codelet_ptr)
-
-        # fills the C structure with the content of the new codelet
-        starpu_c_codelet_update(output)
-
-        return output
-    end
-end
-
-
-
-"""
-    Builds a StarpuCodeletTranslator from `cl` and gives it, together with
-    the C codelet pointer of `cl`, to jlstarpu_codelet_update.
-"""
-function starpu_c_codelet_update(cl :: StarpuCodelet)
-
-    translator_ref = Ref{StarpuCodeletTranslator}(StarpuCodeletTranslator(cl))
-
-    return @starpucall(jlstarpu_codelet_update,
-                Void, (Ptr{Void}, Ptr{Void}),
-                translator_ref,
-                cl.c_codelet
-            )
-end
-
-
-
-"""
-    Returns the address of symbol `func_name` inside the library referenced
-    by starpu_tasks_library_handle.
-    An empty name gives C_NULL. A symbol which can not be found raises an error.
-"""
-function load_starpu_function_pointer(func_name :: String)
-
-    if (isempty(func_name))
-        return C_NULL
-    end
-
-    # dlsym_e returns C_NULL for a missing symbol (dlsym throws its own error
-    # instead), so the check below is actually reached
-    func_pointer = Libdl.dlsym_e(starpu_tasks_library_handle, func_name)
-
-    if (func_pointer == C_NULL)
-        error("Couldn't find function symbol $func_name into extern library file $starpu_tasks_library")
-    end
-
-    return func_pointer
-end
-
-
-
-"""
-    Version of a StarpuCodelet whose fields are pointers and C integers.
-    It is the structure given by reference to jlstarpu_codelet_update.
-    Construction fails when `where_to_execute` is zero or when neither a CPU
-    nor a GPU function could be loaded.
-"""
-mutable struct StarpuCodeletTranslator
-
-    where_to_execute :: UInt32
-
-    cpu_func :: Ptr{Void}
-    cpu_func_name :: Cstring
-
-    gpu_func :: Ptr{Void}
-
-    nbuffers :: Cint
-    modes :: Ptr{Void}
-
-    perfmodel :: Ptr{Void}
-
-
-
-    function StarpuCodeletTranslator(cl :: StarpuCodelet)
-
-        iszero(cl.where_to_execute) && error("StarpuCodelet field \"where_to_execute\" is empty")
-
-        # function names are resolved into addresses of the tasks library
-        cpu_address = load_starpu_function_pointer(cl.cpu_func)
-        gpu_address = load_starpu_function_pointer(cl.gpu_func)
-
-        if (cpu_address == C_NULL && gpu_address == C_NULL)
-            error("No function specified inside codelet")
-        end
-
-        translator = new()
-
-        translator.where_to_execute = cl.where_to_execute
-
-        translator.cpu_func = cpu_address
-        translator.cpu_func_name = Cstring_from_String(cl.cpu_func)
-        translator.gpu_func = gpu_address
-
-        translator.nbuffers = Cint(length(cl.modes))
-        translator.modes = pointer(cl.modes)
-
-        translator.perfmodel = cl.perfmodel.c_perfmodel
-
-        return translator
-    end
-
-end

+ 0 - 234
julia/src/Wrapper/Julia/starpu_data_handle.jl

@@ -1,234 +0,0 @@
-
-
-# Module-level values are declared const : they are never reassigned, and a
-# non-const global has no known type inside the functions reading it
-const STARPU_MAIN_RAM = 0 #TODO: ENUM
-
-
-# Raw pointer to a data handle
-const StarpuDataHandlePointer = Ptr{Void}
-
-
-
-# Data handle pointer stored together with the list of its destructors
-const StarpuDataHandle = StarpuDestructible{StarpuDataHandlePointer}
-
-
-
-# Wraps `ptr` into a StarpuDataHandle having `destr` as destructors
-function StarpuNewDataHandle(ptr :: StarpuDataHandlePointer, destr :: Function...) :: StarpuDataHandle
-    handle = StarpuDestructible(ptr, destr...)
-    return handle
-end
-
-
-
-# Calls starpu_data_unregister on a raw data handle pointer
-function starpu_data_unregister_pointer(ptr :: StarpuDataHandlePointer)
-    return @starpucall(starpu_data_unregister, Void, (Ptr{Void},), ptr)
-end
-
-
-export starpu_data_unregister
-"""
-    Unregisters every given handle right now : the unregister destructor is
-    executed and removed from the destructors stored in the handle.
-"""
-function starpu_data_unregister(handles :: StarpuDataHandle...)
-    foreach(h -> starpu_execute_destructor!(h, starpu_data_unregister_pointer), handles)
-end
-
-
-
-export starpu_data_register
-
-"""
-    Registers vector `v` through starpu_vector_data_register, with
-    STARPU_MAIN_RAM as home node.
-    The returned handle has starpu_data_unregister_pointer as destructor.
-"""
-function starpu_data_register(v :: Vector{T}) where T
-
-    handle_ref = Ref{Ptr{Void}}(0)
-    elements = pointer(v)
-
-    @starpucall(starpu_vector_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void}, UInt32, Csize_t),
-                handle_ref, STARPU_MAIN_RAM, elements,
-                length(v), sizeof(T)
-            )
-
-    new_handle = StarpuNewDataHandle(handle_ref[], starpu_data_unregister_pointer)
-    return new_handle
-end
-
-
-"""
-    Registers matrix `m` through starpu_matrix_data_register, with
-    STARPU_MAIN_RAM as home node. The first dimension of `m` is given both
-    as leading dimension and as first size.
-    The returned handle has starpu_data_unregister_pointer as destructor.
-"""
-function starpu_data_register(m :: Matrix{T}) where T
-
-    handle_ref = Ref{Ptr{Void}}(0)
-    elements = pointer(m)
-    height = size(m, 1)
-    width = size(m, 2)
-
-    @starpucall(starpu_matrix_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void},
-                    UInt32, UInt32, UInt32, Csize_t),
-                handle_ref, STARPU_MAIN_RAM, elements,
-                height, height, width, sizeof(T)
-            )
-
-    new_handle = StarpuNewDataHandle(handle_ref[], starpu_data_unregister_pointer)
-    return new_handle
-end
-
-
-"""
-    Registers the 3 dimensional array `block` through
-    starpu_block_data_register, with STARPU_MAIN_RAM as home node. The two
-    leading dimensions given are `height` and `height * width`.
-    The returned handle has starpu_data_unregister_pointer as destructor.
-"""
-function starpu_data_register(block :: Array{T,3}) where T
-
-    handle_ref = Ref{Ptr{Void}}(0)
-    elements = pointer(block)
-    height = size(block, 1)
-    width = size(block, 2)
-    depth = size(block, 3)
-
-    @starpucall(starpu_block_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void},
-                    UInt32, UInt32, UInt32, UInt32,
-                    UInt32, Csize_t),
-                handle_ref, STARPU_MAIN_RAM, elements,
-                height, height * width,
-                height, width, depth,
-                sizeof(T)
-            )
-
-    new_handle = StarpuNewDataHandle(handle_ref[], starpu_data_unregister_pointer)
-    return new_handle
-end
-
-
-
-"""
-    Registers the value stored in `ref` through starpu_variable_data_register,
-    with STARPU_MAIN_RAM as home node and sizeof(T) as size.
-    The returned handle has starpu_data_unregister_pointer as destructor.
-"""
-function starpu_data_register(ref :: Ref{T}) where T
-
-    handle_ref = Ref{Ptr{Void}}(0)
-
-    @starpucall(starpu_variable_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void}, Csize_t),
-                handle_ref, STARPU_MAIN_RAM, ref, sizeof(T)
-            )
-
-    new_handle = StarpuNewDataHandle(handle_ref[], starpu_data_unregister_pointer)
-    return new_handle
-end
-
-
-
-"""
-    Registers several pieces of data at once, from left to right, and
-    returns their handles gathered in one vector (same order as the arguments).
-"""
-function starpu_data_register(x1, x2, next_args...)
-
-    first_handle = starpu_data_register(x1)
-    second_handle = starpu_data_register(x2)
-
-    # remaining arguments, registered after the first two
-    other_handles = map(starpu_data_register, next_args)
-
-    all_handles = [first_handle, second_handle, other_handles...]
-    return all_handles
-end
-
-
-
-
-export starpu_data_get_sub_data
-
-"""
-    Returns a handle on the sub data number `id` of `root_data`.
-    `id` is given to StarPU minus one, with 1 as depth.
-    The returned handle is created without any destructor.
-"""
-function starpu_data_get_sub_data(root_data :: StarpuDataHandle, id)
-
-    return StarpuNewDataHandle(
-        @starpucall(starpu_data_get_sub_data,
-                    Ptr{Void}, (Ptr{Void}, Cuint, Cuint),
-                    root_data.object, 1, id - 1
-                )
-    )
-end
-
-
-"""
-    Returns a handle on the sub data (`idx`, `idy`) of `root_data`.
-    Both indexes are given to StarPU minus one, with 2 as depth.
-    The returned handle is created without any destructor.
-"""
-function starpu_data_get_sub_data(root_data :: StarpuDataHandle, idx, idy)
-
-    return StarpuNewDataHandle(
-        @starpucall(starpu_data_get_sub_data,
-                    Ptr{Void}, (Ptr{Void}, Cuint, Cuint, Cuint),
-                    root_data.object, 2, idx - 1, idy - 1
-                )
-    )
-end
-
-import Base.getindex
-
-
-
-# handle[indexes...] is a shortcut for starpu_data_get_sub_data(handle, indexes...)
-function Base.getindex(handle :: StarpuDataHandle, indexes...)
-    sub_handle = starpu_data_get_sub_data(handle, indexes...)
-    return sub_handle
-end
-
-
-
-
-export StarpuDataFilterFunc
-export STARPU_MATRIX_FILTER_VERTICAL_BLOCK, STARPU_MATRIX_FILTER_BLOCK
-
-@enum(StarpuDataFilterFunc,
-    # Identifiers stored in the `filter_func` field of a StarpuDataFilter
-    STARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
-    STARPU_MATRIX_FILTER_BLOCK = 1
-)
-
-export StarpuDataFilter
-"""
-    Filter given by reference to the partitioning functions : `filter_func`
-    identifies the filter and `nchildren` is stored as a Cuint.
-    TODO : use real function pointers loaded from starpu shared library
-"""
-mutable struct StarpuDataFilter
-
-    filter_func :: StarpuDataFilterFunc
-    nchildren :: Cuint
-
-    function StarpuDataFilter(filter_func, nchildren)
-        # same conversions as the ones done when assigning the fields
-        func = convert(StarpuDataFilterFunc, filter_func)
-        children = Cuint(nchildren)
-        return new(func, children)
-    end
-
-end
-
-
-# Calls starpu_data_unpartition on a raw data handle pointer, with STARPU_MAIN_RAM as second argument
-function starpu_data_unpartition_pointer(ptr :: StarpuDataHandlePointer)
-    return @starpucall(starpu_data_unpartition, Void, (Ptr{Void}, Cuint), ptr, STARPU_MAIN_RAM)
-end
-
-export starpu_data_partition
-"""
-    Partitions `handle` according to `filter` (call to jlstarpu_data_partition).
-    The unpartition destructor is added to the handle before partitioning.
-"""
-function starpu_data_partition(handle :: StarpuDataHandle, filter :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    filter_ref = Ref{StarpuDataFilter}(filter)
-
-    return @starpucall(jlstarpu_data_partition,
-            Void, (Ptr{Void}, Ptr{Void}),
-            handle.object, filter_ref
-        )
-end
-
-
-export starpu_data_unpartition
-"""
-    Unpartitions every given handle right now : the unpartition destructor is
-    executed and removed from the destructors stored in the handle.
-"""
-function starpu_data_unpartition(handles :: StarpuDataHandle...)
-
-    foreach(h -> starpu_execute_destructor!(h, starpu_data_unpartition_pointer), handles)
-
-    return nothing
-end
-
-
-
-export starpu_data_map_filters
-
-"""
-    Applies one filter to `handle` (call to jlstarpu_data_map_filters_1_arg).
-    The unpartition destructor is added to the handle beforehand.
-"""
-function starpu_data_map_filters(handle :: StarpuDataHandle, filter :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    filter_ref = Ref{StarpuDataFilter}(filter)
-
-    return @starpucall(jlstarpu_data_map_filters_1_arg,
-            Void, (Ptr{Void}, Ptr{Void}),
-            handle.object, filter_ref
-    )
-end
-
-
-"""
-    Applies two filters to `handle` (call to jlstarpu_data_map_filters_2_arg).
-    The unpartition destructor is added to the handle beforehand.
-"""
-function starpu_data_map_filters(handle :: StarpuDataHandle, filter_1 :: StarpuDataFilter, filter_2 :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    first_ref = Ref{StarpuDataFilter}(filter_1)
-    second_ref = Ref{StarpuDataFilter}(filter_2)
-
-    return @starpucall(jlstarpu_data_map_filters_2_arg,
-            Void, (Ptr{Void}, Ptr{Void}, Ptr{Void}),
-            handle.object,
-            first_ref,
-            second_ref
-    )
-
-end

+ 0 - 49
julia/src/Wrapper/Julia/starpu_define.jl

@@ -1,49 +0,0 @@
-
-
-
-
-# Declared const : these values are never reassigned, and a non-const global
-# has no known type inside the functions reading it
-const STARPU_MAXIMPLEMENTATIONS = 1 # TODO : These must be the same values as defined in C macros !
-const STARPU_NMAXBUFS = 8 # TODO : find a way to make it automatically match
-
-
-# Flags combined into the `where_to_execute` field of a codelet
-const STARPU_CPU = 1 << 1
-const STARPU_CUDA = 1 << 3
-
-# Expands to the tuple (symbol, "libjlstarpu_c_wrapper")
-macro starpufunc(symbol)
-    return Expr(:tuple, symbol, "libjlstarpu_c_wrapper")
-end
-
-"""
-    Calls function `func` of library "libjlstarpu_c_wrapper".
-    Arguments are the same as the ones of ccall : return type, tuple of
-    argument types, then the arguments themselves.
-"""
-macro starpucall(func, ret_type, arg_types, args...)
-    # ccall target : (function name, library name)
-    target = (func, "libjlstarpu_c_wrapper")
-    escaped_args = map(esc, args)
-    return Expr(:call, :ccall, target, esc(ret_type), esc(arg_types), escaped_args...)
-end
-
-
-export @debugprint
-# Prints its arguments between the escape sequences "\x1b[32m" and "\x1b[0m", then flushes STDOUT
-macro debugprint(x...)
-
-    printed = map(esc, x)
-
-    return quote
-        println("\x1b[32m", $(printed...), "\x1b[0m")
-        flush(STDOUT)
-    end
-end
-
-
-
-# Gives a Cstring pointing to the data of `str` (no copy is made)
-Cstring_from_String(str :: String) = Cstring(pointer(str))
-
-
-
-# Calls memset (through @starpucall) with value 0 on sizeof(x) bytes of a
-# Ref{T} built from `x`, and returns the pointer given back by memset.
-# NOTE(review): the Ref only exists for this call, confirm that callers do
-# not expect `x` itself to be modified
-function jlstarpu_set_to_zero(x :: T) :: Ptr{Void} where {T}
-    wrapped = Ref{T}(x)
-    return @starpucall(memset,
-          Ptr{Void}, (Ptr{Void}, Cint, Csize_t),
-          wrapped, 0, sizeof(x)
-        )
-end

+ 0 - 125
julia/src/Wrapper/Julia/starpu_destructible.jl

@@ -1,125 +0,0 @@
-
-
-
-"""
-    Object used to store a list of functions (destructors) which must
-    be applied to an object
-"""
-mutable struct StarpuDestructible{T}
-
-    object :: T
-    destructors :: LinkedList{Function}
-
-end
-
-# One list of destructible objects per opened block : a list is pushed by
-# starpu_enter_new_block and popped by starpu_exit_block.
-# Declared const : the vector is only modified in place, never reassigned
-const starpu_block_list = Vector{LinkedList{StarpuDestructible}}()
-
-
-
-"""
-    Creates a StarpuDestructible holding `obj` and the given destructors (same
-    order), and adds it to the head of the list of the innermost block.
-    Raises an error when no block has been entered.
-"""
-function StarpuDestructible(obj :: T, destructors :: Function...) where T
-
-    isempty(starpu_block_list) && error("Creation of a StarpuDestructible object while not beeing in a @starpu_block")
-
-    destructor_list = LinkedList{Function}()
-    foreach(d -> add_to_tail!(destructor_list, d), destructors)
-
-    destructible = StarpuDestructible{T}(obj, destructor_list)
-
-    # last element of starpu_block_list is the innermost block
-    add_to_head!(starpu_block_list[end], destructible)
-
-    return destructible
-end
-
-
-
-
-# Opens a new block : an empty list of destructible objects is pushed on starpu_block_list
-function starpu_enter_new_block()
-    block_objects = LinkedList{StarpuDestructible}()
-    return push!(starpu_block_list, block_objects)
-end
-
-"""
-    Applies every stored destructor to the object stored in `x`, then
-    empties the list of destructors
-"""
-function starpu_destruct!(x :: StarpuDestructible)
-
-    foreach(destructor -> destructor(x.object), x.destructors)
-    empty!(x.destructors)
-
-    return nothing
-end
-
-
-# Closes the innermost block : its list is popped from starpu_block_list and
-# every object it contains is destructed
-function starpu_exit_block()
-    foreach(starpu_destruct!, pop!(starpu_block_list))
-end
-
-"""
-    Adds new destructors to the head of the list of `x`. They will be
-        executed before already stored ones when calling starpu_destruct!
-"""
-function starpu_add_destructor!(x :: StarpuDestructible, destrs :: Function...)
-
-    foreach(d -> add_to_head!(x.destructors, d), destrs)
-
-    return nothing
-end
-
-"""
-    Removes destructor `destr` from `x` without executing it.
-    Only the first matching link is removed. Always returns nothing.
-"""
-function starpu_remove_destructor!(x :: StarpuDestructible, destr :: Function)
-    # presumably @foreach_asc binds `lnk` to each link of the list in turn - confirm in linked_list.jl
-    @foreach_asc x.destructors lnk begin
-        # a link stores its function in the `data` field
-        if (lnk.data == destr)
-            remove_link!(lnk)
-            break
-        end
-    end
-
-    return nothing
-end
-
-
-"""
-    Removes `destr` from the destructors stored in `x` (nothing happens if it
-    was not one of them), then applies it to the stored object and returns
-    its result.
-    This function can be used to allow user to execute a specific action manually
-        (ex : explicit call to starpu_data_unpartition() without unregistering)
-"""
-function starpu_execute_destructor!(x :: StarpuDestructible, destr :: Function)
-
-    starpu_remove_destructor!(x, destr)
-
-    result = destr(x.object)
-    return result
-end
-
-
-export @starpu_block
-
-"""
-    Declares a block of code : starpu_enter_new_block() is called before
-    `expr` and starpu_exit_block() after it, so that every StarpuDestructible
-    created by `expr` executes its destructors once the block is exited.
-    NOTE(review): starpu_exit_block() is not reached when `expr` throws.
-"""
-macro starpu_block(expr)
-    return Expr(:block,
-                :(starpu_enter_new_block()),
-                esc(expr),
-                :(starpu_exit_block()))
-end

+ 0 - 20
julia/src/Wrapper/Julia/starpu_include.jl

@@ -1,20 +0,0 @@
-
-__precompile__()
-module StarPU
-    # Wrapper module : every file below is included in this order
-
-    include("starpu_define.jl")
-    include("static_structures.jl")
-    include("starpu_simple_functions.jl")
-    include("starpu_perfmodel.jl")
-    include("starpu_codelet.jl")
-    # linked_list.jl comes first : starpu_destructible.jl uses LinkedList
-    include("linked_list.jl")
-    include("starpu_destructible.jl")
-    include("starpu_data_handle.jl")
-
-    include("starpu_task.jl")
-    include("starpu_task_submit.jl")
-    include("starpu_init_shutdown.jl")
-
-end

+ 0 - 0
julia/src/Wrapper/Julia/starpu_init_shutdown.jl


Vissa filer visades inte eftersom för många filer har ändrats