Browse Source

Merge branch 'fpga' of gitlab.inria.fr:starpu/starpu into fpga

Samuel Thibault 4 years ago
parent
commit
0465129baa
100 changed files with 4541 additions and 1304 deletions
  1. 4 0
      Makefile.am
  2. 144 62
      configure.ac
  3. 2 2
      contrib/ci.inria.fr/disabled/Jenkinsfile-basic
  4. 1 1
      contrib/ci.inria.fr/disabled/Jenkinsfile-windows
  5. 2 2
      contrib/ci.inria.fr/job-1-check-windows.bat
  6. 1 0
      contrib/ci.inria.fr/job-1-check.sh
  7. 275 68
      doc/doxygen/chapters/400_python.doxy
  8. 1 1
      doc/doxygen/chapters/470_simgrid.doxy
  9. BIN
      doc/doxygen/chapters/images/starpu_log.png
  10. 1854 0
      doc/doxygen/chapters/images/starpu_log_arr.eps
  11. BIN
      doc/doxygen/chapters/images/starpu_log_arr.png
  12. 339 316
      doc/doxygen/chapters/images/starpu_log.eps
  13. BIN
      doc/doxygen/chapters/images/starpu_log_list.png
  14. 1 1
      doc/doxygen/refman.tex
  15. 0 1
      doc/doxygen_dev/refman.tex
  16. 2 2
      examples/Makefile.am
  17. 2 2
      examples/cpp/add_vectors_interface.cpp
  18. 2 1
      examples/tag_example/tag_example.c
  19. 3 3
      include/fstarpu_mod.f90
  20. 2 0
      include/starpu_config.h.in
  21. 2 3
      include/starpu_scheduler.h
  22. 1 1
      libstarpu-mic.pc.in
  23. 1 1
      libstarpu.pc.in
  24. 12 0
      m4/libs.m4
  25. 3 0
      mpi/examples/Makefile.am
  26. 2 2
      mpi/examples/native_fortran/nf_mm_task_build.f90
  27. 0 1
      mpi/src/mpi/starpu_mpi_mpi.c
  28. 0 1
      mpi/src/starpu_mpi.c
  29. 3 0
      mpi/tests/Makefile.am
  30. 1 1
      socl/src/init.c
  31. 5 3
      src/Makefile.am
  32. 4 2
      src/common/rbtree_i.h
  33. 14 2
      src/common/thread.c
  34. 2 0
      src/common/utils.h
  35. 2 1
      src/core/dependencies/cg.c
  36. 15 20
      src/core/jobs.c
  37. 2 4
      src/core/jobs.h
  38. 15 12
      src/core/perfmodel/energy_model.c
  39. 4 3
      src/core/perfmodel/perfmodel_history.c
  40. 1 1
      src/core/perfmodel/perfmodel_print.c
  41. 7 14
      src/core/sched_policy.c
  42. 63 10
      src/core/simgrid.c
  43. 3 0
      src/core/simgrid.h
  44. 1 1
      src/core/task.c
  45. 3 3
      src/core/topology.c
  46. 5 4
      src/core/workers.c
  47. 5 5
      src/core/workers.h
  48. 4 2
      src/datawizard/memalloc.c
  49. 2 1
      src/datawizard/memory_nodes.c
  50. 5 3
      src/debug/traces/starpu_fxt.c
  51. 4 2
      src/drivers/cpu/driver_cpu.c
  52. 4 2
      src/drivers/cuda/driver_cuda_init.c
  53. 2 1
      src/drivers/disk/driver_disk.c
  54. 4 2
      src/drivers/mic/driver_mic_init.c
  55. 4 2
      src/drivers/mpi/driver_mpi_init.c
  56. 4 2
      src/drivers/opencl/driver_opencl_init.c
  57. 2 0
      src/profiling/profiling.c
  58. 6 3
      src/sched_policies/component_heteroprio.c
  59. 4 4
      src/sched_policies/component_worker.c
  60. 5 0
      src/sched_policies/helper_mct.c
  61. 2 1
      src/sched_policies/work_stealing_policy.c
  62. 6 4
      src/util/starpu_data_cpy.c
  63. 1 1
      starpu-1.0-mic.pc.in
  64. 1 1
      starpu-1.0.pc.in
  65. 1 1
      starpu-1.1.pc.in
  66. 1 1
      starpu-1.2.pc.in
  67. 1 1
      starpu-1.3.pc.in
  68. 2 2
      starpufft/src/starpufft-double.h
  69. 2 2
      starpufft/src/starpufft-float.h
  70. 2 2
      starpufft/src/starpufftx.c
  71. 21 0
      starpupy/Makefile.am
  72. 43 0
      starpupy/examples/Makefile.am
  73. 59 0
      starpupy/examples/execute.sh.in
  74. 9 9
      starpupy/tests/starpu_py.py
  75. 19 0
      starpupy/examples/starpu_py.sh
  76. 40 0
      starpupy/examples/starpu_py_np.py
  77. 3 7
      starpupy/src/starpu/delay.py
  78. 350 0
      starpupy/examples/starpu_py_parallel.py
  79. 19 0
      starpupy/examples/starpu_py_parallel.sh
  80. 63 0
      starpupy/src/Makefile.am
  81. 2 1
      starpupy/src/starpu/__init__.py
  82. 29 0
      starpupy/src/delay.py
  83. 63 0
      starpupy/src/intermedia.py
  84. 324 0
      starpupy/src/joblib.py
  85. 23 0
      starpupy/src/setup.cfg.in
  86. 40 0
      starpupy/src/setup.py.in
  87. 0 13
      starpupy/src/starpu/Makefile
  88. 0 147
      starpupy/src/starpu/joblib.py
  89. 0 416
      starpupy/src/starpu/starpu_task_wrapper.c
  90. 536 0
      starpupy/src/starpu_task_wrapper.c
  91. 0 6
      starpupy/tests/Makefile
  92. 0 101
      starpupy/tests/starpu_py_parallel.py
  93. 2 2
      tests/Makefile.am
  94. 2 1
      tests/datawizard/bcsr.c
  95. 1 2
      tests/datawizard/interfaces/test_interfaces.c
  96. 1 0
      tests/main/starpu_worker_exists.c
  97. 2 1
      tests/microbenchs/bandwidth.c
  98. 2 0
      tests/microbenchs/tasks_size_overhead.c
  99. 8 1
      tests/perfmodels/regression_based_memset.c
  100. 0 0
      tools/dev/checker/starpu_check_copyright.sh

+ 4 - 0
Makefile.am

@@ -53,6 +53,10 @@ if STARPU_BUILD_STARPURM
 SUBDIRS += starpurm
 SUBDIRS += starpurm
 endif
 endif
 
 
+if STARPU_BUILD_STARPUPY
+SUBDIRS += starpupy
+endif
+
 if STARPU_BUILD_SC_HYPERVISOR
 if STARPU_BUILD_SC_HYPERVISOR
 SUBDIRS += sc_hypervisor
 SUBDIRS += sc_hypervisor
 endif
 endif

File diff suppressed because it is too large
+ 144 - 62
configure.ac


+ 2 - 2
contrib/ci.inria.fr/disabled/Jenkinsfile-basic

@@ -34,7 +34,7 @@ pipeline
 		{
 		{
 			steps
 			steps
 			{
 			{
-				node('autotools')
+				node('autotools2')
 				{
 				{
 					checkout scm
 					checkout scm
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
@@ -62,7 +62,7 @@ pipeline
 			{
 			{
 				script
 				script
 				{
 				{
-					labelToSelect = 'unix'
+					labelToSelect = 'unix2'
 					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
 					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
 					{
 					{
 						node -> node.getLabelString().contains(labelToSelect) ? node.name : null
 						node -> node.getLabelString().contains(labelToSelect) ? node.name : null

+ 1 - 1
contrib/ci.inria.fr/disabled/Jenkinsfile-windows

@@ -34,7 +34,7 @@ pipeline
 		{
 		{
 			steps
 			steps
 			{
 			{
-				node('autotools')
+				node('autotools2')
 				{
 				{
 					checkout scm
 					checkout scm
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'

+ 2 - 2
contrib/ci.inria.fr/job-1-check-windows.bat

@@ -14,9 +14,9 @@ REM
 REM See the GNU Lesser General Public License in COPYING.LGPL for more details.
 REM See the GNU Lesser General Public License in COPYING.LGPL for more details.
 REM
 REM
 
 
-set PATH=%PATH%;C:\MinGW\msys\1.0\bin;c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin
+set PATH=%PATH%;C:\MinGW\msys\1.0\bin;c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32
 sh -c "./job-1-build-windows.sh"
 sh -c "./job-1-build-windows.sh"
-set PATH=C:\Windows\SysWOW64;C:\Program Files (x86)\Mozilla Firefox;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\SysWOW64;C:\Program Files\Java\jre7\bin;
+set PATH=C:\Windows\SysWOW64;C:\Program Files (x86)\Mozilla Firefox;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\SysWOW64;C:\Program Files\Java\jre7\bin;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32
 set HWLOC=c:\StarPU\hwloc-win32-build-1.11.0
 set HWLOC=c:\StarPU\hwloc-win32-build-1.11.0
 
 
 cd starpu_install
 cd starpu_install

+ 1 - 0
contrib/ci.inria.fr/job-1-check.sh

@@ -41,6 +41,7 @@ env > $PWD/env
 
 
 test -d $basename && chmod -R u+rwX $basename && rm -rf $basename
 test -d $basename && chmod -R u+rwX $basename && rm -rf $basename
 tar xfz ../$tarball
 tar xfz ../$tarball
+touch --date="last hour" $(find $basename)
 cd $basename
 cd $basename
 mkdir build
 mkdir build
 cd build
 cd build

File diff suppressed because it is too large
+ 275 - 68
doc/doxygen/chapters/400_python.doxy


+ 1 - 1
doc/doxygen/chapters/470_simgrid.doxy

@@ -23,7 +23,7 @@
 
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
 StarPU can use Simgrid in order to simulate execution on an arbitrary
 platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
 platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
-3.25. SimGrid versions 3.25 and above need to be configured with -Denable_msg=ON .
+3.26. SimGrid version 3.25 needs to be configured with -Denable_msg=ON .
 Other versions may have compatibility issues. 3.17 notably does not build at
 Other versions may have compatibility issues. 3.17 notably does not build at
 all. MPI simulation does not work with version 3.22.
 all. MPI simulation does not work with version 3.22.
 
 

BIN
doc/doxygen/chapters/images/starpu_log.png


File diff suppressed because it is too large
+ 1854 - 0
doc/doxygen/chapters/images/starpu_log_arr.eps


BIN
doc/doxygen/chapters/images/starpu_log_arr.png


File diff suppressed because it is too large
+ 339 - 316
doc/doxygen/chapters/images/starpu_log.eps


BIN
doc/doxygen/chapters/images/starpu_log_list.png


+ 1 - 1
doc/doxygen/refman.tex

@@ -138,7 +138,7 @@ Documentation License”.
 
 
 \part{StarPU Extensions}
 \part{StarPU Extensions}
 
 
-\chapter{PythonInterface}
+\chapter{Python Interface}
 \label{PythonInterface}
 \label{PythonInterface}
 \hypertarget{PythonInterface}{}
 \hypertarget{PythonInterface}{}
 \input{PythonInterface}
 \input{PythonInterface}

+ 0 - 1
doc/doxygen_dev/refman.tex

@@ -148,7 +148,6 @@ Documentation License”.
 \input{starpu__data__cpy_8h}
 \input{starpu__data__cpy_8h}
 \input{starpu__debug__helpers_8h}
 \input{starpu__debug__helpers_8h}
 \input{starpu__fxt_8h}
 \input{starpu__fxt_8h}
-\input{starpu__parameters_8h}
 \input{starpu__spinlock_8h}
 \input{starpu__spinlock_8h}
 \input{starpu__task__insert__utils_8h}
 \input{starpu__task__insert__utils_8h}
 \input{tags_8h}
 \input{tags_8h}

+ 2 - 2
examples/Makefile.am

@@ -20,8 +20,8 @@ include $(top_srcdir)/starpu.mk
 
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
-AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused
-AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 2 - 2
examples/cpp/add_vectors_interface.cpp

@@ -61,9 +61,9 @@ class my_allocator
 		node = a.get_node();
 		node = a.get_node();
 	}
 	}
 
 
-	explicit my_allocator(const unsigned node)
+	explicit my_allocator(const unsigned thenode)
 	{
 	{
-		this->node = node;
+		this->node = thenode;
 	}
 	}
 
 
 	pointer allocate(size_type n, const void * = 0)
 	pointer allocate(size_type n, const void * = 0)

+ 2 - 1
examples/tag_example/tag_example.c

@@ -223,7 +223,8 @@ int main(int argc, char **argv)
 	int ret;
 	int ret;
 
 
 #ifdef STARPU_HAVE_HELGRIND_H
 #ifdef STARPU_HAVE_HELGRIND_H
-	if (RUNNING_ON_VALGRIND) {
+	if (RUNNING_ON_VALGRIND)
+	{
 		ni /= 2;
 		ni /= 2;
 		nj /= 2;
 		nj /= 2;
 		nk /= 2;
 		nk /= 2;

+ 3 - 3
include/fstarpu_mod.f90

@@ -1054,7 +1054,7 @@ module fstarpu_mod
                 end subroutine fstarpu_vector_data_register
                 end subroutine fstarpu_vector_data_register
 
 
                 ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
                 ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
-                subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) &
                                 bind(C,name="starpu_vector_ptr_register")
                                 bind(C,name="starpu_vector_ptr_register")
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         type(c_ptr), intent(out) :: dh
                         type(c_ptr), intent(out) :: dh
@@ -1092,7 +1092,7 @@ module fstarpu_mod
                 end subroutine fstarpu_variable_data_register
                 end subroutine fstarpu_variable_data_register
 
 
                 ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
                 ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
-                subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) &
                                 bind(C,name="starpu_variable_ptr_register")
                                 bind(C,name="starpu_variable_ptr_register")
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         type(c_ptr), intent(out) :: dh
                         type(c_ptr), intent(out) :: dh
@@ -1758,7 +1758,7 @@ module fstarpu_mod
                 end function fstarpu_data_descr_array_alloc
                 end function fstarpu_data_descr_array_alloc
 
 
                 ! struct starpu_data_descr *fstarpu_data_descr_alloc(void);
                 ! struct starpu_data_descr *fstarpu_data_descr_alloc(void);
-                function fstarpu_data_descr_alloc (nb) bind(C)
+                function fstarpu_data_descr_alloc () bind(C)
                         use iso_c_binding, only: c_ptr
                         use iso_c_binding, only: c_ptr
                         type(c_ptr) :: fstarpu_data_descr_alloc
                         type(c_ptr) :: fstarpu_data_descr_alloc
                 end function fstarpu_data_descr_alloc
                 end function fstarpu_data_descr_alloc

+ 2 - 0
include/starpu_config.h.in

@@ -331,4 +331,6 @@ typedef ssize_t starpu_ssize_t;
 #undef STARPU_HAVE_STATEMENT_EXPRESSIONS
 #undef STARPU_HAVE_STATEMENT_EXPRESSIONS
 #undef STARPU_PERF_MODEL_DIR
 #undef STARPU_PERF_MODEL_DIR
 
 
+#undef STARPU_PYTHON_HAVE_NUMPY
+
 #endif
 #endif

+ 2 - 3
include/starpu_scheduler.h

@@ -294,9 +294,8 @@ int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_t
 /**
 /**
    The scheduling policy may put tasks directly into a worker’s local
    The scheduling policy may put tasks directly into a worker’s local
    queue so that it is not always necessary to create its own queue
    queue so that it is not always necessary to create its own queue
-   when the local queue is sufficient. If \p back is not 0, \p task is
-   put at the back of the queue where the worker will pop tasks first.
-   Setting \p back to 0 therefore ensures a FIFO ordering.
+   when the local queue is sufficient. \p back is ignored: the task priority is
+   used to order tasks in this queue.
 */
 */
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
 
 

+ 1 - 1
libstarpu-mic.pc.in

@@ -22,6 +22,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_API
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
libstarpu.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 12 - 0
m4/libs.m4

@@ -202,3 +202,15 @@ AC_DEFUN([IS_SUPPORTED_FLAG],
 	IS_SUPPORTED_FFLAG($1)
 	IS_SUPPORTED_FFLAG($1)
 	IS_SUPPORTED_FCFLAG($1)
 	IS_SUPPORTED_FCFLAG($1)
 ])
 ])
+
+# AC_PYTHON_MODULE(modulename, [action-if-found], [action-if-not-found])
+# Check if the given python module is available
+AC_DEFUN([AC_PYTHON_MODULE],
+[
+	echo "import $1" | $PYTHON - 2>/dev/null
+	if test $? -ne 0 ; then
+	   	$3
+	else
+		$2
+	fi
+])

+ 3 - 0
mpi/examples/Makefile.am

@@ -108,6 +108,9 @@ endif
 endif
 endif
 
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS)

+ 2 - 2
mpi/examples/native_fortran/nf_mm_task_build.f90

@@ -169,7 +169,7 @@ program nf_mm
         do b_col=1,NB
         do b_col=1,NB
            do b_row=1,NB
            do b_row=1,NB
               task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
               task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
-                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_A(b_row), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 C_NULL_PTR /))
                                                 C_NULL_PTR /))
@@ -177,7 +177,7 @@ program nf_mm
                  ret = fstarpu_task_submit(task)
                  ret = fstarpu_task_submit(task)
               endif
               endif
               call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
               call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
-                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_A(b_row), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 C_NULL_PTR /))
                                                 C_NULL_PTR /))

+ 0 - 1
mpi/src/mpi/starpu_mpi_mpi.c

@@ -41,7 +41,6 @@
 #include <core/simgrid.h>
 #include <core/simgrid.h>
 #include <core/task.h>
 #include <core/task.h>
 #include <core/topology.h>
 #include <core/topology.h>
-#include <core/workers.h>
 
 
 #ifdef STARPU_USE_MPI_MPI
 #ifdef STARPU_USE_MPI_MPI
 
 

+ 0 - 1
mpi/src/starpu_mpi.c

@@ -33,7 +33,6 @@
 #include <core/simgrid.h>
 #include <core/simgrid.h>
 #include <core/task.h>
 #include <core/task.h>
 #include <core/topology.h>
 #include <core/topology.h>
-#include <core/workers.h>
 
 
 static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
 static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
 {
 {

+ 3 - 0
mpi/tests/Makefile.am

@@ -84,6 +84,9 @@ endif
 endif
 endif
 
 
 AM_CFLAGS += -Wno-unused
 AM_CFLAGS += -Wno-unused
+AM_CXXFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(STARPU_CUDA_LDFLAGS)
 LIBS += $(STARPU_CUDA_LDFLAGS)

+ 1 - 1
socl/src/init.c

@@ -16,7 +16,7 @@
  */
  */
 
 
 #include <stdlib.h>
 #include <stdlib.h>
-#include "../src/core/workers.h"
+#include "../src/common/utils.h"
 #include "socl.h"
 #include "socl.h"
 #include "gc.h"
 #include "gc.h"
 #include "mem_objects.h"
 #include "mem_objects.h"

+ 5 - 3
src/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2009-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 # Copyright (C) 2013       Simon Archipoff
 # Copyright (C) 2013       Simon Archipoff
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -21,6 +21,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/include/ -DBUILDING_STARPU -DSTARPU_DATADIR='"$(da
 AM_CPPFLAGS += $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS += $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS += $(FXT_CFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(STARPU_RCCE_CPPFLAGS)
 AM_CPPFLAGS += $(FXT_CFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(STARPU_RCCE_CPPFLAGS)
 LIBS += -lm $(LIBSTARPU_LDFLAGS)
 LIBS += -lm $(LIBSTARPU_LDFLAGS)
+if STARPU_USE_MPI_MASTER_SLAVE
+LIBS += $(MPICC_LDFLAGS)
+endif
 
 
 SUBDIRS =
 SUBDIRS =
 
 
@@ -60,8 +63,7 @@ endif STARPU_HAVE_WINDOWS
 
 
 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 
 
-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined									\
-  -version-info $(libstarpu_so_version)
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined -version-info $(libstarpu_so_version)
 
 
 if STARPU_HAVE_DARWIN
 if STARPU_HAVE_DARWIN
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS += \
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS += \

+ 4 - 2
src/common/rbtree_i.h

@@ -44,7 +44,8 @@
  * architectures, as long as the nodes aren't embedded in structures with
  * architectures, as long as the nodes aren't embedded in structures with
  * special alignment constraints such as member packing.
  * special alignment constraints such as member packing.
  */
  */
-struct starpu_rbtree_node {
+struct starpu_rbtree_node
+{
     uintptr_t parent;
     uintptr_t parent;
     struct starpu_rbtree_node *children[2];
     struct starpu_rbtree_node *children[2];
 };
 };
@@ -52,7 +53,8 @@ struct starpu_rbtree_node {
 /**
 /**
  * Red-black tree structure.
  * Red-black tree structure.
  */
  */
-struct starpu_rbtree {
+struct starpu_rbtree
+{
     struct starpu_rbtree_node *root;
     struct starpu_rbtree_node *root;
 };
 };
 
 

+ 14 - 2
src/common/thread.c

@@ -96,14 +96,22 @@ int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const s
 	if (attr && attr->stacksize)
 	if (attr && attr->stacksize)
 		sg_actor_set_stacksize(*thread, attr->stacksize);
 		sg_actor_set_stacksize(*thread, attr->stacksize);
 #endif
 #endif
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(*thread, tsd);
+#else
 	sg_actor_data_set(*thread, tsd);
 	sg_actor_data_set(*thread, tsd);
+#endif
 	sg_actor_start(*thread, _starpu_simgrid_thread_start, 2, _args);
 	sg_actor_start(*thread, _starpu_simgrid_thread_start, 2, _args);
 #else
 #else
 	*thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args);
 	*thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args);
 #ifdef HAVE_SG_ACTOR_DATA
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(*thread, tsd);
+#else
 	sg_actor_data_set(*thread, tsd);
 	sg_actor_data_set(*thread, tsd);
 #endif
 #endif
 #endif
 #endif
+#endif
 #ifndef HAVE_SG_ACTOR_SET_STACKSIZE
 #ifndef HAVE_SG_ACTOR_SET_STACKSIZE
 	if (attr && attr->stacksize)
 	if (attr && attr->stacksize)
 		_starpu_simgrid_set_stack_size(_starpu_default_stack_size);
 		_starpu_simgrid_set_stack_size(_starpu_default_stack_size);
@@ -328,7 +336,9 @@ extern void *smpi_process_get_user_data();
 int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 {
 {
 	void **array;
 	void **array;
-#ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_GET_DATA
+	array = sg_actor_get_data(sg_actor_self());
+#elif defined(HAVE_SG_ACTOR_DATA)
 	array = sg_actor_data(sg_actor_self());
 	array = sg_actor_data(sg_actor_self());
 #else
 #else
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)
@@ -355,7 +365,9 @@ int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 {
 {
 	void **array;
 	void **array;
-#ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_GET_DATA
+	array = sg_actor_get_data(sg_actor_self());
+#elif defined(HAVE_SG_ACTOR_DATA)
 	array = sg_actor_data(sg_actor_self());
 	array = sg_actor_data(sg_actor_self());
 #else
 #else
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)

+ 2 - 0
src/common/utils.h

@@ -183,4 +183,6 @@ int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex);
 
 
 void _starpu_util_init(void);
 void _starpu_util_init(void);
 
 
+enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
+
 #endif // __COMMON_UTILS_H__
 #endif // __COMMON_UTILS_H__

+ 2 - 1
src/core/dependencies/cg.c

@@ -221,7 +221,8 @@ void _starpu_notify_cg(void *pred STARPU_ATTRIBUTE_UNUSED, struct _starpu_cg *cg
 					tag_successors->ndeps_completed = 0;
 					tag_successors->ndeps_completed = 0;
 					/* This releases the lock */
 					/* This releases the lock */
 					_starpu_tag_set_ready(tag);
 					_starpu_tag_set_ready(tag);
-				} else
+				}
+				else
 					_starpu_spin_unlock(&tag->lock);
 					_starpu_spin_unlock(&tag->lock);
 				break;
 				break;
 			}
 			}

+ 15 - 20
src/core/jobs.c

@@ -347,19 +347,10 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 				_starpu_spin_unlock(&handle->header_lock);
 				_starpu_spin_unlock(&handle->header_lock);
 		}
 		}
 	}
 	}
+
 	/* Check nowhere before releasing the sequential consistency (which may
 	/* Check nowhere before releasing the sequential consistency (which may
 	 * unregister the handle and free its switch_cl, and thus task->cl here.  */
 	 * unregister the handle and free its switch_cl, and thus task->cl here.  */
 	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE;
 	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE;
-	/* If this is a continuation, we do not release task dependencies now.
-	 * Task dependencies will be released only when the continued task
-	 * fully completes */
-	if (!continuation)
-	{
-		/* Tell other tasks that we don't exist any more, thus no need for
-		 * implicit dependencies any more.  */
-		_starpu_release_task_enforce_sequential_consistency(j);
-	}
-
 	/* If the job was executed on a combined worker there is no need for the
 	/* If the job was executed on a combined worker there is no need for the
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * data as it's not linked to an actual worker */
 	 * data as it's not linked to an actual worker */
@@ -395,6 +386,16 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	if (!callback && task->cl)
 	if (!callback && task->cl)
 		callback = task->cl->callback_func;
 		callback = task->cl->callback_func;
 
 
+	/* If this is a continuation, we do not release task dependencies now.
+	 * Task dependencies will be released only when the continued task
+	 * fully completes */
+	if (!continuation)
+	{
+		/* Tell other tasks that we don't exist any more, thus no need for
+		 * implicit dependencies any more.  */
+		_starpu_release_task_enforce_sequential_consistency(j);
+	}
+
 	/* Task does not have a cl, but has explicit data dependencies, we need
 	/* Task does not have a cl, but has explicit data dependencies, we need
 	 * to tell them that we will not exist any more before notifying the
 	 * to tell them that we will not exist any more before notifying the
 	 * tasks waiting for us
 	 * tasks waiting for us
@@ -764,14 +765,14 @@ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker)
 		}
 		}
 	}
 	}
 
 
-	if (!starpu_task_list_empty(&worker->local_tasks))
-		task = starpu_task_list_pop_front(&worker->local_tasks);
+	if (!starpu_task_prio_list_empty(&worker->local_tasks))
+		task = starpu_task_prio_list_pop_front_highest(&worker->local_tasks);
 
 
 	_starpu_pop_task_end(task);
 	_starpu_pop_task_end(task);
 	return task;
 	return task;
 }
 }
 
 
-int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int prio)
+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task)
 {
 {
 	/* Check that the worker is able to execute the task ! */
 	/* Check that the worker is able to execute the task ! */
 	STARPU_ASSERT(task && task->cl);
 	STARPU_ASSERT(task && task->cl);
@@ -814,13 +815,7 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 	}
 	}
 	else
 	else
 	{
 	{
-#ifdef STARPU_DEVEL
-#warning FIXME use a prio_list
-#endif
-		if (prio)
-			starpu_task_list_push_front(&worker->local_tasks, task);
-		else
-			starpu_task_list_push_back(&worker->local_tasks, task);
+		starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	}
 	}
 
 
 	starpu_wake_worker_locked(worker->workerid);
 	starpu_wake_worker_locked(worker->workerid);

+ 2 - 4
src/core/jobs.h

@@ -269,10 +269,8 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_p
 struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
 struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
 
 
 /** Put a task into the pool of tasks that are explicitly attributed to the
 /** Put a task into the pool of tasks that are explicitly attributed to the
- * specified worker. If "back" is set, the task is put at the back of the list.
- * Considering the tasks are popped from the back, this value should be 0 to
- * enforce a FIFO ordering. */
-int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int prio);
+ * specified worker. */
+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task);
 
 
 #define _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].index : job->ordered_buffers[i].index)
 #define _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].index : job->ordered_buffers[i].index)
 #define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].handle : job->ordered_buffers[i].handle)
 #define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].handle : job->ordered_buffers[i].handle)

+ 15 - 12
src/core/perfmodel/energy_model.c

@@ -56,8 +56,11 @@ static const int N_EVTS = 2;
 
 
 static int nsockets;
 static int nsockets;
 
 
-static const char* event_names[] = { "rapl::RAPL_ENERGY_PKG:cpu=%d",
-				     "rapl::RAPL_ENERGY_DRAM:cpu=%d"};
+static const char* event_names[] =
+{
+	"rapl::RAPL_ENERGY_PKG:cpu=%d",
+	"rapl::RAPL_ENERGY_DRAM:cpu=%d"
+};
 
 
 static int add_event(int EventSet, int socket);
 static int add_event(int EventSet, int socket);
 
 
@@ -66,9 +69,6 @@ static int add_event(int EventSet, int socket);
 /*must be initialized to PAPI_NULL before calling PAPI_create_event*/
 /*must be initialized to PAPI_NULL before calling PAPI_create_event*/
 static int EventSet = PAPI_NULL;
 static int EventSet = PAPI_NULL;
 
 
-/*This is where we store the values we read from the eventset */
-static long long *values;
-
 #endif
 #endif
 
 
 static double t1;
 static double t1;
@@ -99,9 +99,6 @@ int starpu_energy_start(int workerid, enum starpu_worker_archtype archi)
 
 
 		nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
 		nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
 
 
-		values=calloc(nsockets * N_EVTS,sizeof(long long));
-		STARPU_ASSERT(values);
-
 		if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
 		if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
 			ERROR_RETURN(retval);
 			ERROR_RETURN(retval);
 
 
@@ -178,6 +175,9 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 	{
 	{
 		STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
 		STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
 
 
+		/*This is where we store the values we read from the eventset */
+		long long values[nsockets*N_EVTS];
+
 		/* Stop counting and store the values into the array */
 		/* Stop counting and store the values into the array */
 		if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
 		if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
 			ERROR_RETURN(retval);
 			ERROR_RETURN(retval);
@@ -196,9 +196,6 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 				      delta, t, delta/(t*1.0E-6));
 				      delta, t, delta/(t*1.0E-6));
 			}
 			}
 		}
 		}
-		free(values);
-
-		energy = energy * 0.23 / 1.0e9 / ntasks;
 
 
 		/*removes all events from a PAPI event set */
 		/*removes all events from a PAPI event set */
 		if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
 		if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
@@ -242,7 +239,7 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 
 
 	arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
 	arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
 
 
-	starpu_perfmodel_update_history(model, task, arch, cpuid, nimpl, energy);
+	starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks);
 
 
 	return retval;
 	return retval;
 }
 }
@@ -266,6 +263,12 @@ static int add_event(int eventSet, int socket)
 		retval = PAPI_add_named_event(eventSet, buf);
 		retval = PAPI_add_named_event(eventSet, buf);
 		if (retval != PAPI_OK)
 		if (retval != PAPI_OK)
 		{
 		{
+			if (!strcmp(event_names[i], "rapl::RAPL_ENERGY_DRAM:cpu=%d"))
+			{
+				/* Ok, too bad */
+				_STARPU_DISP("Note: DRAM energy measurement not available\n");
+				return PAPI_OK;
+			}
 			_STARPU_DISP("cannot add event '%s': %d\n", buf, retval);
 			_STARPU_DISP("cannot add event '%s': %d\n", buf, retval);
 			return retval;
 			return retval;
 		}
 		}

+ 4 - 3
src/core/perfmodel/perfmodel_history.c

@@ -1243,7 +1243,8 @@ void _starpu_initialize_registered_performance_models(void)
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
 
 
-	for (archtype = 0; archtype < STARPU_NARCH; archtype++) {
+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
+	{
 		char name[128];
 		char name[128];
 		const char *arch = starpu_worker_get_type_as_env_var(archtype);
 		const char *arch = starpu_worker_get_type_as_env_var(archtype);
 		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
 		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
@@ -1518,8 +1519,8 @@ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
 	return 0;
 	return 0;
 }
 }
 
 
-int starpu_perfmodel_deinit(struct starpu_perfmodel *model){
-
+int starpu_perfmodel_deinit(struct starpu_perfmodel *model)
+{
 	_starpu_deinitialize_performance_model(model);
 	_starpu_deinitialize_performance_model(model);
 	free(model->state);
 	free(model->state);
 	model->state = NULL;
 	model->state = NULL;

+ 1 - 1
src/core/perfmodel/perfmodel_print.c

@@ -30,7 +30,7 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 	ptr = per_arch_model->list;
 	ptr = per_arch_model->list;
 
 
 	if (!parameter && ptr)
 	if (!parameter && ptr)
-		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us)\tstddev (us)\t\tn\n");
+		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tstddev (us or J)\t\tn\n");
 
 
 	while (ptr)
 	while (ptr)
 	{
 	{

+ 7 - 14
src/core/sched_policy.c

@@ -372,10 +372,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 		}
 		}
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 
 
-		if(task->priority > 0)
-			return _starpu_push_local_task(worker, task, 1);
-		else
-			return _starpu_push_local_task(worker, task, 0);
+		return _starpu_push_local_task(worker, task);
 	}
 	}
 	else
 	else
 	{
 	{
@@ -406,7 +403,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
 
 			_STARPU_TRACE_JOB_PUSH(alias, alias->priority);
 			_STARPU_TRACE_JOB_PUSH(alias, alias->priority);
 			worker = _starpu_get_worker_struct(combined_workerid[j]);
 			worker = _starpu_get_worker_struct(combined_workerid[j]);
-			ret |= _starpu_push_local_task(worker, alias, 0);
+			ret |= _starpu_push_local_task(worker, alias);
 		}
 		}
 
 
 		return ret;
 		return ret;
@@ -632,7 +629,8 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 				enum starpu_worker_archtype type;
 				enum starpu_worker_archtype type;
 				for (type = 0; type < STARPU_NARCH; type++)
 				for (type = 0; type < STARPU_NARCH; type++)
 				{
 				{
-					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) {
+					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type))
+					{
 						if (config->arch_nodeid[type] >= 0)
 						if (config->arch_nodeid[type] >= 0)
 							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
 							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
 						break;
 						break;
@@ -1032,7 +1030,7 @@ pick:
 	}
 	}
 
 
 	task->mf_skip = 1;
 	task->mf_skip = 1;
-	starpu_task_list_push_back(&worker->local_tasks, task);
+	starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	goto pick;
 	goto pick;
 
 
 profiling:
 profiling:
@@ -1174,16 +1172,11 @@ void _starpu_wait_on_sched_event(void)
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 }
 }
 
 
-/* The scheduling policy may put tasks directly into a worker's local queue so
- * that it is not always necessary to create its own queue when the local queue
- * is sufficient. If "back" not null, the task is put at the back of the queue
- * where the worker will pop tasks first. Setting "back" to 0 therefore ensures
- * a FIFO ordering. */
-int starpu_push_local_task(int workerid, struct starpu_task *task, int prio)
+int starpu_push_local_task(int workerid, struct starpu_task *task, int back STARPU_ATTRIBUTE_UNUSED)
 {
 {
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 
 
-	return  _starpu_push_local_task(worker, task, prio);
+	return  _starpu_push_local_task(worker, task);
 }
 }
 
 
 void _starpu_print_idle_time()
 void _starpu_print_idle_time()

+ 63 - 10
src/core/simgrid.c

@@ -357,11 +357,16 @@ void _starpu_start_simgrid(int *argc, char **argv)
 	int limit_bandwidth = starpu_get_env_number("STARPU_LIMIT_BANDWIDTH");
 	int limit_bandwidth = starpu_get_env_number("STARPU_LIMIT_BANDWIDTH");
 	if (limit_bandwidth >= 0)
 	if (limit_bandwidth >= 0)
 	{
 	{
-#ifdef HAVE_SG_LINK_BANDWIDTH_SET
+#if defined(HAVE_SG_LINK_BANDWIDTH_SET) || defined(HAVE_SG_LINK_SET_BANDWIDTH)
 		sg_link_t *links = sg_link_list();
 		sg_link_t *links = sg_link_list();
 		int count = sg_link_count(), i;
 		int count = sg_link_count(), i;
-		for (i = 0; i < count; i++) {
+		for (i = 0; i < count; i++)
+		{
+#ifdef HAVE_SG_LINK_SET_BANDWIDTH
+			sg_link_set_bandwidth(links[i], limit_bandwidth * 1000000.);
+#else
 			sg_link_bandwidth_set(links[i], limit_bandwidth * 1000000.);
 			sg_link_bandwidth_set(links[i], limit_bandwidth * 1000000.);
+#endif
 		}
 		}
 #else
 #else
 		_STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26, thus ignored\n", limit_bandwidth);
 		_STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26, thus ignored\n", limit_bandwidth);
@@ -492,7 +497,11 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 
 
 #if defined(HAVE_SG_ACTOR_ATTACH) && defined (HAVE_SG_ACTOR_DATA)
 #if defined(HAVE_SG_ACTOR_ATTACH) && defined (HAVE_SG_ACTOR_DATA)
 		sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
 		sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
+#ifdef HAVE_SG_ACTOR_SET_DATA
+		sg_actor_set_data(actor, tsd);
+#else
 		sg_actor_data_set(actor, tsd);
 		sg_actor_data_set(actor, tsd);
+#endif
 #else
 #else
 		MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
 		MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
 #endif
 #endif
@@ -519,7 +528,11 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 		void **tsd;
 		void **tsd;
 		_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 		_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_DATA
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+		sg_actor_set_data(sg_actor_self(), tsd);
+#else
 		sg_actor_data_set(sg_actor_self(), tsd);
 		sg_actor_data_set(sg_actor_self(), tsd);
+#endif
 #else
 #else
 		smpi_process_set_user_data(tsd);
 		smpi_process_set_user_data(tsd);
 #endif
 #endif
@@ -735,6 +748,9 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 		 * to be able to easily check scheduling robustness */
 		 * to be able to easily check scheduling robustness */
 	}
 	}
 
 
+#ifdef HAVE_SG_HOST_GET_SPEED
+	flops = length/1000000.0*sg_host_get_speed(sg_host_self());
+#else
 #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed)
 #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed)
 #  if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self)
 #  if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self)
 	flops = length/1000000.0*sg_host_speed(sg_host_self());
 	flops = length/1000000.0*sg_host_speed(sg_host_self());
@@ -746,6 +762,7 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 #else
 #else
 	flops = length/1000000.0*MSG_get_host_speed(MSG_host_self());
 	flops = length/1000000.0*MSG_get_host_speed(MSG_host_self());
 #endif
 #endif
+#endif
 
 
 #ifndef HAVE_SG_ACTOR_SELF_EXECUTE
 #ifndef HAVE_SG_ACTOR_SELF_EXECUTE
 	simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL);
 	simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL);
@@ -1210,14 +1227,22 @@ starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t
 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_INIT
 #ifdef HAVE_SG_ACTOR_INIT
 	actor = sg_actor_init(name, host);
 	actor = sg_actor_init(name, host);
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(actor, tsd);
+#else
 	sg_actor_data_set(actor, tsd);
 	sg_actor_data_set(actor, tsd);
+#endif
 	sg_actor_start(actor, code, argc, argv);
 	sg_actor_start(actor, code, argc, argv);
 #else
 #else
 	actor = MSG_process_create_with_arguments(name, code, tsd, host, argc, argv);
 	actor = MSG_process_create_with_arguments(name, code, tsd, host, argc, argv);
 #ifdef HAVE_SG_ACTOR_DATA
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(actor, tsd);
+#else
 	sg_actor_data_set(actor, tsd);
 	sg_actor_data_set(actor, tsd);
 #endif
 #endif
 #endif
 #endif
+#endif
 	return actor;
 	return actor;
 }
 }
 
 
@@ -1251,7 +1276,7 @@ starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node)
 
 
 void _starpu_simgrid_count_ngpus(void)
 void _starpu_simgrid_count_ngpus(void)
 {
 {
-#if (defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
+#if (defined(HAVE_SG_LINK_GET_NAME) || defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
 	unsigned src, dst;
 	unsigned src, dst;
 	starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM");
 	starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM");
 
 
@@ -1261,7 +1286,7 @@ void _starpu_simgrid_count_ngpus(void)
 		{
 		{
 			int busid;
 			int busid;
 			starpu_sg_host_t srchost, dsthost;
 			starpu_sg_host_t srchost, dsthost;
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(SD_link_t), NULL);
 			xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(SD_link_t), NULL);
 			SD_link_t *route;
 			SD_link_t *route;
 #else
 #else
@@ -1281,8 +1306,12 @@ void _starpu_simgrid_count_ngpus(void)
 
 
 			srchost = _starpu_simgrid_get_memnode_host(src);
 			srchost = _starpu_simgrid_get_memnode_host(src);
 			dsthost = _starpu_simgrid_get_memnode_host(dst);
 			dsthost = _starpu_simgrid_get_memnode_host(dst);
-#if defined(HAVE_SG_HOST_ROUTE)  || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE)  || defined(sg_host_route)
+#ifdef HAVE_SG_HOST_GET_ROUTE
+			sg_host_get_route(srchost, dsthost, route_dynar);
+#else
 			sg_host_route(srchost, dsthost, route_dynar);
 			sg_host_route(srchost, dsthost, route_dynar);
+#endif
 			routesize = xbt_dynar_length(route_dynar);
 			routesize = xbt_dynar_length(route_dynar);
 			route = xbt_dynar_to_array(route_dynar);
 			route = xbt_dynar_to_array(route_dynar);
 #else
 #else
@@ -1293,7 +1322,13 @@ void _starpu_simgrid_count_ngpus(void)
 			/* If it goes through "Host", do not care, there is no
 			/* If it goes through "Host", do not care, there is no
 			 * direct transfer support */
 			 * direct transfer support */
 			for (i = 0; i < routesize; i++)
 			for (i = 0; i < routesize; i++)
-				if (!strcmp(sg_link_name(route[i]), "Host"))
+				if (
+#ifdef HAVE_SG_LINK_GET_NAME
+					!strcmp(sg_link_get_name(route[i]), "Host")
+#else
+					!strcmp(sg_link_name(route[i]), "Host")
+#endif
+					)
 					break;
 					break;
 			if (i < routesize)
 			if (i < routesize)
 				continue;
 				continue;
@@ -1302,7 +1337,11 @@ void _starpu_simgrid_count_ngpus(void)
 			through = -1;
 			through = -1;
 			for (i = 0; i < routesize; i++)
 			for (i = 0; i < routesize; i++)
 			{
 			{
+#ifdef HAVE_SG_LINK_GET_NAME
+				name = sg_link_get_name(route[i]);
+#else
 				name = sg_link_name(route[i]);
 				name = sg_link_name(route[i]);
+#endif
 				size_t len = strlen(name);
 				size_t len = strlen(name);
 				if (!strcmp(" through", name+len-8))
 				if (!strcmp(" through", name+len-8))
 					through = i;
 					through = i;
@@ -1315,7 +1354,11 @@ void _starpu_simgrid_count_ngpus(void)
 				_STARPU_DEBUG("Didn't find through-link for %d->%d\n", src, dst);
 				_STARPU_DEBUG("Didn't find through-link for %d->%d\n", src, dst);
 				continue;
 				continue;
 			}
 			}
+#ifdef HAVE_SG_LINK_GET_NAME
+			name = sg_link_get_name(route[through]);
+#else
 			name = sg_link_name(route[through]);
 			name = sg_link_name(route[through]);
+#endif
 
 
 			/*
 			/*
 			 * count how many direct routes go through it between
 			 * count how many direct routes go through it between
@@ -1339,10 +1382,14 @@ void _starpu_simgrid_count_ngpus(void)
 
 
 				starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2);
 				starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2);
 				int routesize2;
 				int routesize2;
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(SD_link_t), NULL);
 				xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(SD_link_t), NULL);
 				SD_link_t *route2;
 				SD_link_t *route2;
+#ifdef HAVE_SG_HOST_GET_ROUTE
+				sg_host_get_route(srchost2, ramhost, route_dynar2);
+#else
 				sg_host_route(srchost2, ramhost, route_dynar2);
 				sg_host_route(srchost2, ramhost, route_dynar2);
+#endif
 				routesize2 = xbt_dynar_length(route_dynar2);
 				routesize2 = xbt_dynar_length(route_dynar2);
 				route2 = xbt_dynar_to_array(route_dynar2);
 				route2 = xbt_dynar_to_array(route_dynar2);
 #else
 #else
@@ -1351,19 +1398,25 @@ void _starpu_simgrid_count_ngpus(void)
 #endif
 #endif
 
 
 				for (i = 0; i < routesize2; i++)
 				for (i = 0; i < routesize2; i++)
-					if (!strcmp(name, sg_link_name(route2[i])))
+					if (
+#ifdef HAVE_SG_LINK_GET_NAME
+						!strcmp(name, sg_link_get_name(route2[i]))
+#else
+						!strcmp(name, sg_link_name(route2[i]))
+#endif
+						)
 					{
 					{
 						/* This GPU goes through this PCI bridge to access RAM */
 						/* This GPU goes through this PCI bridge to access RAM */
 						ngpus++;
 						ngpus++;
 						break;
 						break;
 					}
 					}
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				free(route2);
 				free(route2);
 #endif
 #endif
 			}
 			}
 			_STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus);
 			_STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus);
 			starpu_bus_set_ngpus(busid, ngpus);
 			starpu_bus_set_ngpus(busid, ngpus);
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			free(route);
 			free(route);
 #endif
 #endif
 		}
 		}

+ 3 - 0
src/core/simgrid.h

@@ -24,6 +24,9 @@
 extern "C"
 extern "C"
 {
 {
 #endif
 #endif
+
+/* Note: when changing something here, update the include list in configure.ac
+ * in the part that tries to enable stdc++11 */
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #include <simgrid/msg.h>
 #include <simgrid/msg.h>

+ 1 - 1
src/core/task.c

@@ -1084,7 +1084,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
 
 	struct _starpu_worker *worker;
 	struct _starpu_worker *worker;
 	worker = _starpu_get_worker_struct(workerid);
 	worker = _starpu_get_worker_struct(workerid);
-	starpu_task_list_push_back(&worker->local_tasks, task);
+	starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	starpu_wake_worker_locked(worker->workerid);
 	starpu_wake_worker_locked(worker->workerid);
 
 
 	_starpu_profiling_set_task_push_end_time(task);
 	_starpu_profiling_set_task_push_end_time(task);

+ 3 - 3
src/core/topology.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2009-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  * Copyright (C) 2013       Thibaut Lambert
  * Copyright (C) 2013       Thibaut Lambert
  * Copyright (C) 2016       Uppsala University
  * Copyright (C) 2016       Uppsala University
  *
  *
@@ -464,7 +464,7 @@ struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  * Discover the topology of the machine
  * Discover the topology of the machine
  */
  */
 
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID)
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 						int *current, int *workers_gpuid,
 						int *current, int *workers_gpuid,
 						const char *varname, unsigned nhwgpus,
 						const char *varname, unsigned nhwgpus,
@@ -1817,7 +1817,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
 
 	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
 	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
 	for (i = 0; i < nopencl; i++)
 	for (i = 0; i < nopencl; i++)
-		topology->nworker[STARPU_CUDA_WORKER][i] = 1;
+		topology->nworker[STARPU_OPENCL_WORKER][i] = 1;
 	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
 	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
 
 
 	_starpu_initialize_workers_opencl_gpuid(config);
 	_starpu_initialize_workers_opencl_gpuid(config);

+ 5 - 4
src/core/workers.c

@@ -688,7 +688,7 @@ void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machin
 	/* memory_node initialized by topology.c */
 	/* memory_node initialized by topology.c */
 	STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
 	STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
-	starpu_task_list_init(&workerarg->local_tasks);
+	starpu_task_prio_list_init(&workerarg->local_tasks);
 	_starpu_ctx_change_list_init(&workerarg->ctx_change_list);
 	_starpu_ctx_change_list_init(&workerarg->ctx_change_list);
 	workerarg->local_ordered_tasks = NULL;
 	workerarg->local_ordered_tasks = NULL;
 	workerarg->local_ordered_tasks_size = 0;
 	workerarg->local_ordered_tasks_size = 0;
@@ -1039,7 +1039,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
         if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
         if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
         {
         {
                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
-                struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
+                struct _starpu_worker * worker_zero STARPU_ATTRIBUTE_UNUSED = &worker_set_zero->workers[0];
                 STARPU_PTHREAD_CREATE_ON(
                 STARPU_PTHREAD_CREATE_ON(
                                 "zero",
                                 "zero",
                                 &worker_set_zero->worker_thread,
                                 &worker_set_zero->worker_thread,
@@ -1445,7 +1445,8 @@ int _starpu_get_catch_signals(void)
 	return _starpu_config.conf.catch_signals;
 	return _starpu_config.conf.catch_signals;
 }
 }
 
 
-void starpu_drivers_preinit(void) {
+void starpu_drivers_preinit(void)
+{
 	_starpu_cpu_preinit();
 	_starpu_cpu_preinit();
 	_starpu_cuda_preinit();
 	_starpu_cuda_preinit();
 	_starpu_opencl_preinit();
 	_starpu_opencl_preinit();
@@ -1828,7 +1829,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
 		}
 		}
 
 
 out:
 out:
-		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
+		STARPU_ASSERT(starpu_task_prio_list_empty(&worker->local_tasks));
 		for (n = 0; n < worker->local_ordered_tasks_size; n++)
 		for (n = 0; n < worker->local_ordered_tasks_size; n++)
 			STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL);
 			STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL);
 		_starpu_sched_ctx_list_delete(&worker->sched_ctx_list);
 		_starpu_sched_ctx_list_delete(&worker->sched_ctx_list);

+ 5 - 5
src/core/workers.h

@@ -61,8 +61,6 @@
 
 
 #define STARPU_MAX_PIPELINE 4
 #define STARPU_MAX_PIPELINE 4
 
 
-enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
-
 struct _starpu_ctx_change_list;
 struct _starpu_ctx_change_list;
 
 
 /** This is initialized by _starpu_worker_init() */
 /** This is initialized by _starpu_worker_init() */
@@ -125,7 +123,7 @@ LIST_TYPE(_starpu_worker,
 	     * subsequent processing once worker completes the ongoing scheduling
 	     * subsequent processing once worker completes the ongoing scheduling
 	     * operation */
 	     * operation */
 	struct _starpu_ctx_change_list ctx_change_list;
 	struct _starpu_ctx_change_list ctx_change_list;
-	struct starpu_task_list local_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue */
+	struct starpu_task_prio_list local_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue */
 	struct starpu_task **local_ordered_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue with an explicit order */
 	struct starpu_task **local_ordered_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue with an explicit order */
 	unsigned local_ordered_tasks_size; /**< this records the size of local_ordered_tasks */
 	unsigned local_ordered_tasks_size; /**< this records the size of local_ordered_tasks */
 	unsigned current_ordered_task; /**< this records the index (within local_ordered_tasks) of the next ordered task to be executed */
 	unsigned current_ordered_task; /**< this records the index (within local_ordered_tasks) of the next ordered task to be executed */
@@ -427,7 +425,8 @@ struct _starpu_machine_config
 };
 };
 
 
 /** Provides information for a device driver */
 /** Provides information for a device driver */
-struct starpu_driver_info {
+struct starpu_driver_info
+{
 	const char *name_upper;	/**< Name of worker type in upper case */
 	const char *name_upper;	/**< Name of worker type in upper case */
 	const char *name_var;	/**< Name of worker type for environment variables */
 	const char *name_var;	/**< Name of worker type for environment variables */
 	const char *name_lower;	/**< Name of worker type in lower case */
 	const char *name_lower;	/**< Name of worker type in lower case */
@@ -441,7 +440,8 @@ extern struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
 void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
 void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
 
 
 /** Provides information for a memory node driver */
 /** Provides information for a memory node driver */
-struct starpu_memory_driver_info {
+struct starpu_memory_driver_info
+{
 	const char *name_upper;	/**< Name of memory in upper case */
 	const char *name_upper;	/**< Name of memory in upper case */
 	enum starpu_worker_archtype worker_archtype;	/**< Kind of device */
 	enum starpu_worker_archtype worker_archtype;	/**< Kind of device */
 };
 };

+ 4 - 2
src/datawizard/memalloc.c

@@ -1513,7 +1513,8 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			/* First try to flush data explicitly marked for freeing */
 			/* First try to flush data explicitly marked for freeing */
 			size_t freed = flush_memchunk_cache(dst_node, reclaim);
 			size_t freed = flush_memchunk_cache(dst_node, reclaim);
 
 
-			if (freed >= reclaim) {
+			if (freed >= reclaim)
+			{
 				/* That freed enough data, retry allocating */
 				/* That freed enough data, retry allocating */
 				prefetch_out_of_memory[dst_node] = 0;
 				prefetch_out_of_memory[dst_node] = 0;
 				continue;
 				continue;
@@ -1550,7 +1551,8 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 			_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
 			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
 			prefetch_out_of_memory[dst_node] = 0;
 			prefetch_out_of_memory[dst_node] = 0;
-		} else
+		}
+		else
 			prefetch_out_of_memory[dst_node] = 0;
 			prefetch_out_of_memory[dst_node] = 0;
 	}
 	}
 	while((allocated_memory == -ENOMEM) && attempts++ < 2);
 	while((allocated_memory == -ENOMEM) && attempts++ < 2);

+ 2 - 1
src/datawizard/memory_nodes.c

@@ -180,7 +180,8 @@ int starpu_memory_node_get_devid(unsigned node)
 	return _starpu_descr.devid[node];
 	return _starpu_descr.devid[node];
 }
 }
 
 
-enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) {
+enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind)
+{
 	enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype;
 	enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype;
 	STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind);
 	STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind);
 	return archtype;
 	return archtype;

+ 5 - 3
src/debug/traces/starpu_fxt.c

@@ -193,7 +193,8 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		fprintf(tasks_file, "Name: %s\n", task->name);
 		fprintf(tasks_file, "Name: %s\n", task->name);
 	if (task->model_name)
 	if (task->model_name)
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
-	if (task->file) {
+	if (task->file)
+	{
 		fprintf(tasks_file, "File: %s\n", task->file);
 		fprintf(tasks_file, "File: %s\n", task->file);
 		fprintf(tasks_file, "Line: %d\n", task->line);
 		fprintf(tasks_file, "Line: %d\n", task->line);
 	}
 	}
@@ -4129,7 +4130,8 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 
 
 	if (out_paje_file && !options->no_bus)
 	if (out_paje_file && !options->no_bus)
 	{
 	{
-		while (!_starpu_communication_list_empty(&communication_list)) {
+		while (!_starpu_communication_list_empty(&communication_list))
+		{
 			struct _starpu_communication*itor;
 			struct _starpu_communication*itor;
 			itor = _starpu_communication_list_pop_front(&communication_list);
 			itor = _starpu_communication_list_pop_front(&communication_list);
 
 
@@ -4423,7 +4425,7 @@ void _starpu_fxt_number_events_file_init(struct starpu_fxt_options *options)
 			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno));
 			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno));
 
 
 		/* FUT_SETUP_CODE is the event with the maximal value */
 		/* FUT_SETUP_CODE is the event with the maximal value */
-		number_events = calloc(FUT_SETUP_CODE+1, sizeof(uint64_t));
+		_STARPU_CALLOC(number_events, FUT_SETUP_CODE+1, sizeof(uint64_t));
 	}
 	}
 	else
 	else
 		number_events_file = NULL;
 		number_events_file = NULL;

+ 4 - 2
src/drivers/cpu/driver_cpu.c

@@ -60,7 +60,8 @@
 #include <windows.h>
 #include <windows.h>
 #endif
 #endif
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "CPU",
 	.name_upper = "CPU",
 	.name_var = "CPU",
 	.name_var = "CPU",
 	.name_lower = "cpu",
 	.name_lower = "cpu",
@@ -68,7 +69,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 0.5f,
 	.alpha = 0.5f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "NUMA",
 	.name_upper = "NUMA",
 	.worker_archtype = STARPU_CPU_WORKER,
 	.worker_archtype = STARPU_CPU_WORKER,
 };
 };

+ 4 - 2
src/drivers/cuda/driver_cuda_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <drivers/cuda/driver_cuda.h>
 #include <drivers/cuda/driver_cuda.h>
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "CUDA",
 	.name_upper = "CUDA",
 	.name_var = "CUDA",
 	.name_var = "CUDA",
 	.name_lower = "cuda",
 	.name_lower = "cuda",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 13.33f,
 	.alpha = 13.33f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "CUDA",
 	.name_upper = "CUDA",
 	.worker_archtype = STARPU_CUDA_WORKER,
 	.worker_archtype = STARPU_CUDA_WORKER,
 };
 };

+ 2 - 1
src/drivers/disk/driver_disk.c

@@ -23,7 +23,8 @@
 #include <datawizard/coherency.h>
 #include <datawizard/coherency.h>
 #include <datawizard/memory_nodes.h>
 #include <datawizard/memory_nodes.h>
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "Disk",
 	.name_upper = "Disk",
 	.worker_archtype = (enum starpu_worker_archtype) -1,
 	.worker_archtype = (enum starpu_worker_archtype) -1,
 };
 };

+ 4 - 2
src/drivers/mic/driver_mic_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <drivers/mic/driver_mic_source.h>
 #include <drivers/mic/driver_mic_source.h>
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "MIC",
 	.name_upper = "MIC",
 	.name_var = "MIC",
 	.name_var = "MIC",
 	.name_lower = "mic",
 	.name_lower = "mic",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 0.5f,
 	.alpha = 0.5f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "MIC",
 	.name_upper = "MIC",
 	.worker_archtype = STARPU_MIC_WORKER,
 	.worker_archtype = STARPU_MIC_WORKER,
 };
 };

+ 4 - 2
src/drivers/mpi/driver_mpi_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <drivers/mpi/driver_mpi_source.h>
 #include <drivers/mpi/driver_mpi_source.h>
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "MPI_MS",
 	.name_upper = "MPI_MS",
 	.name_var = "MPI_MS",
 	.name_var = "MPI_MS",
 	.name_lower = "mpi_ms",
 	.name_lower = "mpi_ms",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 1.0f,
 	.alpha = 1.0f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "MPI_MS",
 	.name_upper = "MPI_MS",
 	.worker_archtype = STARPU_MPI_MS_WORKER,
 	.worker_archtype = STARPU_MPI_MS_WORKER,
 };
 };

+ 4 - 2
src/drivers/opencl/driver_opencl_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <drivers/opencl/driver_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "OpenCL",
 	.name_upper = "OpenCL",
 	.name_var = "OPENCL",
 	.name_var = "OPENCL",
 	.name_lower = "opencl",
 	.name_lower = "opencl",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 12.22f,
 	.alpha = 12.22f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "OpenCL",
 	.name_upper = "OpenCL",
 	.worker_archtype = STARPU_OPENCL_WORKER,
 	.worker_archtype = STARPU_OPENCL_WORKER,
 };
 };

+ 2 - 0
src/profiling/profiling.c

@@ -201,11 +201,13 @@ void _starpu_profiling_papi_task_start_counters(struct starpu_task *task)
 		for(i=0; i<papi_nevents; i++)
 		for(i=0; i<papi_nevents; i++)
 		{
 		{
 			int ret = PAPI_add_event(profiling_info->papi_event_set, papi_events[i]);
 			int ret = PAPI_add_event(profiling_info->papi_event_set, papi_events[i]);
+#ifdef PAPI_ECMP_DISABLED
 			if (ret == PAPI_ECMP_DISABLED && !warned_component_unavailable)
 			if (ret == PAPI_ECMP_DISABLED && !warned_component_unavailable)
 			{
 			{
 				_STARPU_MSG("Error while registering Papi event: Component containing event is disabled. Try running `papi_component_avail` to get more information.\n");
 				_STARPU_MSG("Error while registering Papi event: Component containing event is disabled. Try running `papi_component_avail` to get more information.\n");
 				warned_component_unavailable = 1;
 				warned_component_unavailable = 1;
 			}
 			}
+#endif
 			profiling_info->papi_values[i]=0;
 			profiling_info->papi_values[i]=0;
 		}
 		}
 		PAPI_reset(profiling_info->papi_event_set);
 		PAPI_reset(profiling_info->papi_event_set);

+ 6 - 3
src/sched_policies/component_heteroprio.c

@@ -434,9 +434,12 @@ static int heteroprio_push_task(struct starpu_sched_component * component, struc
 			/* Didn't find it, add one */
 			/* Didn't find it, add one */
 			data->naccel++;
 			data->naccel++;
 
 
-			float *newaccel = malloc(data->naccel * sizeof(*newaccel));
-			struct _starpu_prio_deque **newbuckets = malloc(data->naccel * sizeof(*newbuckets));
-			struct _starpu_prio_deque *newbucket = malloc(sizeof(*newbucket));
+			float *newaccel;
+			_STARPU_MALLOC(newaccel, data->naccel * sizeof(*newaccel));
+			struct _starpu_prio_deque **newbuckets;
+			_STARPU_MALLOC(newbuckets, data->naccel * sizeof(*newbuckets));
+			struct _starpu_prio_deque *newbucket;
+			_STARPU_MALLOC(newbucket, sizeof(*newbucket));
 			_starpu_prio_deque_init(newbucket);
 			_starpu_prio_deque_init(newbucket);
 			int inserted = 0;
 			int inserted = 0;
 
 

+ 4 - 4
src/sched_policies/component_worker.c

@@ -510,11 +510,11 @@ static double simple_worker_estimated_load(struct starpu_sched_component * compo
 	struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
 	struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
 	int nb_task = 0;
 	int nb_task = 0;
 	STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex);
 	STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex);
-	struct starpu_task_list list = worker->local_tasks;
+	struct starpu_task_prio_list *list = &worker->local_tasks;
 	struct starpu_task * task;
 	struct starpu_task * task;
-	for(task = starpu_task_list_front(&list);
-	    task != starpu_task_list_end(&list);
-	    task = starpu_task_list_next(task))
+	for(task = starpu_task_prio_list_begin(list);
+	    task != starpu_task_prio_list_end(list);
+	    task = starpu_task_prio_list_next(list, task))
 		nb_task++;
 		nb_task++;
 	STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex);
 	STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex);
 	struct _starpu_worker_component_data * d = component->data;
 	struct _starpu_worker_component_data * d = component->data;

+ 5 - 0
src/sched_policies/helper_mct.c

@@ -88,6 +88,11 @@ static double compute_expected_time(double now, double predicted_end, double pre
 
 
 double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy)
 double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy)
 {
 {
+	if(isnan(local_energy))
+		/* Energy not calibrated yet, but we cannot do this
+		 * automatically anyway, so ignoring this for now */
+		local_energy = 0.;
+
 	/* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */
 	/* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */
 	
 	
 	/* min_exp_end_of_task is the minimum end time of the task over all workers */
 	/* min_exp_end_of_task is the minimum end time of the task over all workers */

+ 2 - 1
src/sched_policies/work_stealing_policy.c

@@ -145,7 +145,8 @@ static int select_victim_round_robin(struct _starpu_work_stealing_data *ws, unsi
 		if (!ws->per_worker[workerids[worker]].notask)
 		if (!ws->per_worker[workerids[worker]].notask)
 		{
 		{
 			if (ws->per_worker[workerids[worker]].busy
 			if (ws->per_worker[workerids[worker]].busy
-						   || starpu_worker_is_blocked_in_parallel(workerids[worker])) {
+			    || starpu_worker_is_blocked_in_parallel(workerids[worker]))
+			{
 				ntasks = 1;
 				ntasks = 1;
 				break;
 				break;
 			}
 			}

+ 6 - 4
src/util/starpu_data_cpy.c

@@ -86,7 +86,7 @@ void mp_cpy_kernel(void *descr[], void *cl_arg)
 
 
 	const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
 	const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
 	const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
 	const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
-	
+
 	void *dst_interface = descr[0];
 	void *dst_interface = descr[0];
 	void *src_interface = descr[1];
 	void *src_interface = descr[1];
 
 
@@ -151,7 +151,7 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 
 
 	unsigned *interface_id;
 	unsigned *interface_id;
 	_STARPU_MALLOC(interface_id, sizeof(*interface_id));
 	_STARPU_MALLOC(interface_id, sizeof(*interface_id));
-	*interface_id = dst_handle->ops->interfaceid; 
+	*interface_id = dst_handle->ops->interfaceid;
 	task->cl_arg = interface_id;
 	task->cl_arg = interface_id;
 	task->cl_arg_size = sizeof(*interface_id);
 	task->cl_arg_size = sizeof(*interface_id);
 	task->cl_arg_free = 1;
 	task->cl_arg_free = 1;
@@ -181,7 +181,8 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous)
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous)
 {
 {
 	_starpu_spin_lock(&src_handle->header_lock);
 	_starpu_spin_lock(&src_handle->header_lock);
-	if (src_handle->readonly_dup) {
+	if (src_handle->readonly_dup)
+	{
 		/* Already a ro duplicate, just return it with one more ref */
 		/* Already a ro duplicate, just return it with one more ref */
 		*dst_handle = src_handle->readonly_dup;
 		*dst_handle = src_handle->readonly_dup;
 		_starpu_spin_unlock(&src_handle->header_lock);
 		_starpu_spin_unlock(&src_handle->header_lock);
@@ -190,7 +191,8 @@ int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t sr
 		_starpu_spin_unlock(&(*dst_handle)->header_lock);
 		_starpu_spin_unlock(&(*dst_handle)->header_lock);
 		return 0;
 		return 0;
 	}
 	}
-	if (src_handle->readonly) {
+	if (src_handle->readonly)
+	{
 		src_handle->aliases++;
 		src_handle->aliases++;
 		_starpu_spin_unlock(&src_handle->header_lock);
 		_starpu_spin_unlock(&src_handle->header_lock);
 		*dst_handle = src_handle;
 		*dst_handle = src_handle;

+ 1 - 1
starpu-1.0-mic.pc.in

@@ -23,7 +23,7 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@
 Requires.private: @GORDON_REQUIRES@
 Requires.private: @GORDON_REQUIRES@

+ 1 - 1
starpu-1.0.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.1.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.2.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.3.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 2 - 2
starpufft/src/starpufft-double.h

@@ -25,8 +25,8 @@
 #include <cufft.h>
 #include <cufft.h>
 #endif
 #endif
 
 
-#undef  FLOAT
-#define DOUBLE
+#undef  STARPUFFT_FLOAT
+#define STARPUFFT_DOUBLE
 
 
 typedef double real;
 typedef double real;
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 

+ 2 - 2
starpufft/src/starpufft-float.h

@@ -25,8 +25,8 @@
 #include <cufft.h>
 #include <cufft.h>
 #endif
 #endif
 
 
-#undef  DOUBLE
-#define FLOAT
+#undef  STARPUFFT_DOUBLE
+#define STARPUFFT_FLOAT
 
 
 typedef float real;
 typedef float real;
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 

+ 2 - 2
starpufft/src/starpufftx.c

@@ -28,7 +28,7 @@
 #define _externC extern
 #define _externC extern
 #include "cudax_kernels.h"
 #include "cudax_kernels.h"
 
 
-#if defined(FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
+#if defined(STARPUFFT_FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
 #  define __STARPU_USE_CUDA
 #  define __STARPU_USE_CUDA
 #else
 #else
 #  undef __STARPU_USE_CUDA
 #  undef __STARPU_USE_CUDA
@@ -172,7 +172,7 @@ compute_roots(STARPUFFT(plan) plan)
 }
 }
 
 
 /* Only CUDA capability >= 1.3 supports doubles, rule old card out.  */
 /* Only CUDA capability >= 1.3 supports doubles, rule old card out.  */
-#ifdef DOUBLE
+#ifdef STARPUFFT_DOUBLE
 static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED) {
 static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED) {
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 		return 1;
 		return 1;

+ 21 - 0
starpupy/Makefile.am

@@ -0,0 +1,21 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu-subdirtests.mk
+
+SUBDIRS  = src
+SUBDIRS += examples
+

+ 43 - 0
starpupy/examples/Makefile.am

@@ -0,0 +1,43 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu.mk
+
+SUBDIRS =
+
+CLEANFILES = *.gcno *.gcda *.linkinfo
+
+TESTS	=
+TESTS	+=	starpu_py.sh
+TESTS	+=	starpu_py_parallel.sh
+
+if STARPU_STARPUPY_NUMPY
+TESTS	+=	starpu_py_np.sh
+endif
+
+EXTRA_DIST	=		\
+	starpu_py_parallel.py	\
+	starpu_py_parallel.sh	\
+	starpu_py.py		\
+	starpu_py.sh		\
+	starpu_py_np.py		\
+	starpu_py_np.sh
+
+python_sourcesdir = $(libdir)/starpu/python
+dist_python_sources_DATA	=	\
+	starpu_py_parallel.py	\
+	starpu_py.py
+

+ 59 - 0
starpupy/examples/execute.sh.in

@@ -0,0 +1,59 @@
+#!@REALBASH@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+exampledir=@STARPU_SRC_DIR@/starpupy/examples
+
+modpath=@STARPU_BUILD_DIR@/src/.libs:
+pypath=@STARPU_BUILD_DIR@/starpupy/src/build:$PYTHONPATH
+
+valgrind=""
+gdb=""
+if test "$1" == "--valgrind"
+then
+    valgrind=1
+    shift
+fi
+if test "$1" == "--gdb"
+then
+    gdb=1
+    shift
+fi
+
+examplefile=$1
+if test -f $examplefile
+then
+    pythonscript=$examplefile
+elif test -f $exampledir/$examplefile
+then
+    pythonscript=$exampledir/$examplefile
+else
+    echo "Error. Python script $examplefile not found in current directory or in $exampledir"
+    exit 1
+fi
+shift
+
+set -x
+if test "$valgrind" == "1"
+then
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath PYTHONMALLOC=malloc valgrind --track-origins=yes @PYTHON@ $pythonscript $*
+elif test "$gdb" == "1"
+then
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath gdb --args @PYTHON@ $pythonscript $*
+else
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath @PYTHON@ $pythonscript $*
+fi
+

+ 9 - 9
starpupy/tests/starpu_py.py

@@ -73,7 +73,7 @@ def sub(a,b,c):
 ###############################################################################
 ###############################################################################
 
 
 #using decorator wrap the function with input
 #using decorator wrap the function with input
-@starpu.delayed
+@starpu.delayed(name="test")
 def add_deco(a,b,c):
 def add_deco(a,b,c):
 	#time.sleep(1)
 	#time.sleep(1)
 	print ("Example 8:")
 	print ("Example 8:")
@@ -83,7 +83,7 @@ def add_deco(a,b,c):
 ###############################################################################
 ###############################################################################
 
 
 #using decorator wrap the function with input
 #using decorator wrap the function with input
-@starpu.delayed
+@starpu.delayed(color=1)
 def sub_deco(x,a):
 def sub_deco(x,a):
 	print ("Example 9:")
 	print ("Example 9:")
 	print ("This is a function with input and output wrapped by the decorator function:")
 	print ("This is a function with input and output wrapped by the decorator function:")
@@ -93,34 +93,34 @@ def sub_deco(x,a):
 
 
 async def main():
 async def main():
 	#submit function "hello"
 	#submit function "hello"
-    fut = starpu.task_submit(hello)
+    fut = starpu.task_submit()(hello)
     await fut
     await fut
 
 
     #submit function "func1"
     #submit function "func1"
-    fut1 = starpu.task_submit(func1)
+    fut1 = starpu.task_submit()(func1)
     await fut1
     await fut1
 
 
     #apply starpu.delayed(func1_deco())
     #apply starpu.delayed(func1_deco())
     await func1_deco()
     await func1_deco()
 
 
 	#submit function "func2"
 	#submit function "func2"
-    fut2 = starpu.task_submit(func2)
+    fut2 = starpu.task_submit()(func2)
     res2 = await fut2
     res2 = await fut2
 	#print the result of function
 	#print the result of function
     print("This is a function no input and the return value is", res2)
     print("This is a function no input and the return value is", res2)
 
 
     #submit function "multi"
     #submit function "multi"
-    fut3 = starpu.task_submit(multi, 2, 3)
+    fut3 = starpu.task_submit()(multi, 2, 3)
     res3 = await fut3
     res3 = await fut3
     print("The result of function multi is :", res3)
     print("The result of function multi is :", res3)
 
 
 	#submit function "add"
 	#submit function "add"
-    fut4 = starpu.task_submit(add, 1.2, 2.5, 3.6, 4.9)
+    fut4 = starpu.task_submit()(add, 1.2, 2.5, 3.6, 4.9)
     res4 = await fut4
     res4 = await fut4
     print("The result of function add is :", res4)
     print("The result of function add is :", res4)
 
 
 	#submit function "sub"
 	#submit function "sub"
-    fut5 = starpu.task_submit(sub, 6, 2, 5.9)
+    fut5 = starpu.task_submit()(sub, 6, 2, 5.9)
     res5 = await fut5
     res5 = await fut5
     print("The result of function sub is:", res5)
     print("The result of function sub is:", res5)
 
 
@@ -138,4 +138,4 @@ async def main():
 asyncio.run(main())
 asyncio.run(main())
 
 
 
 
-#starpu.task_wait_for_all()
+#starpu.task_wait_for_all()

+ 19 - 0
starpupy/examples/starpu_py.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+$(dirname $0)/execute.sh starpu_py.py $*
+

+ 40 - 0
starpupy/examples/starpu_py_np.py

@@ -0,0 +1,40 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+import asyncio
+import numpy as np
+
+
+###############################################################################
+
+def scal(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a
+	return t
+
+t=np.array([1,2,3,4,5,6,7,8,9,10])
+
+async def main():
+    fut8 = starpu.task_submit()(scal, 2, t)
+    res8 = await fut8
+    print("The result of Example 10 is", res8)
+    print("The return array is", t)
+    #print("The result type is", type(res8))
+
+asyncio.run(main())
+
+
+#starpu.task_wait_for_all()

+ 3 - 7
starpupy/src/starpu/delay.py

@@ -1,3 +1,4 @@
+#!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 # Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
@@ -13,11 +14,6 @@
 #
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 #
-from starpu import starpupy
-import asyncio
 
 
-def delayed(f):
-	def submit(*args,**kwargs):
-		fut = starpupy.task_submit(f, *args,**kwargs)
-		return fut
-	return submit
+$(dirname $0)/execute.sh starpu_py_np.py $*
+

+ 350 - 0
starpupy/examples/starpu_py_parallel.py

@@ -0,0 +1,350 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+import starpu.joblib
+import time
+import asyncio
+from math import sqrt
+from math import log10
+import numpy as np
+import sys
+
+#generate a list to store functions
+g_func=[]
+
+#function no input no output print hello world
+def hello():
+	print ("Example 1: Hello, world!")
+g_func.append(starpu.joblib.delayed(hello)())
+
+#function no input no output
+def func1():
+	print ("Example 2: This is a function no input no output")
+g_func.append(starpu.joblib.delayed(func1)())
+
+#function no input return a value
+def func2():
+	print ("Example 3:")
+	return 12
+g_func.append(starpu.joblib.delayed(func2)())
+
+#function has 2 int inputs and 1 int output
+def exp(a,b):
+	res_exp=a**b
+	print("Example 4: The result of ",a,"^",b,"is",res_exp)
+	return res_exp
+g_func.append(starpu.joblib.delayed(exp)(2, 3))
+
+#function has 4 float inputs and 1 float output
+def add(a,b,c,d):
+	res_add=a+b+c+d
+	print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add)
+	return res_add
+g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9))
+
+#function has 2 int inputs 1 float input and 1 float output 1 int output
+def sub(a,b,c):
+	res_sub1=a-b-c
+	res_sub2=a-b
+	print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2)
+	return res_sub1, res_sub2
+g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9))
+
+##########functions of array calculation###############
+
+def scal(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a
+	return t
+
+def add_scal(a, t1, t2):
+	for i in range(len(t1)):
+		t1[i]=t1[i]*a+t2[i]
+	return t1
+
+def scal_arr(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a[i]
+	return t
+
+def multi(a,b):
+	res_multi=a*b
+	return res_multi
+
+def multi_2arr(a, b):
+        for i in range(len(a)):
+                a[i]=a[i]*b[i]
+        return a
+
+def multi_list(l):
+	res = []
+	for (a,b) in l:
+		res.append(a*b)
+	return res
+
+def log10_arr(t):
+	for i in range(len(t)):
+		t[i]=log10(t[i])
+	return t
+########################################################
+
+#################scikit test###################
+# DEFAULT_JOBLIB_BACKEND = starpu.joblib.get_active_backend()[0].__class__
+# class MyBackend(DEFAULT_JOBLIB_BACKEND):  # type: ignore
+#         def __init__(self, *args, **kwargs):
+#                 self.count = 0
+#                 super().__init__(*args, **kwargs)
+
+#         def start_call(self):
+#                 self.count += 1
+#                 return super().start_call()
+
+# starpu.joblib.register_parallel_backend('testing', MyBackend)
+
+# with starpu.joblib.parallel_backend("testing") as (ba, n_jobs):
+# 	print("backend and n_jobs is", ba, n_jobs)
+###############################################
+
+N=100
+# A=np.arange(N)
+# B=np.arange(N)
+# a=np.arange(N)
+# b=np.arange(N, 2*N, 1)
+
+displayPlot=False
+listX=[10, 100, 1000, 10000]
+for arg in sys.argv[1:]:
+        if arg == "-long":
+                listX = [10, 100, 1000, 10000, 100000, 1000000, 10000000]
+        if arg == "-plot":
+                displayPlot=True
+
+for x in listX:
+	for X in range(x, x*10, x):
+		print("X=",X)
+		starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_list")(starpu.joblib.delayed(log10)(i+1)for i in range(X))
+		A=np.arange(1,X+1,1)
+		starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_arr")(starpu.joblib.delayed(log10_arr)(A))
+
+print("************************")
+print("parallel Normal version:")
+print("************************")
+print("--(sqrt)(i**2)for i in range(N)")
+start_exec1=time.time()
+start_cpu1=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
+end_exec1=time.time()
+end_cpu1=time.process_time()
+print("the program execution time is", end_exec1-start_exec1)
+print("the cpu execution time is", end_cpu1-start_cpu1)
+
+print("--(multi)(i,j) for i,j in zip(a,b)")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec2=time.time()
+start_cpu2=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b))
+end_exec2=time.time()
+end_cpu2=time.process_time()
+print("the program execution time is", end_exec2-start_exec2)
+print("the cpu execution time is", end_cpu2-start_cpu2)
+
+print("--(scal_arr)((i for i in b), A)")
+A=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec3=time.time()
+start_cpu3=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A))
+end_exec3=time.time()
+end_cpu3=time.process_time()
+print("the program execution time is", end_exec3-start_exec3)
+print("the cpu execution time is", end_cpu3-start_cpu3)
+
+print("--(multi_list)((i,j) for i,j in zip(a,b))")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec4=time.time()
+start_cpu4=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b)))
+end_exec4=time.time()
+end_cpu4=time.process_time()
+print("the program execution time is", end_exec4-start_exec4)
+print("the cpu execution time is", end_cpu4-start_cpu4)
+
+print("--(multi_2arr)((i for i in a), (j for j in b))")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec5=time.time()
+start_cpu5=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b)))
+end_exec5=time.time()
+end_cpu5=time.process_time()
+print("the program execution time is", end_exec5-start_exec5)
+print("the cpu execution time is", end_cpu5-start_cpu5)
+
+print("--(multi_2arr)(A, B)")
+# A=np.arange(N)
+# B=np.arange(N, 2*N, 1)
+n, m = 4, 5
+A = np.arange(n*m).reshape(n, m)
+B = np.arange(n*m, 2*n*m, 1).reshape(n, m)
+print("The input arrays are A", A, "B", B)
+start_exec6=time.time()
+start_cpu6=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)(A, B))
+end_exec6=time.time()
+end_cpu6=time.process_time()
+print("the program execution time is", end_exec6-start_exec6)
+print("the cpu execution time is", end_cpu6-start_cpu6)
+print("The return arrays are A", A, "B", B)
+
+print("--(scal)(2, t=(j for j in a))")
+a=np.arange(N)
+start_exec7=time.time()
+start_cpu7=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, t=(j for j in a)))
+end_exec7=time.time()
+end_cpu7=time.process_time()
+print("the program execution time is", end_exec7-start_exec7)
+print("the cpu execution time is", end_cpu7-start_cpu7)
+
+print("--(scal)(2,A)")
+A=np.arange(N)
+print("The input array is", A)
+start_exec8=time.time()
+start_cpu8=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2,A))
+end_exec8=time.time()
+end_cpu8=time.process_time()
+print("the program execution time is", end_exec8-start_exec8)
+print("the cpu execution time is", end_cpu8-start_cpu8)
+print("The return array is", A)
+
+print("--(add_scal)(t1=A,t2=B,a=2)")
+A=np.arange(N)
+B=np.arange(N)
+print("The input arrays are A", A, "B", B)
+start_exec9=time.time()
+start_cpu9=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(t1=A,t2=B,a=2))
+end_exec9=time.time()
+end_cpu9=time.process_time()
+print("the program execution time is", end_exec9-start_exec9)
+print("the cpu execution time is", end_cpu9-start_cpu9)
+print("The return arrays are A", A, "B", B)
+
+
+print("--input is iterable function list")
+start_exec10=time.time()
+start_cpu10=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="func")(g_func)
+end_exec10=time.time()
+end_cpu10=time.process_time()
+print("the program execution time is", end_exec10-start_exec10)
+print("the cpu execution time is", end_cpu10-start_cpu10)
+
+# def producer():
+# 	for i in range(6):
+# 		print('Produced %s' % i)
+# 		yield i
+#starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(sqrt)(i) for i in producer())
+
+print("************************")
+print("parallel Future version:")
+print("************************")
+async def main():
+
+	print("--(sqrt)(i**2)for i in range(N)")
+	fut1=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
+	res1=await fut1
+	#print(res1)
+
+	print("--(multi)(i,j) for i,j in zip(a,b)")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut2=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b))
+	res2=await fut2
+	#print(res2)
+
+	print("--(scal_arr)((i for i in b), A)")
+	A=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut3=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A))
+	res3=await fut3
+	#print(res3)
+
+	print("--(multi_list)((i,j) for i,j in zip(a,b))")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut4=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b)))
+	res4=await fut4
+	#print(res4)
+
+	print("--(multi_2arr)((i for i in a), (j for j in b))")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut5=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b)))
+	res5=await fut5
+	#print(res5)
+
+	print("--(multi_2arr)(b=B, a=A)")
+	A=np.arange(N)
+	B=np.arange(N, 2*N, 1)
+	print("The input arrays are A", A, "B", B)
+	fut6=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)(b=B, a=A))
+	res6=await fut6
+	print("The return arrays are A", A, "B", B)
+
+
+	print("--(scal)(2, (j for j in a))")
+	a=np.arange(N)
+	fut7=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, (j for j in a)))
+	res7=await fut7
+	#print(res6)
+
+	print("--(scal)(2,t=A)")
+	A=np.arange(N)
+	print("The input array is", A)
+	fut8=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2,t=A))
+	res8=await fut8
+	print("The return array is", A)
+
+	print("--(scal)(2,A,B)")
+	A=np.arange(N)
+	B=np.arange(N)
+	print("The input arrays are A", A, "B", B)
+	fut9=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(2,A,B))
+	res9=await fut9
+	print("The return arrays are A", A, "B", B)
+
+	print("--input is iterable function list")
+	fut10=starpu.joblib.Parallel(mode="future", n_jobs=-1)(g_func)
+	res10=await fut10
+	#print(res9)
+
+asyncio.run(main())
+
+starpu.perfmodel_plot(perfmodel="sqrt",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="scal_arr",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi_list",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi_2arr",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="scal",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="add_scal",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="func",view=displayPlot)
+
+starpu.perfmodel_plot(perfmodel="log_list",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="log_arr",view=displayPlot)

+ 19 - 0
starpupy/examples/starpu_py_parallel.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+STARPU_CALIBRATE=1 $(dirname $0)/execute.sh starpu_py_parallel.py $*
+

+ 63 - 0
starpupy/src/Makefile.am

@@ -0,0 +1,63 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu-notests.mk
+
+SUBDIRS =
+
+PYTHON_PY_SRC	=	$(wildcard $(top_srcdir)/starpupy/src/*py)
+PYTHON_PY_BUILD	=	$(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_PY_SRC)))
+
+PYTHON_C_SRC	=	$(wildcard $(top_srcdir)/starpupy/src/*c)
+PYTHON_C_BUILD	=	$(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_C_SRC)))
+
+$(top_builddir)/starpupy/src/starpu/%.py: $(abs_top_srcdir)/starpupy/src/%.py
+	$(MKDIR_P) starpu
+	$(V_ln) $(LN_S) $< $@
+$(top_builddir)/starpupy/src/starpu/%.c: $(abs_top_srcdir)/starpupy/src/%.c
+	@$(MKDIR_P) starpu
+	$(V_ln) $(LN_S) $< $@
+
+all: $(PYTHON_PY_BUILD) $(PYTHON_C_BUILD)
+	$(PYTHON) setup.py build $(PYTHON_SETUP_OPTIONS)
+
+install-exec-local:
+	@if test -d $(prefix)/lib/python* ; \
+	then	\
+		chmod u+w $(prefix)/lib/python* ; \
+		chmod u+w $(prefix)/lib/python*/site-packages ; \
+	fi
+	$(PYTHON) setup.py install
+
+if STARPU_BUILD_STARPUPY
+clean-local:
+	$(PYTHON) setup.py clean
+	rm -f starpu/*py starpu/*c
+endif
+
+distclean-local:
+	rm -rf build
+
+uninstall-local:
+	rm -rf $(prefix)/lib/python*/site-packages/starpu*
+	rm -rf $(prefix)/lib/python*/site-packages/tmp/starpu*
+
+EXTRA_DIST	=		\
+	delay.py		\
+	__init__.py	\
+	intermedia.py	\
+	joblib.py	\
+	starpu_task_wrapper.c

+ 2 - 1
starpupy/src/starpu/__init__.py

@@ -17,4 +17,5 @@
 
 
 from.starpupy import *
 from.starpupy import *
 from .delay import *
 from .delay import *
-from . import joblib
+#from . import joblib
+from .intermedia import *

+ 29 - 0
starpupy/src/delay.py

@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+from starpu import starpupy
+import starpu
+import asyncio
+from functools import partial
+
+def delayed(f=None,*, name=None, synchronous=0, priority=0, color=None, flops=None, perfmodel=None):
+	# add options of task_submit
+	if f is None:
+		return partial(delayed, name=name, synchronous=synchronous, priority=priority, color=color, flops=flops, perfmodel=perfmodel)
+	def submit(*args):
+		fut = starpu.task_submit(name=name, synchronous=synchronous, priority=priority,\
+								 color=color, flops=flops, perfmodel=perfmodel)(f, *args)
+		return fut
+	return submit

+ 63 - 0
starpupy/src/intermedia.py

@@ -0,0 +1,63 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+from starpu import starpupy
+import os
+
+#class perfmodel
+class Perfmodel(object):
+	def __init__(self, symbol):
+		self.symbol=symbol
+		self.pstruct=starpupy.init_perfmodel(self.symbol)
+
+	def get_struct(self):
+		return self.pstruct
+
+	def __del__(self):
+	#def free_struct(self):
+		starpupy.free_perfmodel(self.pstruct)
+
+# generate the dictionary which contains the perfmodel symbol and its struct pointer
+dict_perf={}
+def dict_perf_generator(perfsymbol):
+	if dict_perf.get(perfsymbol)==None:
+		p=Perfmodel(perfsymbol)
+		dict_perf[perfsymbol]=p
+	else:
+		p=dict_perf[perfsymbol]
+	return p
+
+#add options in function task_submit
+def task_submit(*, name=None, synchronous=0, priority=0, color=None, flops=None, perfmodel=None):
+	if perfmodel==None:
+		dict_option={'name': name, 'synchronous': synchronous, 'priority': priority, 'color': color, 'flops': flops, 'perfmodel': None}
+	else:
+		p=dict_perf_generator(perfmodel)
+		dict_option={'name': name, 'synchronous': synchronous, 'priority': priority, 'color': color, 'flops': flops, 'perfmodel': p.get_struct()}
+
+	def call_task_submit(f, *args):
+		fut=starpupy._task_submit(f, *args, dict_option)
+		return fut
+	return call_task_submit
+
+# dump performance model and show the plot
+def perfmodel_plot(perfmodel, view=True):
+	p=dict_perf[perfmodel]
+	starpupy.save_history_based_model(p.get_struct())
+	if view == True:
+		os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
+		os.system('gnuplot starpu_'+perfmodel+'.gp')
+		os.system('gv starpu_'+perfmodel+'.eps')

+ 324 - 0
starpupy/src/joblib.py

@@ -0,0 +1,324 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+import sys
+import types
+import joblib as jl
+from joblib import logger
+from joblib._parallel_backends import ParallelBackendBase
+from starpu import starpupy
+import starpu
+import asyncio
+import math
+import functools
+import numpy as np
+import inspect
+import threading
+
+BACKENDS={
+	#'loky': LokyBackend,
+}
+_backend = threading.local()
+
+# get the number of CPUs controlled by StarPU
+def cpu_count():
+	n_cpus=starpupy.cpu_worker_get_count()
+	return n_cpus
+
+# split a list ls into n_block numbers of sub-lists 
+def partition(ls, n_block):
+	if len(ls)>=n_block:
+		# there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
+		q1=math.ceil(len(ls)/n_block)
+		q2=math.floor(len(ls)/n_block)
+		n1=len(ls)%n_block
+		#n2=n_block-n1
+		# generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
+		L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
+		L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
+
+		L=L1+L2
+	else:
+		# if the block number is larger than the length of list, each element in the list is a sub-list
+		L=[ls[i:i+1] for i in range (len(ls))]
+	return L
+
+# split a two-dimension numpy matrix into n_block numbers of sub-matrices
+def array2d_split(a, n_block):
+	# decompose number of n_jobs to two integers multiply
+	c_tmp=math.floor(math.sqrt(n_block))
+	for i in range (c_tmp,0,-1):
+		if n_block%i==0:
+			c=i
+			r=int(n_block/c)
+			break
+	# split column
+	arr_split_c=np.array_split(a,c,0)
+	arr_split=[]
+	# split row
+	for i in range(c):
+		arr_split_r=np.array_split(arr_split_c[i],r,1)
+		for j in range(r):
+			arr_split.append(arr_split_r[j])
+	return arr_split
+
+
+def future_generator(iterable, n_jobs, dict_task):
+	# iterable is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
+	#print("iterable type is ", type(iterable))
+	#print("iterable is", iterable)
+	# get the number of block
+	if n_jobs<-cpu_count()-1 or n_jobs>cpu_count():
+		raise SystemExit('Error: n_jobs is out of range')
+		#print("Error: n_jobs is out of range, number of CPUs is", cpu_count())
+	elif n_jobs<0:
+		n_block=cpu_count()+1+n_jobs
+	else:
+		n_block=n_jobs
+
+	# if arguments is tuple format
+	if type(iterable) is tuple:
+		# the function is always the first element
+		f=iterable[0]
+		# get the name of formal arguments of f
+		formal_args=inspect.getargspec(f).args
+		# get the arguments list
+		args=[]
+		# argument is arbitrary in iterable[1]
+		args=list(iterable[1])
+		# argument is keyword argument in iterable[2]
+		for i in range(len(formal_args)):
+			for j in iterable[2].keys():
+				if j==formal_args[i]:
+					args.append(iterable[2][j])
+		# check whether all arrays have the same size
+		l_arr=[]
+		# list of Future result
+		L_fut=[]
+		# split the vector
+		args_split=[]
+		for i in range(len(args)):
+			args_split.append([])
+			# if the array is an numpy array
+			if type(args[i]) is np.ndarray:
+				# one-dimension matrix
+				if args[i].ndim==1:
+					# split numpy array
+					args_split[i]=np.array_split(args[i],n_block)
+					# get the length of numpy array
+					l_arr.append(args[i].size)
+				# two-dimension matrix
+				elif args[i].ndim==2:
+					# split numpy 2D array
+					args_split[i]=array2d_split(args[i],n_block)
+			# if the array is a generator
+			elif isinstance(args[i],types.GeneratorType):
+				# split generator
+				args_split[i]=partition(list(args[i]),n_block)
+				# get the length of generator
+				l_arr.append(sum(len(args_split[i][j]) for j in range(len(args_split[i]))))
+		if len(set(l_arr))>1:
+			raise SystemExit('Error: all arrays should have the same size')
+		#print("args list is", args_split)
+		for i in range(n_block):
+			# generate the argument list
+			L_args=[]
+			for j in range(len(args)):
+				if type(args[j]) is np.ndarray or isinstance(args[j],types.GeneratorType):
+					L_args.append(args_split[j][i])
+				else:
+					L_args.append(args[j])
+			#print("L_args is", L_args)
+			fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\
+								   color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'])\
+				                  (f, *L_args)
+			L_fut.append(fut)
+		return L_fut
+
+	# if iterable is a generator or a list of function
+	else:
+		L=list(iterable)
+		#print(L)
+		# generate a list of function according to iterable
+		def lf(ls):
+			L_func=[]
+			for i in range(len(ls)):
+				# the first element is the function
+				f=ls[i][0]
+				# the second element is the args list of a type tuple
+				L_args=list(ls[i][1])
+				# generate a list of function
+				L_func.append(f(*L_args))
+			return L_func
+
+		# generate the split function list
+		L_split=partition(L,n_block)
+		# operation in each split list
+		L_fut=[]
+		for i in range(len(L_split)):
+			fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\
+								   color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'])\
+				                  (lf, L_split[i])
+			L_fut.append(fut)
+		return L_fut
+
+class Parallel(object):
+	def __init__(self, mode="normal", perfmodel=None, end_msg=None,\
+			 name=None, synchronous=0, priority=0, color=None, flops=None,\
+	         n_jobs=None, backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
+	         batch_size='auto', temp_folder=None, max_nbytes='1M',\
+	         mmap_mode='r', prefer=None, require=None):
+		#active_backend= get_active_backend()
+		# nesting_level = active_backend.nesting_level
+
+		# if backend is None:
+		# 	backend = active_backend
+
+		# else:
+		# 	try:
+		# 		backend_factory = BACKENDS[backend]
+		# 	except KeyError as e:
+		# 		raise ValueError("Invalid backend: %s, expected one of %r"
+  #                                % (backend, sorted(BACKENDS.keys()))) from e
+		# 	backend = backend_factory(nesting_level=nesting_level)
+
+		if n_jobs is None:
+			n_jobs = 1
+
+		self.mode=mode
+		self.perfmodel=perfmodel
+		self.end_msg=end_msg
+		self.name=name
+		self.synchronous=synchronous
+		self.priority=priority
+		self.color=color
+		self.flops=flops
+		self.n_jobs=n_jobs
+		self._backend=backend
+
+	def print_progress(self):
+		#pass
+		print("", starpupy.task_nsubmitted())
+
+	def __call__(self,iterable):
+		#generate the dictionary of task_submit
+		dict_task={'name': self.name, 'synchronous': self.synchronous, 'priority': self.priority, 'color': self.color, 'flops': self.flops, 'perfmodel': self.perfmodel}
+		if hasattr(self._backend, 'start_call'):
+			self._backend.start_call()
+		# the mode normal, user can call the function directly without using async
+		if self.mode=="normal":
+			async def asy_main():
+				L_fut=future_generator(iterable, self.n_jobs, dict_task)
+				res=[]
+				for i in range(len(L_fut)):
+					L_res=await L_fut[i]
+					res.extend(L_res)
+				#print(res)
+				#print("type of result is", type(res))
+				return res
+			#asyncio.run(asy_main())
+			#retVal=asy_main
+			loop = asyncio.get_event_loop()
+			results = loop.run_until_complete(asy_main())
+			retVal = results
+		# the mode future, user needs to use asyncio module and await the Future result in main function
+		elif self.mode=="future":
+			L_fut=future_generator(iterable, self.n_jobs, dict_task)
+			fut=asyncio.gather(*L_fut)
+			if self.end_msg!=None:
+				fut.add_done_callback(functools.partial(print, self.end_msg))
+			retVal=fut
+		if hasattr(self._backend, 'stop_call'):
+			self._backend.stop_call()
+		return retVal
+
+def delayed(function):
+	def delayed_function(*args, **kwargs):
+		return function, args, kwargs
+	return delayed_function
+
+
+######################################################################
+__version__ = jl.__version__
+
+class Memory(jl.Memory):
+	def __init__(self,location=None, backend='local', cachedir=None,
+                 mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
+                 backend_options=None):
+		super(Memory, self).__init__(location=None, backend='local', cachedir=None,
+                 mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
+                 backend_options=None)
+
+
+def dump(value, filename, compress=0, protocol=None, cache_size=None):
+	return jl.dump(value, filename, compress, protocol, cache_size)
+
+def load(filename, mmap_mode=None):
+	return jl.load(filename, mmap_mode)
+
+def hash(obj, hash_name='md5', coerce_mmap=False):
+	return jl.hash(obj, hash_name, coerce_mmap)
+
+def register_compressor(compressor_name, compressor, force=False):
+	return jl.register_compressor(compressor_name, compressor, force)
+
+def effective_n_jobs(n_jobs=-1):
+	return cpu_count()
+
+def get_active_backend():
+	backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
+	if backend_and_jobs is not None:
+		backend,n_jobs=backend_and_jobs
+		return backend
+	backend = BACKENDS[loky](nesting_level=0)
+	return backend
+
+class parallel_backend(object):
+	def __init__(self, backend, n_jobs=-1, inner_max_num_threads=None,
+                 **backend_params):
+		if isinstance(backend, str):
+			backend = BACKENDS[backend](**backend_params)
+
+		current_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
+		if backend.nesting_level is None:
+			if current_backend_and_jobs is None:
+				nesting_level = 0
+			else:
+				nesting_level = current_backend_and_jobs[0].nesting_level
+
+			backend.nesting_level = nesting_level
+
+		# Save the backends info and set the active backend
+		self.old_backend_and_jobs = current_backend_and_jobs
+		self.new_backend_and_jobs = (backend, n_jobs)
+
+		_backend.backend_and_jobs = (backend, n_jobs)
+
+	def __enter__(self):
+		return self.new_backend_and_jobs
+
+	def __exit__(self, type, value, traceback):
+		self.unregister()
+
+	def unregister(self):
+		if self.old_backend_and_jobs is None:
+			if getattr(_backend, 'backend_and_jobs', None) is not None:
+				del _backend.backend_and_jobs
+		else:
+			_backend.backend_and_jobs = self.old_backend_and_jobs
+
+def register_parallel_backend(name, factory):
+	BACKENDS[name] = factory

+ 23 - 0
starpupy/src/setup.cfg.in

@@ -0,0 +1,23 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Universit'e de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+[build]
+build-platlib=build
+build-temp=build/tmp
+
+[install]
+prefix=@prefix@
+
+

+ 40 - 0
starpupy/src/setup.py.in

@@ -0,0 +1,40 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Universit'e de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+from distutils.core import setup, Extension
+
+numpy_dir = '@PYTHON_NUMPY_DIR@'
+if numpy_dir != '':
+    numpy_include_dir = [numpy_dir]
+else:
+    numpy_include_dir = []
+starpupy = Extension('starpu.starpupy',
+                     include_dirs = ['@STARPU_SRC_DIR@/include', '@STARPU_BUILD_DIR@/include'] + numpy_include_dir,
+                     libraries = ['starpu-@STARPU_EFFECTIVE_VERSION@'],
+                     library_dirs = ['@STARPU_BUILD_DIR@/src/.libs'],
+	             sources = ['starpu/starpu_task_wrapper.c'])
+
+setup(
+    name = 'starpupy',
+    version = '0.5',
+    description = 'Python bindings for StarPU',
+    author = 'StarPU team',
+    author_email = 'starpu-devel@lists.gforge.inria.fr',
+    url = 'https://starpu.gitlabpages.inria.fr/',
+    license = 'GPL',
+    platforms = 'posix',
+    ext_modules = [starpupy],
+    packages = ['starpu'],
+    )

+ 0 - 13
starpupy/src/starpu/Makefile

@@ -1,13 +0,0 @@
-PYTHON ?= python3
-
-CPPFLAGS = $(shell $(PYTHON)-config --includes) -Wall -O2 -g
-CFLAGS += $(shell pkg-config --cflags starpu-1.3)
-LDLIBS += $(shell pkg-config --libs starpu-1.3)
-
-all: starpupy.so
-
-starpupy.so: starpu_task_wrapper.c Makefile
-	$(CC) -fPIC $(CFLAGS) $< -o $@ -shared  $(CPPFLAGS) $(LDLIBS)
-
-clean:
-	rm -f starpupy.so

+ 0 - 147
starpupy/src/starpu/joblib.py

@@ -1,147 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-from starpu import starpupy
-import asyncio
-import math
-import os
-import pickle
-import json
-import functools
-
-# get the number of CPUs controlled by StarPU
-n_cpus=starpupy.cpu_worker_get_count()
-
-#class perfmodel
-class Perfmodel(object):
-	def __init__(self, symbol):
-		self.symbol=symbol
-		self.pstruct=starpupy.init_perfmodel(self.symbol)
-
-	def get_struct(self):
-		return self.pstruct
-
-	def __del__(self):
-	#def free_struct(self):
-		starpupy.free_perfmodel(self.pstruct)
-
-# split a list ls into n_block numbers of sub-lists 
-def partition(ls, n_block):
-	if len(ls)>=n_block:
-		# there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
-		q1=math.ceil(len(ls)/n_block)
-		q2=math.floor(len(ls)/n_block)
-		n1=len(ls)%n_block
-		#n2=n_block-n1
-		# generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
-		L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
-		L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
-
-		L=L1+L2
-	else:
-		# if the block number is larger than the length of list, each element in the list is a sub-list
-		L=[ls[i:i+1] for i in range (len(ls))]
-	return L
-
-# generate the dictionary which contains the perfmodel symbol and its struct pointer
-dict_perf={}
-def dict_perf_generator(perfsymbol):
-	if dict_perf.get(perfsymbol)==None:
-		p=Perfmodel(perfsymbol)
-		dict_perf[perfsymbol]=p
-	else:
-		p=dict_perf[perfsymbol]
-	return p
-
-def future_generator(g, n_jobs, perfsymbol):
-	# g is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
-	L=list(g)
-	# generate a list of function according to g
-	def lf(ls):
-		L_func=[]
-		for i in range(len(ls)):
-			# the first element is the function
-			f=ls[i][0]
-			# the second element is the args list of a type tuple
-			L_args=list(ls[i][1])
-			# generate a list of function
-			L_func.append(f(*L_args))
-		return L_func
-	# get the number of block
-	if n_jobs<-n_cpus-1 or n_jobs>n_cpus:
-		print("Error: n_jobs is out of range, number of CPUs is", n_cpus)
-	elif n_jobs<0:
-		n_block=n_cpus+1+n_jobs
-	else:
-		n_block=n_jobs
-	# generate the split function list
-	L_split=partition(L,n_block)
-	# operation in each split list
-	L_fut=[]
-	for i in range(len(L_split)):
-		if perfsymbol==None:
-			fut=starpupy.task_submit(lf, L_split[i])
-			L_fut.append(fut)
-		else:
-			p=dict_perf_generator(perfsymbol)
-			fut=starpupy.task_submit(lf, L_split[i], p.get_struct())
-			L_fut.append(fut)
-	return L_fut
-
-def parallel(*, mode="normal", n_jobs=1, perfmodel=None, end_msg=None,\
-	         backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
-	         batch_size='auto', temp_folder=None, max_nbytes='1M',\
-	         mmap_mode='r', prefer=None, require=None):
-	# the mode normal, user can call the function directly without using async
-	if mode=="normal":
-		def parallel_normal(g):
-			async def asy_main():
-				L_fut=future_generator(g, n_jobs, perfmodel)
-				res=[]
-				for i in range(len(L_fut)):
-					L_res=await L_fut[i]
-					res.extend(L_res)
-				#print(res)
-				return res
-			asyncio.run(asy_main())
-			return asy_main
-		return parallel_normal
-	# the mode future, user needs to use asyncio module and await the Future result in main function
-	elif mode=="future":
-		def parallel_future(g):
-			L_fut=future_generator(g, n_jobs, perfmodel)
-			fut=asyncio.gather(*L_fut)
-			if end_msg==None:
-				return fut
-			else:
-				fut.add_done_callback(functools.partial(print, end_msg))
-				return fut
-			#return fut
-		return parallel_future
-
-def delayed(f):
-	def delayed_func(*args):
-		return f, args
-	return delayed_func
-
-
-######################################################################
-# dump performance model
-def perfmodel_plot(perfmodel):
-	p=dict_perf[perfmodel]
-	starpupy.save_history_based_model(p.get_struct())
-	os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
-	os.system('gnuplot starpu_'+perfmodel+'.gp')
-	os.system('gv starpu_'+perfmodel+'.eps')

+ 0 - 416
starpupy/src/starpu/starpu_task_wrapper.c

@@ -1,416 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <starpu.h>
-
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-
-/*macro*/
-#if defined(Py_DEBUG) || defined(DEBUG)
-extern void _Py_CountReferences(FILE*);
-#define CURIOUS(x) { fprintf(stderr, __FILE__ ":%d ", __LINE__); x; }
-#else
-#define CURIOUS(x)
-#endif
-#define MARKER()        CURIOUS(fprintf(stderr, "\n"))
-#define DESCRIBE(x)     CURIOUS(fprintf(stderr, "  " #x "=%d\n", x))
-#define DESCRIBE_HEX(x) CURIOUS(fprintf(stderr, "  " #x "=%08x\n", x))
-#define COUNTREFS()     CURIOUS(_Py_CountReferences(stderr))
-/*******/
-
-/*********************Functions passed in task_submit wrapper***********************/
-
-static PyObject* asyncio_module; /*python asyncio library*/
-
-/*structure contains parameters which are passed to starpu_task.cl_arg*/
-struct codelet_struct { 
-    PyObject* f; /*the python function passed in*/
-    PyObject* argList; /*argument list of python function passed in*/
-    PyObject* rv; /*return value when using PyObject_CallObject call the function f*/
-    PyObject* fut; /*asyncio.Future*/
-    PyObject* lp; /*asyncio.Eventloop*/
-};
-typedef struct codelet_struct codelet_st;
-
-/*function passed to starpu_codelet.cpu_func*/
-void codelet_func(void *buffers[], void *cl_arg){
-
-    codelet_st* cst = (codelet_st*) cl_arg;
-
-    /*make sure we own the GIL*/
-    PyGILState_STATE state = PyGILState_Ensure();
-
-    /*verify that the function is a proper callable*/
-    if (!PyCallable_Check(cst->f)) {
-
-        printf("py_callback: expected a callable function\n"); 
-        exit(1);
-    }
-    
-    /*check the arguments of python function passed in*/
-    for (int i=0; i < PyTuple_Size(cst->argList); i++){
-      PyObject* obj=PyTuple_GetItem(cst->argList, i);
-      const char* tp = Py_TYPE(obj)->tp_name;
-      if(strcmp(tp, "_asyncio.Future") == 0){
-        /*if one of arguments is Future, get its result*/
-        PyObject * fut_result = PyObject_CallMethod(obj, "result", NULL);
-        /*replace the Future argument to its result*/
-        PyTuple_SetItem(cst->argList, i, fut_result);
-      }
-    }
-
-    /*call the python function*/
-    PyObject *pRetVal = PyObject_CallObject(cst->f, cst->argList);
-    cst->rv=pRetVal;
-
-    //Py_DECREF(cst->f);
-
-    /*restore previous GIL state*/
-    PyGILState_Release(state);
-
-}
-
-/*function passed to starpu_task.callback_func*/
-void cb_func(void *v){
-
-	struct starpu_task *task=starpu_task_get_current();
-    codelet_st* cst = (codelet_st*) task->cl_arg;
-
-    /*make sure we own the GIL*/
-    PyGILState_STATE state = PyGILState_Ensure();
-
-    /*set the Future result and mark the Future as done*/
-    PyObject * set_result = PyObject_GetAttrString(cst->fut, "set_result");
-    PyObject * loop_callback = PyObject_CallMethod(cst->lp, "call_soon_threadsafe", "(O,O)", set_result, cst->rv);
-
-    Py_DECREF(loop_callback);
-    Py_DECREF(set_result);
-    Py_DECREF(cst->rv);
-    Py_DECREF(cst->fut);
-    Py_DECREF(cst->lp);
-
-    //Py_DECREF(perfmodel);
-    struct starpu_codelet * func_cl=(struct starpu_codelet *) task->cl;
-    if (func_cl->model != NULL){
-      struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
-      PyObject* perfmodel=PyCapsule_New(perf, "Perf", 0);
-      Py_DECREF(perfmodel);
-    }
-
-    for(int i = 0; i < PyTuple_Size(cst->argList); i++){
-        Py_DECREF(PyTuple_GetItem(cst->argList, i));
-    }
-    Py_DECREF(cst->argList);
-
-    /*restore previous GIL state*/
-    PyGILState_Release(state);
-
-    /*deallocate task*/
-    free(task->cl);
-	  free(task->cl_arg);
-
-}
-
-/***********************************************************************************/
-/*PyObject*->struct starpu_task**/
-static struct starpu_task *PyTask_AsTask(PyObject* obj){
-  return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task");
-}
-
-/* destructor function for task */
-static void del_Task(PyObject *obj) {
-  struct starpu_task* obj_task=PyTask_AsTask(obj);
-  obj_task->destroy=1; /*XXX we should call starpu task destroy*/
-}
-
-/*struct starpu_task*->PyObject**/
-static PyObject *PyTask_FromTask(struct starpu_task *task) {
-  return PyCapsule_New(task, "Task", del_Task);
-}
-
-/***********************************************************************************/
-static size_t sizebase (struct starpu_task * task, unsigned nimpl){
-
-  codelet_st* cst = (codelet_st*) task->cl_arg;
-
-  PyObject* obj=PyTuple_GetItem(cst->argList, 0);
-  /*get the length of arguments*/
-  int n = PyList_Size(obj);
-
-  return n;
-}
-
-static void del_Perf(PyObject *obj){
-  struct starpu_perfmodel *perf=(struct starpu_perfmodel*)PyCapsule_GetPointer(obj, "Perf");
-  free(perf);
-}
-/*initialization of perfmodel*/
-static PyObject* init_perfmodel(PyObject *self, PyObject *args){
-
-  char* sym;
-
-  if (!PyArg_ParseTuple(args, "s", &sym))
-    return NULL;
-
-  /*allocate a perfmodel structure*/
-  struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel));
-
-  /*get the perfmodel symbol*/
-  char* p =strdup(sym);
-  perf->symbol=p;
-  perf->type=STARPU_HISTORY_BASED;
-
-  /*struct perfmodel*->PyObject**/
-  PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL);
-
-  return perfmodel;
-}
-
-
-/*free perfmodel*/
-static PyObject* free_perfmodel(PyObject *self, PyObject *args){
-
-  PyObject* perfmodel;
-  if (!PyArg_ParseTuple(args, "O", &perfmodel))
-    return NULL;
-
-  /*PyObject*->struct perfmodel**/
-  struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-
-  starpu_save_history_based_model(perf);
-  //starpu_perfmodel_unload_model(perf);
-  free(perf->symbol);
-  starpu_perfmodel_deinit(perf);
-  free(perf);
-
-  /*return type is void*/
-  Py_INCREF(Py_None);
-  return Py_None;
-}
-
-static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args){
-
-  PyObject* perfmodel;
-  if (!PyArg_ParseTuple(args, "O", &perfmodel))
-    return NULL;
-
-  /*PyObject*->struct perfmodel**/
-  struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-
-  starpu_save_history_based_model(perf);
-
-  /*return type is void*/
-  Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*****************************Wrappers of StarPU methods****************************/
-/*wrapper submit method*/
-static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args){
-
-    /*get the running Event loop*/
-    PyObject* loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL);
-    /*create a asyncio.Future object*/
-    PyObject* fut = PyObject_CallMethod(loop, "create_future", NULL);
-
-    /*first argument in args is always the python function passed in*/
-    PyObject* func_py = PyTuple_GetItem(args, 0);
-    Py_INCREF(func_py);
-
-	  /*allocate a task structure and initialize it with default values*/
-    struct starpu_task *task=starpu_task_create();
-    task->destroy=0;
-
-    PyObject* PyTask=PyTask_FromTask(task);
-
-    /*set one of fut attribute to the task pointer*/
-    PyObject_SetAttrString(fut, "starpu_task", PyTask);
-    /*check the arguments of python function passed in*/
-    for (int i=1; i < PyTuple_Size(args); i++){
-      PyObject* obj=PyTuple_GetItem(args, i);
-      const char* tp = Py_TYPE(obj)->tp_name;
-      if(strcmp(tp, "_asyncio.Future") == 0){
-        /*if one of arguments is Future, get its corresponding task*/
-        PyObject* fut_task=PyObject_GetAttrString(obj, "starpu_task");
-        /*declare task dependencies between the current task and the corresponding task of Future argument*/
-        starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task));
-
-        Py_DECREF(fut_task);
-      }
-    }
-    
-    /*allocate a codelet structure*/
-    struct starpu_codelet *func_cl=(struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
-    /*initialize func_cl with default values*/
-    starpu_codelet_init(func_cl);
-    func_cl->cpu_func=&codelet_func;
-    
-    /*check whether the last argument in args is the perfmodel*/
-    PyObject* perfmodel=PyTuple_GetItem(args, PyTuple_Size(args)-1);
-    const char* tp_perf = Py_TYPE(perfmodel)->tp_name;
-    if (strcmp(tp_perf, "PyCapsule")==0){
-      /*PyObject*->struct perfmodel**/
-      struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-      func_cl->model=perf;
-      Py_INCREF(perfmodel);
-    }
-    
-
-    /*allocate a new codelet structure to pass the python function, asyncio.Future and Event loop*/
-    codelet_st *cst = (codelet_st*)malloc(sizeof(codelet_st));
-    cst->f = func_py;
-    cst->fut = fut;
-    cst->lp = loop;
-    
-    Py_INCREF(fut);
-    Py_INCREF(loop);
-
-    /*pass args in argList*/
-    if (PyTuple_Size(args)==1 || (PyTuple_Size(args)==2 && strcmp(tp_perf, "PyCapsule")==0))/*function no arguments*/
-      cst->argList = PyTuple_New(0);
-    else if(PyTuple_Size(args)>2 && strcmp(tp_perf, "PyCapsule")==0){/*function has arguments and the last argument in args is the perfmodel*/
-      cst->argList = PyTuple_New(PyTuple_Size(args)-2);
-      for (int i=0; i < PyTuple_Size(args)-2; i++){
-        PyObject* tmp=PyTuple_GetItem(args, i+1);
-        PyTuple_SetItem(cst->argList, i, tmp);
-        Py_INCREF(PyTuple_GetItem(cst->argList, i));
-      }
-    }
-    else{/*function has arguments and no perfmodel*/
-      cst->argList = PyTuple_New(PyTuple_Size(args)-1);
-      for (int i=0; i < PyTuple_Size(args)-1; i++){
-        PyObject* tmp=PyTuple_GetItem(args, i+1);
-        PyTuple_SetItem(cst->argList, i, tmp);
-        Py_INCREF(PyTuple_GetItem(cst->argList, i));
-      }
-    }
-
-    task->cl=func_cl;
-    task->cl_arg=cst;
-    /*call starpu_task_submit method*/
-    starpu_task_submit(task);
-    task->callback_func=&cb_func;
-    if (strcmp(tp_perf, "PyCapsule")==0){
-      struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
-      perf->size_base=&sizebase;
-    }
-
-    //printf("the number of reference is %ld\n", Py_REFCNT(func_py));
-    //_Py_PrintReferences(stderr);
-    //COUNTREFS();
-    return fut;
-
-}
-
-/*wrapper wait for all method*/
-static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_task_wait_for_all method*/
-	Py_BEGIN_ALLOW_THREADS
-	starpu_task_wait_for_all();
-	Py_END_ALLOW_THREADS
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper pause method*/
-static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_pause method*/
-	starpu_pause();
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper resume method*/
-static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_resume method*/
-	starpu_resume();
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper get count cpu method*/
-static PyObject* starpu_cpu_worker_get_count_wrapper(PyObject *self, PyObject *args){
-
-  /*call starpu_cpu_worker_get_count method*/
-  int num_cpu=starpu_cpu_worker_get_count();
-
-  /*return type is unsigned*/
-  return Py_BuildValue("I", num_cpu);
-}
-
-/***********************************************************************************/
-
-/***************The module’s method table and initialization function**************/
-/*method table*/
-static PyMethodDef starpupyMethods[] = 
-{ 
-  {"task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/
-  {"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/
-  {"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/
-  {"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/
-  {"cpu_worker_get_count", starpu_cpu_worker_get_count_wrapper, METH_VARARGS, "return the number of CPUs controlled by StarPU"}, /*get count cpu method*/
-  {"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/
-  {"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/
-  {"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/
-  {NULL, NULL}
-};
-
-/*deallocation function*/
-static void starpupyFree(void* self){
-	starpu_shutdown();
-  Py_DECREF(asyncio_module);
-  //COUNTREFS();
-}
-
-/*module definition structure*/
-static struct PyModuleDef starpupymodule={
-  PyModuleDef_HEAD_INIT,
-  "starpupy", /*name of module*/
-  NULL,
-  -1,
-  starpupyMethods, /*method table*/
-  NULL,
-  NULL,
-  NULL,
-  starpupyFree /*deallocation function*/
-};
-
-/*initialization function*/
-PyMODINIT_FUNC
-PyInit_starpupy(void)
-{
-    PyEval_InitThreads();
-    /*starpu initialization*/
-	  starpu_init(NULL);
-    /*python asysncio import*/
-    asyncio_module = PyImport_ImportModule("asyncio");
-    /*module import initialization*/
-    return PyModule_Create(&starpupymodule);
-}
-/***********************************************************************************/

+ 536 - 0
starpupy/src/starpu_task_wrapper.c

@@ -0,0 +1,536 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#undef NDEBUG
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <starpu.h>
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+#include <numpy/arrayobject.h>
+#endif
+
+/*macro*/
+#if defined(Py_DEBUG) || defined(DEBUG)
+extern void _Py_CountReferences(FILE*);
+#define CURIOUS(x) { fprintf(stderr, __FILE__ ":%d ", __LINE__); x; }
+#else
+#define CURIOUS(x)
+#endif
+#define MARKER()        CURIOUS(fprintf(stderr, "\n"))
+#define DESCRIBE(x)     CURIOUS(fprintf(stderr, "  " #x "=%d\n", x))
+#define DESCRIBE_HEX(x) CURIOUS(fprintf(stderr, "  " #x "=%08x\n", x))
+#define COUNTREFS()     CURIOUS(_Py_CountReferences(stderr))
+/*******/
+
+/*********************Functions passed in task_submit wrapper***********************/
+
+static PyObject *asyncio_module; /*python asyncio library*/
+
+/*structure contains parameters which are passed to starpu_task.cl_arg*/
+struct codelet_args
+{
+	PyObject *f; /*the python function passed in*/
+	PyObject *argList; /*argument list of python function passed in*/
+	PyObject *rv; /*return value when using PyObject_CallObject call the function f*/
+	PyObject *fut; /*asyncio.Future*/
+	PyObject *lp; /*asyncio.Eventloop*/
+};
+
+/*function passed to starpu_codelet.cpu_func*/
+void codelet_func(void *buffers[], void *cl_arg)
+{
+	struct codelet_args *cst = (struct codelet_args*) cl_arg;
+
+	/*make sure we own the GIL*/
+	PyGILState_STATE state = PyGILState_Ensure();
+
+	/*verify that the function is a proper callable*/
+	if (!PyCallable_Check(cst->f))
+	{
+		printf("py_callback: expected a callable function\n");
+		exit(1);
+	}
+
+	/*check the arguments of python function passed in*/
+	int i;
+	for(i=0; i < PyTuple_Size(cst->argList); i++)
+	{
+		PyObject *obj = PyTuple_GetItem(cst->argList, i);
+		const char *tp = Py_TYPE(obj)->tp_name;
+		if(strcmp(tp, "_asyncio.Future") == 0)
+		{
+			/*if one of arguments is Future, get its result*/
+			PyObject *fut_result = PyObject_CallMethod(obj, "result", NULL);
+			/*replace the Future argument to its result*/
+			PyTuple_SetItem(cst->argList, i, fut_result);
+		}
+		/*else if (strcmp(tp, "numpy.ndarray")==0)
+		  {
+		  printf("array is %p\n", obj);
+		  }*/
+	}
+
+	/*call the python function*/
+	PyObject *pRetVal = PyObject_CallObject(cst->f, cst->argList);
+	//const char *tp = Py_TYPE(pRetVal)->tp_name;
+	//printf("return value type is %s\n", tp);
+	cst->rv = pRetVal;
+
+	//Py_DECREF(cst->f);
+
+	/*restore previous GIL state*/
+	PyGILState_Release(state);
+}
+
+/*function passed to starpu_task.callback_func*/
+void cb_func(void *v)
+{
+	struct starpu_task *task = starpu_task_get_current();
+	struct codelet_args *cst = (struct codelet_args*) task->cl_arg;
+
+	/*make sure we own the GIL*/
+	PyGILState_STATE state = PyGILState_Ensure();
+
+	/*set the Future result and mark the Future as done*/
+	PyObject *set_result = PyObject_GetAttrString(cst->fut, "set_result");
+	PyObject *loop_callback = PyObject_CallMethod(cst->lp, "call_soon_threadsafe", "(O,O)", set_result, cst->rv);
+
+	Py_DECREF(loop_callback);
+	Py_DECREF(set_result);
+	Py_DECREF(cst->rv);
+	Py_DECREF(cst->fut);
+	Py_DECREF(cst->lp);
+	Py_DECREF(cst->argList);
+
+	//Py_DECREF(perfmodel);
+	struct starpu_codelet *func_cl=(struct starpu_codelet *) task->cl;
+	if (func_cl->model != NULL)
+	{
+		struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
+		PyObject *perfmodel=PyCapsule_New(perf, "Perf", 0);
+		Py_DECREF(perfmodel);
+	}
+
+	/*restore previous GIL state*/
+	PyGILState_Release(state);
+
+	/*deallocate task*/
+	free(task->cl);
+	free(task->cl_arg);
+}
+
+/***********************************************************************************/
+/*PyObject*->struct starpu_task**/
+static struct starpu_task *PyTask_AsTask(PyObject *obj)
+{
+	return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task");
+}
+
+/* destructor function for task */
+static void del_Task(PyObject *obj)
+{
+	struct starpu_task *obj_task=PyTask_AsTask(obj);
+	obj_task->destroy=1; /*XXX we should call starpu task destroy*/
+}
+
+/*struct starpu_task*->PyObject**/
+static PyObject *PyTask_FromTask(struct starpu_task *task)
+{
+	return PyCapsule_New(task, "Task", del_Task);
+}
+
+/***********************************************************************************/
+static size_t sizebase (struct starpu_task *task, unsigned nimpl)
+{
+	int n=0;
+	struct codelet_args *cst = (struct codelet_args*) task->cl_arg;
+
+	/*get the result of function*/
+	PyObject *obj=cst->rv;
+	/*get the length of result*/
+	const char *tp = Py_TYPE(obj)->tp_name;
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+	/*if the result is a numpy array*/
+	if (strcmp(tp, "numpy.ndarray")==0)
+		n = PyArray_SIZE(obj);
+	else
+#endif
+	/*if the result is a list*/
+	if (strcmp(tp, "list")==0)
+		n = PyList_Size(obj);
+	/*else error*/
+	else
+	{
+		printf("starpu_perfmodel::size_base: the type of function result is unrecognized\n");
+		exit(1);
+	}
+	return n;
+}
+
+static void del_Perf(PyObject *obj)
+{
+	struct starpu_perfmodel *perf=(struct starpu_perfmodel*)PyCapsule_GetPointer(obj, "Perf");
+	free(perf);
+}
+
+/*initialization of perfmodel*/
+static PyObject* init_perfmodel(PyObject *self, PyObject *args)
+{
+	char *sym;
+
+	if (!PyArg_ParseTuple(args, "s", &sym))
+		return NULL;
+
+	/*allocate a perfmodel structure*/
+	struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel));
+
+	/*get the perfmodel symbol*/
+	char *p =strdup(sym);
+	perf->symbol=p;
+	perf->type=STARPU_HISTORY_BASED;
+
+	/*struct perfmodel*->PyObject**/
+	PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL);
+
+	return perfmodel;
+}
+
+/*free perfmodel*/
+static PyObject* free_perfmodel(PyObject *self, PyObject *args)
+{
+	PyObject *perfmodel;
+	if (!PyArg_ParseTuple(args, "O", &perfmodel))
+		return NULL;
+
+	/*PyObject*->struct perfmodel**/
+	struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+
+	starpu_save_history_based_model(perf);
+	//starpu_perfmodel_unload_model(perf);
+	//free(perf->symbol);
+	starpu_perfmodel_deinit(perf);
+	free(perf);
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args)
+{
+	PyObject *perfmodel;
+	if (!PyArg_ParseTuple(args, "O", &perfmodel))
+		return NULL;
+
+	/*PyObject*->struct perfmodel**/
+	struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+
+	starpu_save_history_based_model(perf);
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*****************************Wrappers of StarPU methods****************************/
+/*wrapper submit method*/
+static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args)
+{
+	/*get the running Event loop*/
+	PyObject *loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL);
+	/*create a asyncio.Future object*/
+	PyObject *fut = PyObject_CallMethod(loop, "create_future", NULL);
+
+	/*first argument in args is always the python function passed in*/
+	PyObject *func_py = PyTuple_GetItem(args, 0);
+	Py_INCREF(func_py);
+
+	/*allocate a task structure and initialize it with default values*/
+	struct starpu_task *task=starpu_task_create();
+	task->destroy=0;
+
+	PyObject *PyTask=PyTask_FromTask(task);
+
+	/*set one of fut attribute to the task pointer*/
+	PyObject_SetAttrString(fut, "starpu_task", PyTask);
+	/*check the arguments of python function passed in*/
+	int i;
+	for(i=1; i < PyTuple_Size(args)-1; i++)
+	{
+		PyObject *obj=PyTuple_GetItem(args, i);
+		const char* tp = Py_TYPE(obj)->tp_name;
+		if(strcmp(tp, "_asyncio.Future") == 0)
+		{
+			/*if one of arguments is Future, get its corresponding task*/
+			PyObject *fut_task=PyObject_GetAttrString(obj, "starpu_task");
+			/*declare task dependencies between the current task and the corresponding task of Future argument*/
+			starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task));
+
+			Py_DECREF(fut_task);
+		}
+	}
+
+	/*allocate a codelet structure*/
+	struct starpu_codelet *func_cl=(struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
+	/*initialize func_cl with default values*/
+	starpu_codelet_init(func_cl);
+	func_cl->cpu_funcs[0]=&codelet_func;
+	func_cl->cpu_funcs_name[0]="codelet_func";
+
+	/*check whether the option perfmodel is None*/
+	PyObject *dict_option = PyTuple_GetItem(args, PyTuple_Size(args)-1);/*the last argument is the option dictionary*/
+	PyObject *perfmodel = PyDict_GetItemString(dict_option, "perfmodel");
+	const char *tp_perf = Py_TYPE(perfmodel)->tp_name;
+	if (strcmp(tp_perf, "PyCapsule")==0)
+	{
+		/*PyObject*->struct perfmodel**/
+		struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+		func_cl->model=perf;
+		Py_INCREF(perfmodel);
+	}
+
+	/*allocate a new codelet structure to pass the python function, asyncio.Future and Event loop*/
+	struct codelet_args *cst = (struct codelet_args*)malloc(sizeof(struct codelet_args));
+	cst->f = func_py;
+	cst->fut = fut;
+	cst->lp = loop;
+
+	Py_INCREF(fut);
+	Py_INCREF(loop);
+
+	/*pass args in argList*/
+	if (PyTuple_Size(args)==2)/*function no arguments*/
+		cst->argList = PyTuple_New(0);
+	else
+	{/*function has arguments*/
+		cst->argList = PyTuple_New(PyTuple_Size(args)-2);
+		int i;
+		for(i=0; i < PyTuple_Size(args)-2; i++)
+		{
+			PyObject *tmp=PyTuple_GetItem(args, i+1);
+			PyTuple_SetItem(cst->argList, i, tmp);
+			Py_INCREF(PyTuple_GetItem(cst->argList, i));
+		}
+	}
+
+	task->cl=func_cl;
+	task->cl_arg=cst;
+
+	/*pass optional values name=None, synchronous=1, priority=0, color=None, flops=None, perfmodel=None*/
+	/*const char * name*/
+	PyObject *PyName = PyDict_GetItemString(dict_option, "name");
+	const char *name_type = Py_TYPE(PyName)->tp_name;
+	if (strcmp(name_type, "NoneType")!=0)
+	{
+		PyObject *pStrObj = PyUnicode_AsUTF8String(PyName);
+		char* name_str = PyBytes_AsString(pStrObj);
+		char* name = strdup(name_str);
+		//printf("name is %s\n", name);
+		task->name=name;
+		Py_DECREF(pStrObj);
+	}
+
+	/*unsigned synchronous:1*/
+	PyObject *PySync = PyDict_GetItemString(dict_option, "synchronous");
+	unsigned sync=PyLong_AsUnsignedLong(PySync);
+	//printf("sync is %u\n", sync);
+	task->synchronous=sync;
+
+	/*int priority*/
+	PyObject *PyPrio = PyDict_GetItemString(dict_option, "priority");
+	int prio=PyLong_AsLong(PyPrio);
+	//printf("prio is %d\n", prio);
+	task->priority=prio;
+
+	/*unsigned color*/
+	PyObject *PyColor = PyDict_GetItemString(dict_option, "color");
+	const char *color_type = Py_TYPE(PyColor)->tp_name;
+	if (strcmp(color_type, "NoneType")!=0)
+	{
+		unsigned color=PyLong_AsUnsignedLong(PyColor);
+		//printf("color is %u\n", color);
+		task->color=color;
+	}
+
+	/*double flops*/
+	PyObject *PyFlops = PyDict_GetItemString(dict_option, "flops");
+	const char *flops_type = Py_TYPE(PyFlops)->tp_name;
+	if (strcmp(flops_type, "NoneType")!=0)
+	{
+		double flops=PyFloat_AsDouble(PyFlops);
+		//printf("flops is %f\n", flop);
+		task->flops=flops;
+	}
+
+	task->callback_func=&cb_func;
+
+	/*call starpu_task_submit method*/
+	Py_BEGIN_ALLOW_THREADS
+		int ret = starpu_task_submit(task);
+		assert(ret==0);
+	Py_END_ALLOW_THREADS
+
+	if (strcmp(tp_perf, "PyCapsule")==0)
+	{
+		struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
+		perf->size_base=&sizebase;
+	}
+
+	//printf("the number of reference is %ld\n", Py_REFCNT(func_py));
+	//_Py_PrintReferences(stderr);
+	//COUNTREFS();
+	return fut;
+}
+
+/*wrapper wait for all method*/
+static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_task_wait_for_all method*/
+	Py_BEGIN_ALLOW_THREADS
+		starpu_task_wait_for_all();
+	Py_END_ALLOW_THREADS
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper pause method*/
+static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_pause method*/
+	starpu_pause();
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper resume method*/
+static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_resume method*/
+	starpu_resume();
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper for the get-CPU-worker-count method*/
+static PyObject* starpu_cpu_worker_get_count_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_cpu_worker_get_count method*/
+	int num_cpu=starpu_cpu_worker_get_count();
+
+	/*return type is unsigned*/
+	return Py_BuildValue("I", num_cpu);
+}
+
+/*wrapper get min priority method*/
+static PyObject* starpu_sched_get_min_priority_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_sched_get_min_priority*/
+	int min_prio=starpu_sched_get_min_priority();
+
+	/*return type is int*/
+	return Py_BuildValue("i", min_prio);
+}
+
+/*wrapper get max priority method*/
+static PyObject* starpu_sched_get_max_priority_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_sched_get_max_priority*/
+	int max_prio=starpu_sched_get_max_priority();
+
+	/*return type is int*/
+	return Py_BuildValue("i", max_prio);
+}
+
+/*wrapper to get the number of submitted tasks that have not completed yet*/
+static PyObject* starpu_task_nsubmitted_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_task_nsubmitted*/
+	int num_task=starpu_task_nsubmitted();
+
+	/*Return the number of submitted tasks which have not completed yet */
+	return Py_BuildValue("i", num_task);
+}
+/***********************************************************************************/
+
+/***************The module’s method table and initialization function**************/
+/*method table*/
+static PyMethodDef starpupyMethods[] =
+{
+	{"_task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/
+	{"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/
+	{"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/
+	{"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/
+	{"cpu_worker_get_count", starpu_cpu_worker_get_count_wrapper, METH_VARARGS, "return the number of CPUs controlled by StarPU"}, /*get count cpu method*/
+	{"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/
+	{"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/
+	{"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/
+	{"sched_get_min_priority", starpu_sched_get_min_priority_wrapper, METH_VARARGS, "get the number of min priority"}, /*get the number of min priority*/
+	{"sched_get_max_priority", starpu_sched_get_max_priority_wrapper, METH_VARARGS, "get the number of max priority"}, /*get the number of max priority*/
+	{"task_nsubmitted", starpu_task_nsubmitted_wrapper, METH_VARARGS, "get the number of submitted tasks which have not completed yet"}, /*get the number of submitted tasks which have not completed yet*/
+	{NULL, NULL}
+};
+
+/*deallocation function*/
+static void starpupyFree(void *self)
+{
+	starpu_shutdown();
+	Py_DECREF(asyncio_module);
+	//COUNTREFS();
+}
+
+/*module definition structure*/
+static struct PyModuleDef starpupymodule =
+{
+	PyModuleDef_HEAD_INIT,
+	"starpupy", /*name of module*/
+	NULL,
+	-1,
+	starpupyMethods, /*method table*/
+	NULL,
+	NULL,
+	NULL,
+	starpupyFree /*deallocation function*/
+};
+
+/*initialization function*/
+PyMODINIT_FUNC
+PyInit_starpupy(void)
+{
+	PyEval_InitThreads();
+	/*starpu initialization*/
+	int ret = starpu_init(NULL);
+	assert(ret==0);
+	/*python asyncio import*/
+	asyncio_module = PyImport_ImportModule("asyncio");
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+	/*numpy import array*/
+	import_array();
+#endif
+	/*module import initialization*/
+	return PyModule_Create(&starpupymodule);
+}
+/***********************************************************************************/

+ 0 - 6
starpupy/tests/Makefile

@@ -1,6 +0,0 @@
-PYTHON ?= python3
-
-all:
-	PYTHONPATH=../src $(PYTHON) starpu_py.py
-	PYTHONPATH=../src STARPU_CALIBRATE=1 $(PYTHON) starpu_py_parallel.py
-

+ 0 - 101
starpupy/tests/starpu_py_parallel.py

@@ -1,101 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-import starpu
-import time
-import asyncio
-from math import sqrt
-from math import log10
-
-#generate a list to store functions
-g_func=[]
-
-#function no input no output print hello world
-def hello():
-	print ("Example 1: Hello, world!")
-g_func.append(starpu.joblib.delayed(hello)())
-
-#function no input no output
-def func1():
-	print ("Example 2: This is a function no input no output")
-g_func.append(starpu.joblib.delayed(func1)())
-
-#function no input return a value
-def func2():
-	print ("Example 3:")
-	return 12
-g_func.append(starpu.joblib.delayed(func2)())
- 
-#function has 2 int inputs and 1 int output
-def multi(a,b):
-	res_multi=a*b
-	print("Example 4: The result of ",a,"*",b,"is",res_multi)
-	return res_multi
-g_func.append(starpu.joblib.delayed(multi)(2, 3))
-
-#function has 4 float inputs and 1 float output
-def add(a,b,c,d):
-	res_add=a+b+c+d
-	print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add)
-	return res_add
-g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9))
-
-#function has 2 int inputs 1 float input and 1 float output 1 int output
-def sub(a,b,c):
-	res_sub1=a-b-c
-	res_sub2=a-b
-	print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2)
-	return res_sub1, res_sub2
-g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9))
-
-#the size of generator
-N=1000000
-
-print("************************")
-print("parallel Normal version:")
-print("************************")
-print("--input is iterable argument list, example 1")
-starpu.joblib.parallel(mode="normal", n_jobs=-2, perfmodel="first")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
-
-print("--input is iterable argument list, example 2")
-starpu.joblib.parallel(mode="normal", n_jobs=2, perfmodel="second")(starpu.joblib.delayed(log10)(i+1)for i in range(N))
-
-print("--input is iterable function list")
-starpu.joblib.parallel(mode="normal", n_jobs=3, perfmodel="third")(g_func)
-
-
-print("************************")
-print("parallel Future version:")
-print("************************")
-async def main():
-	print("--input is iterable argument list, example 1")
-	fut1=starpu.joblib.parallel(mode="future", n_jobs=-3, perfmodel="first")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
-	res1=await fut1
-	#print(res1)
-
-	print("--input is iterable argument list, example 2")
-	fut2=starpu.joblib.parallel(mode="future", n_jobs=-3, perfmodel="second")(starpu.joblib.delayed(log10)(i+1)for i in range(N))
-	res2=await fut2
-	#print(res2)
-
-	print("--input is iterable function list")
-	fut3=starpu.joblib.parallel(mode="future", n_jobs=2, perfmodel="third")(g_func)
-	res3=await fut3
-	#print(res3)
-asyncio.run(main())
-
-starpu.joblib.perfmodel_plot(perfmodel="first")
-starpu.joblib.perfmodel_plot(perfmodel="second")
-starpu.joblib.perfmodel_plot(perfmodel="third")

+ 2 - 2
tests/Makefile.am

@@ -17,8 +17,8 @@ include $(top_srcdir)/starpu.mk
 
 
 AM_CFLAGS += -Wno-unused
 AM_CFLAGS += -Wno-unused
 AM_CXXFLAGS += -Wno-unused
 AM_CXXFLAGS += -Wno-unused
-AM_FFLAGS += -Wno-unused
-AM_FCFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 2 - 1
tests/datawizard/bcsr.c

@@ -123,7 +123,8 @@ int main(int argc, char **argv)
 	if (starpu_initialize(&conf, &argc, &argv) == -ENODEV)
 	if (starpu_initialize(&conf, &argc, &argv) == -ENODEV)
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 
 
-	if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1) {
+	if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1)
+	{
 		starpu_shutdown();
 		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}

+ 1 - 2
tests/datawizard/interfaces/test_interfaces.c

@@ -16,8 +16,7 @@
 
 
 #include <starpu.h>
 #include <starpu.h>
 
 
-/* XXX Why cant we dereference a handle without this one ? */
-#include <core/sched_policy.h>
+#include <datawizard/coherency.h>
 
 
 #include <assert.h>
 #include <assert.h>
 
 

+ 1 - 0
tests/main/starpu_worker_exists.c

@@ -14,6 +14,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#define BUILDING_STARPU
 #include <starpu.h>
 #include <starpu.h>
 #include "core/workers.h"
 #include "core/workers.h"
 #include "../helper.h"
 #include "../helper.h"

+ 2 - 1
tests/microbenchs/bandwidth.c

@@ -170,7 +170,8 @@ static unsigned interleave(unsigned i)
 		return 0;
 		return 0;
 }
 }
 
 
-enum sleep_type {
+enum sleep_type
+{
 	PAUSE,
 	PAUSE,
 	NOP,
 	NOP,
 	SYNC,
 	SYNC,

+ 2 - 0
tests/microbenchs/tasks_size_overhead.c

@@ -228,6 +228,8 @@ int main(int argc, char **argv)
 		goto error;
 		goto error;
 	}
 	}
 
 
+	if (mincpus <= 0)
+		mincpus = 1;
 	/* For each number of cpus, benchmark */
 	/* For each number of cpus, benchmark */
 	for (ncpus= mincpus; ncpus <= maxcpus; ncpus += cpustep)
 	for (ncpus= mincpus; ncpus <= maxcpus; ncpus += cpustep)
 	{
 	{

+ 8 - 1
tests/perfmodels/regression_based_memset.c

@@ -213,7 +213,7 @@ static int bench_energy(int workerid, int where, enum starpu_worker_archtype arc
 		if ( (retval = starpu_energy_start(workerid, archtype)) != 0)
 		if ( (retval = starpu_energy_start(workerid, archtype)) != 0)
 		{
 		{
 			starpu_data_unregister(handle);
 			starpu_data_unregister(handle);
-			_STARPU_DISP("Energy measurement not supported for archtype %d\n", archtype);
+			_STARPU_DISP("Energy measurement not supported for archtype %s\n", starpu_perfmodel_get_archtype_name(archtype));
 			return -1;
 			return -1;
 		}
 		}
 
 
@@ -328,6 +328,9 @@ int main(int argc, char **argv)
 	starpu_conf_init(&conf);
 	starpu_conf_init(&conf);
 
 
 	/* Use a scheduler which doesn't choose the implementation */
 	/* Use a scheduler which doesn't choose the implementation */
+#ifdef STARPU_HAVE_UNSETENV
+	unsetenv("STARPU_SCHED");
+#endif
 	conf.sched_policy_name = "eager";
 	conf.sched_policy_name = "eager";
 	conf.calibrate = 1;
 	conf.calibrate = 1;
 
 
@@ -345,15 +348,19 @@ int main(int argc, char **argv)
 	{
 	{
 		memset_cl.cpu_funcs[1] = NULL;
 		memset_cl.cpu_funcs[1] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &memset_cl);
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &memset_cl);
+#ifdef STARPU_HAVE_UNSETENV
 		memset_cl.cpu_funcs[1] = memset_cpu;
 		memset_cl.cpu_funcs[1] = memset_cpu;
 		memset_cl.cpu_funcs[0] = NULL;
 		memset_cl.cpu_funcs[0] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &memset_cl);
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &memset_cl);
+#endif
 
 
 		nl_memset_cl.cpu_funcs[1] = NULL;
 		nl_memset_cl.cpu_funcs[1] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &nl_memset_cl);
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &nl_memset_cl);
+#ifdef STARPU_HAVE_UNSETENV
 		nl_memset_cl.cpu_funcs[1] = memset_cpu;
 		nl_memset_cl.cpu_funcs[1] = memset_cpu;
 		nl_memset_cl.cpu_funcs[0] = NULL;
 		nl_memset_cl.cpu_funcs[0] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &nl_memset_cl);
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &nl_memset_cl);
+#endif
 	}
 	}
 
 
 	for (i = 0; i < starpu_cuda_worker_get_count(); i++)
 	for (i = 0; i < starpu_cuda_worker_get_count(); i++)

+ 0 - 0
tools/dev/checker/starpu_check_copyright.sh


Some files were not shown because too many files changed in this diff