Explorar el Código

Merge branch 'fpga' of gitlab.inria.fr:starpu/starpu into fpga

Samuel Thibault hace 4 años
padre
commit
0465129baa
Se han modificado 100 ficheros con 4541 adiciones y 1304 borrados
  1. 4 0
      Makefile.am
  2. 144 62
      configure.ac
  3. 2 2
      contrib/ci.inria.fr/disabled/Jenkinsfile-basic
  4. 1 1
      contrib/ci.inria.fr/disabled/Jenkinsfile-windows
  5. 2 2
      contrib/ci.inria.fr/job-1-check-windows.bat
  6. 1 0
      contrib/ci.inria.fr/job-1-check.sh
  7. 275 68
      doc/doxygen/chapters/400_python.doxy
  8. 1 1
      doc/doxygen/chapters/470_simgrid.doxy
  9. BIN
      doc/doxygen/chapters/images/starpu_log.png
  10. 1854 0
      doc/doxygen/chapters/images/starpu_log_arr.eps
  11. BIN
      doc/doxygen/chapters/images/starpu_log_arr.png
  12. 339 316
      doc/doxygen/chapters/images/starpu_log.eps
  13. BIN
      doc/doxygen/chapters/images/starpu_log_list.png
  14. 1 1
      doc/doxygen/refman.tex
  15. 0 1
      doc/doxygen_dev/refman.tex
  16. 2 2
      examples/Makefile.am
  17. 2 2
      examples/cpp/add_vectors_interface.cpp
  18. 2 1
      examples/tag_example/tag_example.c
  19. 3 3
      include/fstarpu_mod.f90
  20. 2 0
      include/starpu_config.h.in
  21. 2 3
      include/starpu_scheduler.h
  22. 1 1
      libstarpu-mic.pc.in
  23. 1 1
      libstarpu.pc.in
  24. 12 0
      m4/libs.m4
  25. 3 0
      mpi/examples/Makefile.am
  26. 2 2
      mpi/examples/native_fortran/nf_mm_task_build.f90
  27. 0 1
      mpi/src/mpi/starpu_mpi_mpi.c
  28. 0 1
      mpi/src/starpu_mpi.c
  29. 3 0
      mpi/tests/Makefile.am
  30. 1 1
      socl/src/init.c
  31. 5 3
      src/Makefile.am
  32. 4 2
      src/common/rbtree_i.h
  33. 14 2
      src/common/thread.c
  34. 2 0
      src/common/utils.h
  35. 2 1
      src/core/dependencies/cg.c
  36. 15 20
      src/core/jobs.c
  37. 2 4
      src/core/jobs.h
  38. 15 12
      src/core/perfmodel/energy_model.c
  39. 4 3
      src/core/perfmodel/perfmodel_history.c
  40. 1 1
      src/core/perfmodel/perfmodel_print.c
  41. 7 14
      src/core/sched_policy.c
  42. 63 10
      src/core/simgrid.c
  43. 3 0
      src/core/simgrid.h
  44. 1 1
      src/core/task.c
  45. 3 3
      src/core/topology.c
  46. 5 4
      src/core/workers.c
  47. 5 5
      src/core/workers.h
  48. 4 2
      src/datawizard/memalloc.c
  49. 2 1
      src/datawizard/memory_nodes.c
  50. 5 3
      src/debug/traces/starpu_fxt.c
  51. 4 2
      src/drivers/cpu/driver_cpu.c
  52. 4 2
      src/drivers/cuda/driver_cuda_init.c
  53. 2 1
      src/drivers/disk/driver_disk.c
  54. 4 2
      src/drivers/mic/driver_mic_init.c
  55. 4 2
      src/drivers/mpi/driver_mpi_init.c
  56. 4 2
      src/drivers/opencl/driver_opencl_init.c
  57. 2 0
      src/profiling/profiling.c
  58. 6 3
      src/sched_policies/component_heteroprio.c
  59. 4 4
      src/sched_policies/component_worker.c
  60. 5 0
      src/sched_policies/helper_mct.c
  61. 2 1
      src/sched_policies/work_stealing_policy.c
  62. 6 4
      src/util/starpu_data_cpy.c
  63. 1 1
      starpu-1.0-mic.pc.in
  64. 1 1
      starpu-1.0.pc.in
  65. 1 1
      starpu-1.1.pc.in
  66. 1 1
      starpu-1.2.pc.in
  67. 1 1
      starpu-1.3.pc.in
  68. 2 2
      starpufft/src/starpufft-double.h
  69. 2 2
      starpufft/src/starpufft-float.h
  70. 2 2
      starpufft/src/starpufftx.c
  71. 21 0
      starpupy/Makefile.am
  72. 43 0
      starpupy/examples/Makefile.am
  73. 59 0
      starpupy/examples/execute.sh.in
  74. 9 9
      starpupy/tests/starpu_py.py
  75. 19 0
      starpupy/examples/starpu_py.sh
  76. 40 0
      starpupy/examples/starpu_py_np.py
  77. 3 7
      starpupy/src/starpu/delay.py
  78. 350 0
      starpupy/examples/starpu_py_parallel.py
  79. 19 0
      starpupy/examples/starpu_py_parallel.sh
  80. 63 0
      starpupy/src/Makefile.am
  81. 2 1
      starpupy/src/starpu/__init__.py
  82. 29 0
      starpupy/src/delay.py
  83. 63 0
      starpupy/src/intermedia.py
  84. 324 0
      starpupy/src/joblib.py
  85. 23 0
      starpupy/src/setup.cfg.in
  86. 40 0
      starpupy/src/setup.py.in
  87. 0 13
      starpupy/src/starpu/Makefile
  88. 0 147
      starpupy/src/starpu/joblib.py
  89. 0 416
      starpupy/src/starpu/starpu_task_wrapper.c
  90. 536 0
      starpupy/src/starpu_task_wrapper.c
  91. 0 6
      starpupy/tests/Makefile
  92. 0 101
      starpupy/tests/starpu_py_parallel.py
  93. 2 2
      tests/Makefile.am
  94. 2 1
      tests/datawizard/bcsr.c
  95. 1 2
      tests/datawizard/interfaces/test_interfaces.c
  96. 1 0
      tests/main/starpu_worker_exists.c
  97. 2 1
      tests/microbenchs/bandwidth.c
  98. 2 0
      tests/microbenchs/tasks_size_overhead.c
  99. 8 1
      tests/perfmodels/regression_based_memset.c
  100. 0 0
      tools/dev/checker/starpu_check_copyright.sh

+ 4 - 0
Makefile.am

@@ -53,6 +53,10 @@ if STARPU_BUILD_STARPURM
 SUBDIRS += starpurm
 endif
 
+if STARPU_BUILD_STARPUPY
+SUBDIRS += starpupy
+endif
+
 if STARPU_BUILD_SC_HYPERVISOR
 SUBDIRS += sc_hypervisor
 endif

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 144 - 62
configure.ac


+ 2 - 2
contrib/ci.inria.fr/disabled/Jenkinsfile-basic

@@ -34,7 +34,7 @@ pipeline
 		{
 			steps
 			{
-				node('autotools')
+				node('autotools2')
 				{
 					checkout scm
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
@@ -62,7 +62,7 @@ pipeline
 			{
 				script
 				{
-					labelToSelect = 'unix'
+					labelToSelect = 'unix2'
 					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
 					{
 						node -> node.getLabelString().contains(labelToSelect) ? node.name : null

+ 1 - 1
contrib/ci.inria.fr/disabled/Jenkinsfile-windows

@@ -34,7 +34,7 @@ pipeline
 		{
 			steps
 			{
-				node('autotools')
+				node('autotools2')
 				{
 					checkout scm
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'

+ 2 - 2
contrib/ci.inria.fr/job-1-check-windows.bat

@@ -14,9 +14,9 @@ REM
 REM See the GNU Lesser General Public License in COPYING.LGPL for more details.
 REM
 
-set PATH=%PATH%;C:\MinGW\msys\1.0\bin;c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin
+set PATH=%PATH%;C:\MinGW\msys\1.0\bin;c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32
 sh -c "./job-1-build-windows.sh"
-set PATH=C:\Windows\SysWOW64;C:\Program Files (x86)\Mozilla Firefox;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\SysWOW64;C:\Program Files\Java\jre7\bin;
+set PATH=C:\Windows\SysWOW64;C:\Program Files (x86)\Mozilla Firefox;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\SysWOW64;C:\Program Files\Java\jre7\bin;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32
 set HWLOC=c:\StarPU\hwloc-win32-build-1.11.0
 
 cd starpu_install

+ 1 - 0
contrib/ci.inria.fr/job-1-check.sh

@@ -41,6 +41,7 @@ env > $PWD/env
 
 test -d $basename && chmod -R u+rwX $basename && rm -rf $basename
 tar xfz ../$tarball
+touch --date="last hour" $(find $basename)
 cd $basename
 mkdir build
 cd build

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 275 - 68
doc/doxygen/chapters/400_python.doxy


+ 1 - 1
doc/doxygen/chapters/470_simgrid.doxy

@@ -23,7 +23,7 @@
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
 platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
-3.25. SimGrid versions 3.25 and above need to be configured with -Denable_msg=ON .
+3.26. SimGrid version 3.25 needs to be configured with -Denable_msg=ON .
 Other versions may have compatibility issues. 3.17 notably does not build at
 all. MPI simulation does not work with version 3.22.
 

BIN
doc/doxygen/chapters/images/starpu_log.png


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 1854 - 0
doc/doxygen/chapters/images/starpu_log_arr.eps


BIN
doc/doxygen/chapters/images/starpu_log_arr.png


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 339 - 316
doc/doxygen/chapters/images/starpu_log.eps


BIN
doc/doxygen/chapters/images/starpu_log_list.png


+ 1 - 1
doc/doxygen/refman.tex

@@ -138,7 +138,7 @@ Documentation License”.
 
 \part{StarPU Extensions}
 
-\chapter{PythonInterface}
+\chapter{Python Interface}
 \label{PythonInterface}
 \hypertarget{PythonInterface}{}
 \input{PythonInterface}

+ 0 - 1
doc/doxygen_dev/refman.tex

@@ -148,7 +148,6 @@ Documentation License”.
 \input{starpu__data__cpy_8h}
 \input{starpu__debug__helpers_8h}
 \input{starpu__fxt_8h}
-\input{starpu__parameters_8h}
 \input{starpu__spinlock_8h}
 \input{starpu__task__insert__utils_8h}
 \input{tags_8h}

+ 2 - 2
examples/Makefile.am

@@ -20,8 +20,8 @@ include $(top_srcdir)/starpu.mk
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
-AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused
-AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 2 - 2
examples/cpp/add_vectors_interface.cpp

@@ -61,9 +61,9 @@ class my_allocator
 		node = a.get_node();
 	}
 
-	explicit my_allocator(const unsigned node)
+	explicit my_allocator(const unsigned thenode)
 	{
-		this->node = node;
+		this->node = thenode;
 	}
 
 	pointer allocate(size_type n, const void * = 0)

+ 2 - 1
examples/tag_example/tag_example.c

@@ -223,7 +223,8 @@ int main(int argc, char **argv)
 	int ret;
 
 #ifdef STARPU_HAVE_HELGRIND_H
-	if (RUNNING_ON_VALGRIND) {
+	if (RUNNING_ON_VALGRIND)
+	{
 		ni /= 2;
 		nj /= 2;
 		nk /= 2;

+ 3 - 3
include/fstarpu_mod.f90

@@ -1054,7 +1054,7 @@ module fstarpu_mod
                 end subroutine fstarpu_vector_data_register
 
                 ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
-                subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) &
                                 bind(C,name="starpu_vector_ptr_register")
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         type(c_ptr), intent(out) :: dh
@@ -1092,7 +1092,7 @@ module fstarpu_mod
                 end subroutine fstarpu_variable_data_register
 
                 ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
-                subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) &
                                 bind(C,name="starpu_variable_ptr_register")
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         type(c_ptr), intent(out) :: dh
@@ -1758,7 +1758,7 @@ module fstarpu_mod
                 end function fstarpu_data_descr_array_alloc
 
                 ! struct starpu_data_descr *fstarpu_data_descr_alloc(void);
-                function fstarpu_data_descr_alloc (nb) bind(C)
+                function fstarpu_data_descr_alloc () bind(C)
                         use iso_c_binding, only: c_ptr
                         type(c_ptr) :: fstarpu_data_descr_alloc
                 end function fstarpu_data_descr_alloc

+ 2 - 0
include/starpu_config.h.in

@@ -331,4 +331,6 @@ typedef ssize_t starpu_ssize_t;
 #undef STARPU_HAVE_STATEMENT_EXPRESSIONS
 #undef STARPU_PERF_MODEL_DIR
 
+#undef STARPU_PYTHON_HAVE_NUMPY
+
 #endif

+ 2 - 3
include/starpu_scheduler.h

@@ -294,9 +294,8 @@ int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_t
 /**
    The scheduling policy may put tasks directly into a worker’s local
    queue so that it is not always necessary to create its own queue
-   when the local queue is sufficient. If \p back is not 0, \p task is
-   put at the back of the queue where the worker will pop tasks first.
-   Setting \p back to 0 therefore ensures a FIFO ordering.
+   when the local queue is sufficient. \p back is ignored: the task priority is
+   used to order tasks in this queue.
 */
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
 

+ 1 - 1
libstarpu-mic.pc.in

@@ -22,6 +22,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
libstarpu.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 12 - 0
m4/libs.m4

@@ -202,3 +202,15 @@ AC_DEFUN([IS_SUPPORTED_FLAG],
 	IS_SUPPORTED_FFLAG($1)
 	IS_SUPPORTED_FCFLAG($1)
 ])
+
+# AC_PYTHON_MODULE(modulename, [action-if-found], [action-if-not-found])
+# Check if the given python module is available
+AC_DEFUN([AC_PYTHON_MODULE],
+[
+	echo "import $1" | $PYTHON - 2>/dev/null
+	if test $? -ne 0 ; then
+	   	$3
+	else
+		$2
+	fi
+])

+ 3 - 0
mpi/examples/Makefile.am

@@ -108,6 +108,9 @@ endif
 endif
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS)

+ 2 - 2
mpi/examples/native_fortran/nf_mm_task_build.f90

@@ -169,7 +169,7 @@ program nf_mm
         do b_col=1,NB
            do b_row=1,NB
               task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
-                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_A(b_row), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 C_NULL_PTR /))
@@ -177,7 +177,7 @@ program nf_mm
                  ret = fstarpu_task_submit(task)
               endif
               call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
-                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_A(b_row), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 C_NULL_PTR /))

+ 0 - 1
mpi/src/mpi/starpu_mpi_mpi.c

@@ -41,7 +41,6 @@
 #include <core/simgrid.h>
 #include <core/task.h>
 #include <core/topology.h>
-#include <core/workers.h>
 
 #ifdef STARPU_USE_MPI_MPI
 

+ 0 - 1
mpi/src/starpu_mpi.c

@@ -33,7 +33,6 @@
 #include <core/simgrid.h>
 #include <core/task.h>
 #include <core/topology.h>
-#include <core/workers.h>
 
 static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
 {

+ 3 - 0
mpi/tests/Makefile.am

@@ -84,6 +84,9 @@ endif
 endif
 
 AM_CFLAGS += -Wno-unused
+AM_CXXFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(STARPU_CUDA_LDFLAGS)

+ 1 - 1
socl/src/init.c

@@ -16,7 +16,7 @@
  */
 
 #include <stdlib.h>
-#include "../src/core/workers.h"
+#include "../src/common/utils.h"
 #include "socl.h"
 #include "gc.h"
 #include "mem_objects.h"

+ 5 - 3
src/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2009-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 # Copyright (C) 2013       Simon Archipoff
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -21,6 +21,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/include/ -DBUILDING_STARPU -DSTARPU_DATADIR='"$(da
 AM_CPPFLAGS += $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS += $(FXT_CFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(STARPU_RCCE_CPPFLAGS)
 LIBS += -lm $(LIBSTARPU_LDFLAGS)
+if STARPU_USE_MPI_MASTER_SLAVE
+LIBS += $(MPICC_LDFLAGS)
+endif
 
 SUBDIRS =
 
@@ -60,8 +63,7 @@ endif STARPU_HAVE_WINDOWS
 
 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 
-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined									\
-  -version-info $(libstarpu_so_version)
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined -version-info $(libstarpu_so_version)
 
 if STARPU_HAVE_DARWIN
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS += \

+ 4 - 2
src/common/rbtree_i.h

@@ -44,7 +44,8 @@
  * architectures, as long as the nodes aren't embedded in structures with
  * special alignment constraints such as member packing.
  */
-struct starpu_rbtree_node {
+struct starpu_rbtree_node
+{
     uintptr_t parent;
     struct starpu_rbtree_node *children[2];
 };
@@ -52,7 +53,8 @@ struct starpu_rbtree_node {
 /**
  * Red-black tree structure.
  */
-struct starpu_rbtree {
+struct starpu_rbtree
+{
     struct starpu_rbtree_node *root;
 };
 

+ 14 - 2
src/common/thread.c

@@ -96,14 +96,22 @@ int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const s
 	if (attr && attr->stacksize)
 		sg_actor_set_stacksize(*thread, attr->stacksize);
 #endif
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(*thread, tsd);
+#else
 	sg_actor_data_set(*thread, tsd);
+#endif
 	sg_actor_start(*thread, _starpu_simgrid_thread_start, 2, _args);
 #else
 	*thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args);
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(*thread, tsd);
+#else
 	sg_actor_data_set(*thread, tsd);
 #endif
 #endif
+#endif
 #ifndef HAVE_SG_ACTOR_SET_STACKSIZE
 	if (attr && attr->stacksize)
 		_starpu_simgrid_set_stack_size(_starpu_default_stack_size);
@@ -328,7 +336,9 @@ extern void *smpi_process_get_user_data();
 int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 {
 	void **array;
-#ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_GET_DATA
+	array = sg_actor_get_data(sg_actor_self());
+#elif defined(HAVE_SG_ACTOR_DATA)
 	array = sg_actor_data(sg_actor_self());
 #else
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)
@@ -355,7 +365,9 @@ int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 {
 	void **array;
-#ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_GET_DATA
+	array = sg_actor_get_data(sg_actor_self());
+#elif defined(HAVE_SG_ACTOR_DATA)
 	array = sg_actor_data(sg_actor_self());
 #else
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)

+ 2 - 0
src/common/utils.h

@@ -183,4 +183,6 @@ int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex);
 
 void _starpu_util_init(void);
 
+enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
+
 #endif // __COMMON_UTILS_H__

+ 2 - 1
src/core/dependencies/cg.c

@@ -221,7 +221,8 @@ void _starpu_notify_cg(void *pred STARPU_ATTRIBUTE_UNUSED, struct _starpu_cg *cg
 					tag_successors->ndeps_completed = 0;
 					/* This releases the lock */
 					_starpu_tag_set_ready(tag);
-				} else
+				}
+				else
 					_starpu_spin_unlock(&tag->lock);
 				break;
 			}

+ 15 - 20
src/core/jobs.c

@@ -347,19 +347,10 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 				_starpu_spin_unlock(&handle->header_lock);
 		}
 	}
+
 	/* Check nowhere before releasing the sequential consistency (which may
 	 * unregister the handle and free its switch_cl, and thus task->cl here.  */
 	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE;
-	/* If this is a continuation, we do not release task dependencies now.
-	 * Task dependencies will be released only when the continued task
-	 * fully completes */
-	if (!continuation)
-	{
-		/* Tell other tasks that we don't exist any more, thus no need for
-		 * implicit dependencies any more.  */
-		_starpu_release_task_enforce_sequential_consistency(j);
-	}
-
 	/* If the job was executed on a combined worker there is no need for the
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * data as it's not linked to an actual worker */
@@ -395,6 +386,16 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	if (!callback && task->cl)
 		callback = task->cl->callback_func;
 
+	/* If this is a continuation, we do not release task dependencies now.
+	 * Task dependencies will be released only when the continued task
+	 * fully completes */
+	if (!continuation)
+	{
+		/* Tell other tasks that we don't exist any more, thus no need for
+		 * implicit dependencies any more.  */
+		_starpu_release_task_enforce_sequential_consistency(j);
+	}
+
 	/* Task does not have a cl, but has explicit data dependencies, we need
 	 * to tell them that we will not exist any more before notifying the
 	 * tasks waiting for us
@@ -764,14 +765,14 @@ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker)
 		}
 	}
 
-	if (!starpu_task_list_empty(&worker->local_tasks))
-		task = starpu_task_list_pop_front(&worker->local_tasks);
+	if (!starpu_task_prio_list_empty(&worker->local_tasks))
+		task = starpu_task_prio_list_pop_front_highest(&worker->local_tasks);
 
 	_starpu_pop_task_end(task);
 	return task;
 }
 
-int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int prio)
+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task)
 {
 	/* Check that the worker is able to execute the task ! */
 	STARPU_ASSERT(task && task->cl);
@@ -814,13 +815,7 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 	}
 	else
 	{
-#ifdef STARPU_DEVEL
-#warning FIXME use a prio_list
-#endif
-		if (prio)
-			starpu_task_list_push_front(&worker->local_tasks, task);
-		else
-			starpu_task_list_push_back(&worker->local_tasks, task);
+		starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	}
 
 	starpu_wake_worker_locked(worker->workerid);

+ 2 - 4
src/core/jobs.h

@@ -269,10 +269,8 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_p
 struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
 
 /** Put a task into the pool of tasks that are explicitly attributed to the
- * specified worker. If "back" is set, the task is put at the back of the list.
- * Considering the tasks are popped from the back, this value should be 0 to
- * enforce a FIFO ordering. */
-int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int prio);
+ * specified worker. */
+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task);
 
 #define _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].index : job->ordered_buffers[i].index)
 #define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].handle : job->ordered_buffers[i].handle)

+ 15 - 12
src/core/perfmodel/energy_model.c

@@ -56,8 +56,11 @@ static const int N_EVTS = 2;
 
 static int nsockets;
 
-static const char* event_names[] = { "rapl::RAPL_ENERGY_PKG:cpu=%d",
-				     "rapl::RAPL_ENERGY_DRAM:cpu=%d"};
+static const char* event_names[] =
+{
+	"rapl::RAPL_ENERGY_PKG:cpu=%d",
+	"rapl::RAPL_ENERGY_DRAM:cpu=%d"
+};
 
 static int add_event(int EventSet, int socket);
 
@@ -66,9 +69,6 @@ static int add_event(int EventSet, int socket);
 /*must be initialized to PAPI_NULL before calling PAPI_create_event*/
 static int EventSet = PAPI_NULL;
 
-/*This is where we store the values we read from the eventset */
-static long long *values;
-
 #endif
 
 static double t1;
@@ -99,9 +99,6 @@ int starpu_energy_start(int workerid, enum starpu_worker_archtype archi)
 
 		nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
 
-		values=calloc(nsockets * N_EVTS,sizeof(long long));
-		STARPU_ASSERT(values);
-
 		if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
 			ERROR_RETURN(retval);
 
@@ -178,6 +175,9 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 	{
 		STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
 
+		/*This is where we store the values we read from the eventset */
+		long long values[nsockets*N_EVTS];
+
 		/* Stop counting and store the values into the array */
 		if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
 			ERROR_RETURN(retval);
@@ -196,9 +196,6 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 				      delta, t, delta/(t*1.0E-6));
 			}
 		}
-		free(values);
-
-		energy = energy * 0.23 / 1.0e9 / ntasks;
 
 		/*removes all events from a PAPI event set */
 		if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
@@ -242,7 +239,7 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 
 	arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
 
-	starpu_perfmodel_update_history(model, task, arch, cpuid, nimpl, energy);
+	starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks);
 
 	return retval;
 }
@@ -266,6 +263,12 @@ static int add_event(int eventSet, int socket)
 		retval = PAPI_add_named_event(eventSet, buf);
 		if (retval != PAPI_OK)
 		{
+			if (!strcmp(event_names[i], "rapl::RAPL_ENERGY_DRAM:cpu=%d"))
+			{
+				/* Ok, too bad */
+				_STARPU_DISP("Note: DRAM energy measurement not available\n");
+				return PAPI_OK;
+			}
 			_STARPU_DISP("cannot add event '%s': %d\n", buf, retval);
 			return retval;
 		}

+ 4 - 3
src/core/perfmodel/perfmodel_history.c

@@ -1243,7 +1243,8 @@ void _starpu_initialize_registered_performance_models(void)
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
 
-	for (archtype = 0; archtype < STARPU_NARCH; archtype++) {
+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
+	{
 		char name[128];
 		const char *arch = starpu_worker_get_type_as_env_var(archtype);
 		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
@@ -1518,8 +1519,8 @@ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
 	return 0;
 }
 
-int starpu_perfmodel_deinit(struct starpu_perfmodel *model){
-
+int starpu_perfmodel_deinit(struct starpu_perfmodel *model)
+{
 	_starpu_deinitialize_performance_model(model);
 	free(model->state);
 	model->state = NULL;

+ 1 - 1
src/core/perfmodel/perfmodel_print.c

@@ -30,7 +30,7 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 	ptr = per_arch_model->list;
 
 	if (!parameter && ptr)
-		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us)\tstddev (us)\t\tn\n");
+		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tstddev (us or J)\t\tn\n");
 
 	while (ptr)
 	{

+ 7 - 14
src/core/sched_policy.c

@@ -372,10 +372,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 		}
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 
-		if(task->priority > 0)
-			return _starpu_push_local_task(worker, task, 1);
-		else
-			return _starpu_push_local_task(worker, task, 0);
+		return _starpu_push_local_task(worker, task);
 	}
 	else
 	{
@@ -406,7 +403,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
 			_STARPU_TRACE_JOB_PUSH(alias, alias->priority);
 			worker = _starpu_get_worker_struct(combined_workerid[j]);
-			ret |= _starpu_push_local_task(worker, alias, 0);
+			ret |= _starpu_push_local_task(worker, alias);
 		}
 
 		return ret;
@@ -632,7 +629,8 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 				enum starpu_worker_archtype type;
 				for (type = 0; type < STARPU_NARCH; type++)
 				{
-					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) {
+					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type))
+					{
 						if (config->arch_nodeid[type] >= 0)
 							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
 						break;
@@ -1032,7 +1030,7 @@ pick:
 	}
 
 	task->mf_skip = 1;
-	starpu_task_list_push_back(&worker->local_tasks, task);
+	starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	goto pick;
 
 profiling:
@@ -1174,16 +1172,11 @@ void _starpu_wait_on_sched_event(void)
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 }
 
-/* The scheduling policy may put tasks directly into a worker's local queue so
- * that it is not always necessary to create its own queue when the local queue
- * is sufficient. If "back" not null, the task is put at the back of the queue
- * where the worker will pop tasks first. Setting "back" to 0 therefore ensures
- * a FIFO ordering. */
-int starpu_push_local_task(int workerid, struct starpu_task *task, int prio)
+int starpu_push_local_task(int workerid, struct starpu_task *task, int back STARPU_ATTRIBUTE_UNUSED)
 {
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 
-	return  _starpu_push_local_task(worker, task, prio);
+	return  _starpu_push_local_task(worker, task);
 }
 
 void _starpu_print_idle_time()

+ 63 - 10
src/core/simgrid.c

@@ -357,11 +357,16 @@ void _starpu_start_simgrid(int *argc, char **argv)
 	int limit_bandwidth = starpu_get_env_number("STARPU_LIMIT_BANDWIDTH");
 	if (limit_bandwidth >= 0)
 	{
-#ifdef HAVE_SG_LINK_BANDWIDTH_SET
+#if defined(HAVE_SG_LINK_BANDWIDTH_SET) || defined(HAVE_SG_LINK_SET_BANDWIDTH)
 		sg_link_t *links = sg_link_list();
 		int count = sg_link_count(), i;
-		for (i = 0; i < count; i++) {
+		for (i = 0; i < count; i++)
+		{
+#ifdef HAVE_SG_LINK_SET_BANDWIDTH
+			sg_link_set_bandwidth(links[i], limit_bandwidth * 1000000.);
+#else
 			sg_link_bandwidth_set(links[i], limit_bandwidth * 1000000.);
+#endif
 		}
 #else
 		_STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26, thus ignored\n", limit_bandwidth);
@@ -492,7 +497,11 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 
 #if defined(HAVE_SG_ACTOR_ATTACH) && defined (HAVE_SG_ACTOR_DATA)
 		sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
+#ifdef HAVE_SG_ACTOR_SET_DATA
+		sg_actor_set_data(actor, tsd);
+#else
 		sg_actor_data_set(actor, tsd);
+#endif
 #else
 		MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
 #endif
@@ -519,7 +528,11 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 		void **tsd;
 		_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+		sg_actor_set_data(sg_actor_self(), tsd);
+#else
 		sg_actor_data_set(sg_actor_self(), tsd);
+#endif
 #else
 		smpi_process_set_user_data(tsd);
 #endif
@@ -735,6 +748,9 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 		 * to be able to easily check scheduling robustness */
 	}
 
+#ifdef HAVE_SG_HOST_GET_SPEED
+	flops = length/1000000.0*sg_host_get_speed(sg_host_self());
+#else
 #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed)
 #  if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self)
 	flops = length/1000000.0*sg_host_speed(sg_host_self());
@@ -746,6 +762,7 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 #else
 	flops = length/1000000.0*MSG_get_host_speed(MSG_host_self());
 #endif
+#endif
 
 #ifndef HAVE_SG_ACTOR_SELF_EXECUTE
 	simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL);
@@ -1210,14 +1227,22 @@ starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t
 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_INIT
 	actor = sg_actor_init(name, host);
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(actor, tsd);
+#else
 	sg_actor_data_set(actor, tsd);
+#endif
 	sg_actor_start(actor, code, argc, argv);
 #else
 	actor = MSG_process_create_with_arguments(name, code, tsd, host, argc, argv);
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(actor, tsd);
+#else
 	sg_actor_data_set(actor, tsd);
 #endif
 #endif
+#endif
 	return actor;
 }
 
@@ -1251,7 +1276,7 @@ starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node)
 
 void _starpu_simgrid_count_ngpus(void)
 {
-#if (defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
+#if (defined(HAVE_SG_LINK_GET_NAME) || defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
 	unsigned src, dst;
 	starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM");
 
@@ -1261,7 +1286,7 @@ void _starpu_simgrid_count_ngpus(void)
 		{
 			int busid;
 			starpu_sg_host_t srchost, dsthost;
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(SD_link_t), NULL);
 			SD_link_t *route;
 #else
@@ -1281,8 +1306,12 @@ void _starpu_simgrid_count_ngpus(void)
 
 			srchost = _starpu_simgrid_get_memnode_host(src);
 			dsthost = _starpu_simgrid_get_memnode_host(dst);
-#if defined(HAVE_SG_HOST_ROUTE)  || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE)  || defined(sg_host_route)
+#ifdef HAVE_SG_HOST_GET_ROUTE
+			sg_host_get_route(srchost, dsthost, route_dynar);
+#else
 			sg_host_route(srchost, dsthost, route_dynar);
+#endif
 			routesize = xbt_dynar_length(route_dynar);
 			route = xbt_dynar_to_array(route_dynar);
 #else
@@ -1293,7 +1322,13 @@ void _starpu_simgrid_count_ngpus(void)
 			/* If it goes through "Host", do not care, there is no
 			 * direct transfer support */
 			for (i = 0; i < routesize; i++)
-				if (!strcmp(sg_link_name(route[i]), "Host"))
+				if (
+#ifdef HAVE_SG_LINK_GET_NAME
+					!strcmp(sg_link_get_name(route[i]), "Host")
+#else
+					!strcmp(sg_link_name(route[i]), "Host")
+#endif
+					)
 					break;
 			if (i < routesize)
 				continue;
@@ -1302,7 +1337,11 @@ void _starpu_simgrid_count_ngpus(void)
 			through = -1;
 			for (i = 0; i < routesize; i++)
 			{
+#ifdef HAVE_SG_LINK_GET_NAME
+				name = sg_link_get_name(route[i]);
+#else
 				name = sg_link_name(route[i]);
+#endif
 				size_t len = strlen(name);
 				if (!strcmp(" through", name+len-8))
 					through = i;
@@ -1315,7 +1354,11 @@ void _starpu_simgrid_count_ngpus(void)
 				_STARPU_DEBUG("Didn't find through-link for %d->%d\n", src, dst);
 				continue;
 			}
+#ifdef HAVE_SG_LINK_GET_NAME
+			name = sg_link_get_name(route[through]);
+#else
 			name = sg_link_name(route[through]);
+#endif
 
 			/*
 			 * count how many direct routes go through it between
@@ -1339,10 +1382,14 @@ void _starpu_simgrid_count_ngpus(void)
 
 				starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2);
 				int routesize2;
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(SD_link_t), NULL);
 				SD_link_t *route2;
+#ifdef HAVE_SG_HOST_GET_ROUTE
+				sg_host_get_route(srchost2, ramhost, route_dynar2);
+#else
 				sg_host_route(srchost2, ramhost, route_dynar2);
+#endif
 				routesize2 = xbt_dynar_length(route_dynar2);
 				route2 = xbt_dynar_to_array(route_dynar2);
 #else
@@ -1351,19 +1398,25 @@ void _starpu_simgrid_count_ngpus(void)
 #endif
 
 				for (i = 0; i < routesize2; i++)
-					if (!strcmp(name, sg_link_name(route2[i])))
+					if (
+#ifdef HAVE_SG_LINK_GET_NAME
+						!strcmp(name, sg_link_get_name(route2[i]))
+#else
+						!strcmp(name, sg_link_name(route2[i]))
+#endif
+						)
 					{
 						/* This GPU goes through this PCI bridge to access RAM */
 						ngpus++;
 						break;
 					}
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				free(route2);
 #endif
 			}
 			_STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus);
 			starpu_bus_set_ngpus(busid, ngpus);
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			free(route);
 #endif
 		}

+ 3 - 0
src/core/simgrid.h

@@ -24,6 +24,9 @@
 extern "C"
 {
 #endif
+
+/* Note: when changing something here, update the include list in configure.ac
+ * in the part that tries to enable stdc++11 */
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #include <simgrid/msg.h>

+ 1 - 1
src/core/task.c

@@ -1084,7 +1084,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
 	struct _starpu_worker *worker;
 	worker = _starpu_get_worker_struct(workerid);
-	starpu_task_list_push_back(&worker->local_tasks, task);
+	starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	starpu_wake_worker_locked(worker->workerid);
 
 	_starpu_profiling_set_task_push_end_time(task);

+ 3 - 3
src/core/topology.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2009-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  * Copyright (C) 2013       Thibaut Lambert
  * Copyright (C) 2016       Uppsala University
  *
@@ -464,7 +464,7 @@ struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  * Discover the topology of the machine
  */
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID)
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 						int *current, int *workers_gpuid,
 						const char *varname, unsigned nhwgpus,
@@ -1817,7 +1817,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
 	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
 	for (i = 0; i < nopencl; i++)
-		topology->nworker[STARPU_CUDA_WORKER][i] = 1;
+		topology->nworker[STARPU_OPENCL_WORKER][i] = 1;
 	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
 
 	_starpu_initialize_workers_opencl_gpuid(config);

+ 5 - 4
src/core/workers.c

@@ -688,7 +688,7 @@ void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machin
 	/* memory_node initialized by topology.c */
 	STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
-	starpu_task_list_init(&workerarg->local_tasks);
+	starpu_task_prio_list_init(&workerarg->local_tasks);
 	_starpu_ctx_change_list_init(&workerarg->ctx_change_list);
 	workerarg->local_ordered_tasks = NULL;
 	workerarg->local_ordered_tasks_size = 0;
@@ -1039,7 +1039,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
         if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
         {
                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
-                struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
+                struct _starpu_worker * worker_zero STARPU_ATTRIBUTE_UNUSED = &worker_set_zero->workers[0];
                 STARPU_PTHREAD_CREATE_ON(
                                 "zero",
                                 &worker_set_zero->worker_thread,
@@ -1445,7 +1445,8 @@ int _starpu_get_catch_signals(void)
 	return _starpu_config.conf.catch_signals;
 }
 
-void starpu_drivers_preinit(void) {
+void starpu_drivers_preinit(void)
+{
 	_starpu_cpu_preinit();
 	_starpu_cuda_preinit();
 	_starpu_opencl_preinit();
@@ -1828,7 +1829,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
 		}
 
 out:
-		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
+		STARPU_ASSERT(starpu_task_prio_list_empty(&worker->local_tasks));
 		for (n = 0; n < worker->local_ordered_tasks_size; n++)
 			STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL);
 		_starpu_sched_ctx_list_delete(&worker->sched_ctx_list);

+ 5 - 5
src/core/workers.h

@@ -61,8 +61,6 @@
 
 #define STARPU_MAX_PIPELINE 4
 
-enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
-
 struct _starpu_ctx_change_list;
 
 /** This is initialized by _starpu_worker_init() */
@@ -125,7 +123,7 @@ LIST_TYPE(_starpu_worker,
 	     * subsequent processing once worker completes the ongoing scheduling
 	     * operation */
 	struct _starpu_ctx_change_list ctx_change_list;
-	struct starpu_task_list local_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue */
+	struct starpu_task_prio_list local_tasks; /**< this queue contains tasks that have been explicitly submitted to that queue */
 	struct starpu_task **local_ordered_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue with an explicit order */
 	unsigned local_ordered_tasks_size; /**< this records the size of local_ordered_tasks */
 	unsigned current_ordered_task; /**< this records the index (within local_ordered_tasks) of the next ordered task to be executed */
@@ -427,7 +425,8 @@ struct _starpu_machine_config
 };
 
 /** Provides information for a device driver */
-struct starpu_driver_info {
+struct starpu_driver_info
+{
 	const char *name_upper;	/**< Name of worker type in upper case */
 	const char *name_var;	/**< Name of worker type for environment variables */
 	const char *name_lower;	/**< Name of worker type in lower case */
@@ -441,7 +440,8 @@ extern struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
 void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
 
 /** Provides information for a memory node driver */
-struct starpu_memory_driver_info {
+struct starpu_memory_driver_info
+{
 	const char *name_upper;	/**< Name of memory in upper case */
 	enum starpu_worker_archtype worker_archtype;	/**< Kind of device */
 };

+ 4 - 2
src/datawizard/memalloc.c

@@ -1513,7 +1513,8 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			/* First try to flush data explicitly marked for freeing */
 			size_t freed = flush_memchunk_cache(dst_node, reclaim);
 
-			if (freed >= reclaim) {
+			if (freed >= reclaim)
+			{
 				/* That freed enough data, retry allocating */
 				prefetch_out_of_memory[dst_node] = 0;
 				continue;
@@ -1550,7 +1551,8 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
 			prefetch_out_of_memory[dst_node] = 0;
-		} else
+		}
+		else
 			prefetch_out_of_memory[dst_node] = 0;
 	}
 	while((allocated_memory == -ENOMEM) && attempts++ < 2);

+ 2 - 1
src/datawizard/memory_nodes.c

@@ -180,7 +180,8 @@ int starpu_memory_node_get_devid(unsigned node)
 	return _starpu_descr.devid[node];
 }
 
-enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) {
+enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind)
+{
 	enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype;
 	STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind);
 	return archtype;

+ 5 - 3
src/debug/traces/starpu_fxt.c

@@ -193,7 +193,8 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		fprintf(tasks_file, "Name: %s\n", task->name);
 	if (task->model_name)
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
-	if (task->file) {
+	if (task->file)
+	{
 		fprintf(tasks_file, "File: %s\n", task->file);
 		fprintf(tasks_file, "Line: %d\n", task->line);
 	}
@@ -4129,7 +4130,8 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 
 	if (out_paje_file && !options->no_bus)
 	{
-		while (!_starpu_communication_list_empty(&communication_list)) {
+		while (!_starpu_communication_list_empty(&communication_list))
+		{
 			struct _starpu_communication*itor;
 			itor = _starpu_communication_list_pop_front(&communication_list);
 
@@ -4423,7 +4425,7 @@ void _starpu_fxt_number_events_file_init(struct starpu_fxt_options *options)
 			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno));
 
 		/* FUT_SETUP_CODE is the event with the maximal value */
-		number_events = calloc(FUT_SETUP_CODE+1, sizeof(uint64_t));
+		_STARPU_CALLOC(number_events, FUT_SETUP_CODE+1, sizeof(uint64_t));
 	}
 	else
 		number_events_file = NULL;

+ 4 - 2
src/drivers/cpu/driver_cpu.c

@@ -60,7 +60,8 @@
 #include <windows.h>
 #endif
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "CPU",
 	.name_var = "CPU",
 	.name_lower = "cpu",
@@ -68,7 +69,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 0.5f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "NUMA",
 	.worker_archtype = STARPU_CPU_WORKER,
 };

+ 4 - 2
src/drivers/cuda/driver_cuda_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <drivers/cuda/driver_cuda.h>
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "CUDA",
 	.name_var = "CUDA",
 	.name_lower = "cuda",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 13.33f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "CUDA",
 	.worker_archtype = STARPU_CUDA_WORKER,
 };

+ 2 - 1
src/drivers/disk/driver_disk.c

@@ -23,7 +23,8 @@
 #include <datawizard/coherency.h>
 #include <datawizard/memory_nodes.h>
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "Disk",
 	.worker_archtype = (enum starpu_worker_archtype) -1,
 };

+ 4 - 2
src/drivers/mic/driver_mic_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <drivers/mic/driver_mic_source.h>
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "MIC",
 	.name_var = "MIC",
 	.name_lower = "mic",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 0.5f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "MIC",
 	.worker_archtype = STARPU_MIC_WORKER,
 };

+ 4 - 2
src/drivers/mpi/driver_mpi_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <drivers/mpi/driver_mpi_source.h>
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "MPI_MS",
 	.name_var = "MPI_MS",
 	.name_lower = "mpi_ms",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 1.0f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "MPI_MS",
 	.worker_archtype = STARPU_MPI_MS_WORKER,
 };

+ 4 - 2
src/drivers/opencl/driver_opencl_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <drivers/opencl/driver_opencl.h>
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "OpenCL",
 	.name_var = "OPENCL",
 	.name_lower = "opencl",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 12.22f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "OpenCL",
 	.worker_archtype = STARPU_OPENCL_WORKER,
 };

+ 2 - 0
src/profiling/profiling.c

@@ -201,11 +201,13 @@ void _starpu_profiling_papi_task_start_counters(struct starpu_task *task)
 		for(i=0; i<papi_nevents; i++)
 		{
 			int ret = PAPI_add_event(profiling_info->papi_event_set, papi_events[i]);
+#ifdef PAPI_ECMP_DISABLED
 			if (ret == PAPI_ECMP_DISABLED && !warned_component_unavailable)
 			{
 				_STARPU_MSG("Error while registering Papi event: Component containing event is disabled. Try running `papi_component_avail` to get more information.\n");
 				warned_component_unavailable = 1;
 			}
+#endif
 			profiling_info->papi_values[i]=0;
 		}
 		PAPI_reset(profiling_info->papi_event_set);

+ 6 - 3
src/sched_policies/component_heteroprio.c

@@ -434,9 +434,12 @@ static int heteroprio_push_task(struct starpu_sched_component * component, struc
 			/* Didn't find it, add one */
 			data->naccel++;
 
-			float *newaccel = malloc(data->naccel * sizeof(*newaccel));
-			struct _starpu_prio_deque **newbuckets = malloc(data->naccel * sizeof(*newbuckets));
-			struct _starpu_prio_deque *newbucket = malloc(sizeof(*newbucket));
+			float *newaccel;
+			_STARPU_MALLOC(newaccel, data->naccel * sizeof(*newaccel));
+			struct _starpu_prio_deque **newbuckets;
+			_STARPU_MALLOC(newbuckets, data->naccel * sizeof(*newbuckets));
+			struct _starpu_prio_deque *newbucket;
+			_STARPU_MALLOC(newbucket, sizeof(*newbucket));
 			_starpu_prio_deque_init(newbucket);
 			int inserted = 0;
 

+ 4 - 4
src/sched_policies/component_worker.c

@@ -510,11 +510,11 @@ static double simple_worker_estimated_load(struct starpu_sched_component * compo
 	struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
 	int nb_task = 0;
 	STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex);
-	struct starpu_task_list list = worker->local_tasks;
+	struct starpu_task_prio_list *list = &worker->local_tasks;
 	struct starpu_task * task;
-	for(task = starpu_task_list_front(&list);
-	    task != starpu_task_list_end(&list);
-	    task = starpu_task_list_next(task))
+	for(task = starpu_task_prio_list_begin(list);
+	    task != starpu_task_prio_list_end(list);
+	    task = starpu_task_prio_list_next(list, task))
 		nb_task++;
 	STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex);
 	struct _starpu_worker_component_data * d = component->data;

+ 5 - 0
src/sched_policies/helper_mct.c

@@ -88,6 +88,11 @@ static double compute_expected_time(double now, double predicted_end, double pre
 
 double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy)
 {
+	if(isnan(local_energy))
+		/* Energy not calibrated yet, but we cannot do this
+		 * automatically anyway, so ignoring this for now */
+		local_energy = 0.;
+
 	/* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */
 	
 	/* min_exp_end_of_task is the minimum end time of the task over all workers */

+ 2 - 1
src/sched_policies/work_stealing_policy.c

@@ -145,7 +145,8 @@ static int select_victim_round_robin(struct _starpu_work_stealing_data *ws, unsi
 		if (!ws->per_worker[workerids[worker]].notask)
 		{
 			if (ws->per_worker[workerids[worker]].busy
-						   || starpu_worker_is_blocked_in_parallel(workerids[worker])) {
+			    || starpu_worker_is_blocked_in_parallel(workerids[worker]))
+			{
 				ntasks = 1;
 				break;
 			}

+ 6 - 4
src/util/starpu_data_cpy.c

@@ -86,7 +86,7 @@ void mp_cpy_kernel(void *descr[], void *cl_arg)
 
 	const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
 	const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
-	
+
 	void *dst_interface = descr[0];
 	void *src_interface = descr[1];
 
@@ -151,7 +151,7 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 
 	unsigned *interface_id;
 	_STARPU_MALLOC(interface_id, sizeof(*interface_id));
-	*interface_id = dst_handle->ops->interfaceid; 
+	*interface_id = dst_handle->ops->interfaceid;
 	task->cl_arg = interface_id;
 	task->cl_arg_size = sizeof(*interface_id);
 	task->cl_arg_free = 1;
@@ -181,7 +181,8 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous)
 {
 	_starpu_spin_lock(&src_handle->header_lock);
-	if (src_handle->readonly_dup) {
+	if (src_handle->readonly_dup)
+	{
 		/* Already a ro duplicate, just return it with one more ref */
 		*dst_handle = src_handle->readonly_dup;
 		_starpu_spin_unlock(&src_handle->header_lock);
@@ -190,7 +191,8 @@ int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t sr
 		_starpu_spin_unlock(&(*dst_handle)->header_lock);
 		return 0;
 	}
-	if (src_handle->readonly) {
+	if (src_handle->readonly)
+	{
 		src_handle->aliases++;
 		_starpu_spin_unlock(&src_handle->header_lock);
 		*dst_handle = src_handle;

+ 1 - 1
starpu-1.0-mic.pc.in

@@ -23,7 +23,7 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires.private: @GORDON_REQUIRES@

+ 1 - 1
starpu-1.0.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.1.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.2.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.3.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 2 - 2
starpufft/src/starpufft-double.h

@@ -25,8 +25,8 @@
 #include <cufft.h>
 #endif
 
-#undef  FLOAT
-#define DOUBLE
+#undef  STARPUFFT_FLOAT
+#define STARPUFFT_DOUBLE
 
 typedef double real;
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 

+ 2 - 2
starpufft/src/starpufft-float.h

@@ -25,8 +25,8 @@
 #include <cufft.h>
 #endif
 
-#undef  DOUBLE
-#define FLOAT
+#undef  STARPUFFT_DOUBLE
+#define STARPUFFT_FLOAT
 
 typedef float real;
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 

+ 2 - 2
starpufft/src/starpufftx.c

@@ -28,7 +28,7 @@
 #define _externC extern
 #include "cudax_kernels.h"
 
-#if defined(FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
+#if defined(STARPUFFT_FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
 #  define __STARPU_USE_CUDA
 #else
 #  undef __STARPU_USE_CUDA
@@ -172,7 +172,7 @@ compute_roots(STARPUFFT(plan) plan)
 }
 
 /* Only CUDA capability >= 1.3 supports doubles, rule old card out.  */
-#ifdef DOUBLE
+#ifdef STARPUFFT_DOUBLE
 static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED) {
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 		return 1;

+ 21 - 0
starpupy/Makefile.am

@@ -0,0 +1,21 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu-subdirtests.mk
+
+SUBDIRS  = src
+SUBDIRS += examples
+

+ 43 - 0
starpupy/examples/Makefile.am

@@ -0,0 +1,43 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu.mk
+
+SUBDIRS =
+
+CLEANFILES = *.gcno *.gcda *.linkinfo
+
+TESTS	=
+TESTS	+=	starpu_py.sh
+TESTS	+=	starpu_py_parallel.sh
+
+if STARPU_STARPUPY_NUMPY
+TESTS	+=	starpu_py_np.sh
+endif
+
+EXTRA_DIST	=		\
+	starpu_py_parallel.py	\
+	starpu_py_parallel.sh	\
+	starpu_py.py		\
+	starpu_py.sh		\
+	starpu_py_np.py		\
+	starpu_py_np.sh
+
+python_sourcesdir = $(libdir)/starpu/python
+dist_python_sources_DATA	=	\
+	starpu_py_parallel.py	\
+	starpu_py.py
+

+ 59 - 0
starpupy/examples/execute.sh.in

@@ -0,0 +1,59 @@
+#!@REALBASH@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+exampledir=@STARPU_SRC_DIR@/starpupy/examples
+
+modpath=@STARPU_BUILD_DIR@/src/.libs:
+pypath=@STARPU_BUILD_DIR@/starpupy/src/build:$PYTHONPATH
+
+valgrind=""
+gdb=""
+if test "$1" == "--valgrind"
+then
+    valgrind=1
+    shift
+fi
+if test "$1" == "--gdb"
+then
+    gdb=1
+    shift
+fi
+
+examplefile=$1
+if test -f $examplefile
+then
+    pythonscript=$examplefile
+elif test -f $exampledir/$examplefile
+then
+    pythonscript=$exampledir/$examplefile
+else
+    echo "Error. Python script $examplefile not found in current directory or in $exampledir"
+    exit 1
+fi
+shift
+
+set -x
+if test "$valgrind" == "1"
+then
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath PYTHONMALLOC=malloc valgrind --track-origins=yes @PYTHON@ $pythonscript $*
+elif test "$gdb" == "1"
+then
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath gdb --args @PYTHON@ $pythonscript $*
+else
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath @PYTHON@ $pythonscript $*
+fi
+

+ 9 - 9
starpupy/tests/starpu_py.py

@@ -73,7 +73,7 @@ def sub(a,b,c):
 ###############################################################################
 
 #using decorator wrap the function with input
-@starpu.delayed
+@starpu.delayed(name="test")
 def add_deco(a,b,c):
 	#time.sleep(1)
 	print ("Example 8:")
@@ -83,7 +83,7 @@ def add_deco(a,b,c):
 ###############################################################################
 
 #using decorator wrap the function with input
-@starpu.delayed
+@starpu.delayed(color=1)
 def sub_deco(x,a):
 	print ("Example 9:")
 	print ("This is a function with input and output wrapped by the decorator function:")
@@ -93,34 +93,34 @@ def sub_deco(x,a):
 
 async def main():
 	#submit function "hello"
-    fut = starpu.task_submit(hello)
+    fut = starpu.task_submit()(hello)
     await fut
 
     #submit function "func1"
-    fut1 = starpu.task_submit(func1)
+    fut1 = starpu.task_submit()(func1)
     await fut1
 
     #apply starpu.delayed(func1_deco())
     await func1_deco()
 
 	#submit function "func2"
-    fut2 = starpu.task_submit(func2)
+    fut2 = starpu.task_submit()(func2)
     res2 = await fut2
 	#print the result of function
     print("This is a function no input and the return value is", res2)
 
     #submit function "multi"
-    fut3 = starpu.task_submit(multi, 2, 3)
+    fut3 = starpu.task_submit()(multi, 2, 3)
     res3 = await fut3
     print("The result of function multi is :", res3)
 
 	#submit function "add"
-    fut4 = starpu.task_submit(add, 1.2, 2.5, 3.6, 4.9)
+    fut4 = starpu.task_submit()(add, 1.2, 2.5, 3.6, 4.9)
     res4 = await fut4
     print("The result of function add is :", res4)
 
 	#submit function "sub"
-    fut5 = starpu.task_submit(sub, 6, 2, 5.9)
+    fut5 = starpu.task_submit()(sub, 6, 2, 5.9)
     res5 = await fut5
     print("The result of function sub is:", res5)
 
@@ -138,4 +138,4 @@ async def main():
 asyncio.run(main())
 
 
-#starpu.task_wait_for_all()
+#starpu.task_wait_for_all()

+ 19 - 0
starpupy/examples/starpu_py.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+$(dirname $0)/execute.sh starpu_py.py "$@"
+

+ 40 - 0
starpupy/examples/starpu_py_np.py

@@ -0,0 +1,40 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+import asyncio
+import numpy as np
+
+
+###############################################################################
+
+def scal(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a
+	return t
+
+t=np.array([1,2,3,4,5,6,7,8,9,10])
+
+async def main():
+    fut8 = starpu.task_submit()(scal, 2, t)
+    res8 = await fut8
+    print("The result of Example 10 is", res8)
+    print("The return array is", t)
+    #print("The result type is", type(res8))
+
+asyncio.run(main())
+
+
+#starpu.task_wait_for_all()

+ 3 - 7
starpupy/src/starpu/delay.py

@@ -1,3 +1,4 @@
+#!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
@@ -13,11 +14,6 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
-from starpu import starpupy
-import asyncio
 
-def delayed(f):
-	def submit(*args,**kwargs):
-		fut = starpupy.task_submit(f, *args,**kwargs)
-		return fut
-	return submit
+$(dirname $0)/execute.sh starpu_py_np.py "$@"
+

+ 350 - 0
starpupy/examples/starpu_py_parallel.py

@@ -0,0 +1,350 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+import starpu.joblib
+import time
+import asyncio
+from math import sqrt
+from math import log10
+import numpy as np
+import sys
+
+#generate a list to store functions
+g_func=[]
+
+#function no input no output print hello world
+def hello():
+	print ("Example 1: Hello, world!")
+g_func.append(starpu.joblib.delayed(hello)())
+
+#function no input no output
+def func1():
+	print ("Example 2: This is a function no input no output")
+g_func.append(starpu.joblib.delayed(func1)())
+
+#function no input return a value
+def func2():
+	print ("Example 3:")
+	return 12
+g_func.append(starpu.joblib.delayed(func2)())
+
+#function has 2 int inputs and 1 int output
+def exp(a,b):
+	res_exp=a**b
+	print("Example 4: The result of ",a,"^",b,"is",res_exp)
+	return res_exp
+g_func.append(starpu.joblib.delayed(exp)(2, 3))
+
+#function has 4 float inputs and 1 float output
+def add(a,b,c,d):
+	res_add=a+b+c+d
+	print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add)
+	return res_add
+g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9))
+
+#function has 2 int inputs 1 float input and 1 float output 1 int output
+def sub(a,b,c):
+	res_sub1=a-b-c
+	res_sub2=a-b
+	print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2)
+	return res_sub1, res_sub2
+g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9))
+
+##########functions of array calculation###############
+
+def scal(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a
+	return t
+
+def add_scal(a, t1, t2):
+	for i in range(len(t1)):
+		t1[i]=t1[i]*a+t2[i]
+	return t1
+
+def scal_arr(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a[i]
+	return t
+
+def multi(a,b):
+	res_multi=a*b
+	return res_multi
+
+def multi_2arr(a, b):
+        for i in range(len(a)):
+                a[i]=a[i]*b[i]
+        return a
+
+def multi_list(l):
+	res = []
+	for (a,b) in l:
+		res.append(a*b)
+	return res
+
+def log10_arr(t):
+	for i in range(len(t)):
+		t[i]=log10(t[i])
+	return t
+########################################################
+
+#################scikit test###################
+# DEFAULT_JOBLIB_BACKEND = starpu.joblib.get_active_backend()[0].__class__
+# class MyBackend(DEFAULT_JOBLIB_BACKEND):  # type: ignore
+#         def __init__(self, *args, **kwargs):
+#                 self.count = 0
+#                 super().__init__(*args, **kwargs)
+
+#         def start_call(self):
+#                 self.count += 1
+#                 return super().start_call()
+
+# starpu.joblib.register_parallel_backend('testing', MyBackend)
+
+# with starpu.joblib.parallel_backend("testing") as (ba, n_jobs):
+# 	print("backend and n_jobs is", ba, n_jobs)
+###############################################
+
+N=100
+# A=np.arange(N)
+# B=np.arange(N)
+# a=np.arange(N)
+# b=np.arange(N, 2*N, 1)
+
+displayPlot=False
+listX=[10, 100, 1000, 10000]
+for arg in sys.argv[1:]:
+        if arg == "-long":
+                listX = [10, 100, 1000, 10000, 100000, 1000000, 10000000]
+        if arg == "-plot":
+                displayPlot=True
+
+for x in listX:
+	for X in range(x, x*10, x):
+		print("X=",X)
+		starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_list")(starpu.joblib.delayed(log10)(i+1)for i in range(X))
+		A=np.arange(1,X+1,1)
+		starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_arr")(starpu.joblib.delayed(log10_arr)(A))
+
+print("************************")
+print("parallel Normal version:")
+print("************************")
+print("--(sqrt)(i**2)for i in range(N)")
+start_exec1=time.time()
+start_cpu1=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
+end_exec1=time.time()
+end_cpu1=time.process_time()
+print("the program execution time is", end_exec1-start_exec1)
+print("the cpu execution time is", end_cpu1-start_cpu1)
+
+print("--(multi)(i,j) for i,j in zip(a,b)")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec2=time.time()
+start_cpu2=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b))
+end_exec2=time.time()
+end_cpu2=time.process_time()
+print("the program execution time is", end_exec2-start_exec2)
+print("the cpu execution time is", end_cpu2-start_cpu2)
+
+print("--(scal_arr)((i for i in b), A)")
+A=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec3=time.time()
+start_cpu3=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A))
+end_exec3=time.time()
+end_cpu3=time.process_time()
+print("the program execution time is", end_exec3-start_exec3)
+print("the cpu execution time is", end_cpu3-start_cpu3)
+
+print("--(multi_list)((i,j) for i,j in zip(a,b))")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec4=time.time()
+start_cpu4=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b)))
+end_exec4=time.time()
+end_cpu4=time.process_time()
+print("the program execution time is", end_exec4-start_exec4)
+print("the cpu execution time is", end_cpu4-start_cpu4)
+
+print("--(multi_2arr)((i for i in a), (j for j in b))")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec5=time.time()
+start_cpu5=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b)))
+end_exec5=time.time()
+end_cpu5=time.process_time()
+print("the program execution time is", end_exec5-start_exec5)
+print("the cpu execution time is", end_cpu5-start_cpu5)
+
+print("--(multi_2arr)(A, B)")
+# A=np.arange(N)
+# B=np.arange(N, 2*N, 1)
+n, m = 4, 5
+A = np.arange(n*m).reshape(n, m)
+B = np.arange(n*m, 2*n*m, 1).reshape(n, m)
+print("The input arrays are A", A, "B", B)
+start_exec6=time.time()
+start_cpu6=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)(A, B))
+end_exec6=time.time()
+end_cpu6=time.process_time()
+print("the program execution time is", end_exec6-start_exec6)
+print("the cpu execution time is", end_cpu6-start_cpu6)
+print("The return arrays are A", A, "B", B)
+
+print("--(scal)(2, t=(j for j in a))")
+a=np.arange(N)
+start_exec7=time.time()
+start_cpu7=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, t=(j for j in a)))
+end_exec7=time.time()
+end_cpu7=time.process_time()
+print("the program execution time is", end_exec7-start_exec7)
+print("the cpu execution time is", end_cpu7-start_cpu7)
+
+print("--(scal)(2,A)")
+A=np.arange(N)
+print("The input array is", A)
+start_exec8=time.time()
+start_cpu8=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2,A))
+end_exec8=time.time()
+end_cpu8=time.process_time()
+print("the program execution time is", end_exec8-start_exec8)
+print("the cpu execution time is", end_cpu8-start_cpu8)
+print("The return array is", A)
+
+print("--(add_scal)(t1=A,t2=B,a=2)")
+A=np.arange(N)
+B=np.arange(N)
+print("The input arrays are A", A, "B", B)
+start_exec9=time.time()
+start_cpu9=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(t1=A,t2=B,a=2))
+end_exec9=time.time()
+end_cpu9=time.process_time()
+print("the program execution time is", end_exec9-start_exec9)
+print("the cpu execution time is", end_cpu9-start_cpu9)
+print("The return arrays are A", A, "B", B)
+
+
+print("--input is iterable function list")
+start_exec10=time.time()
+start_cpu10=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="func")(g_func)
+end_exec10=time.time()
+end_cpu10=time.process_time()
+print("the program execution time is", end_exec10-start_exec10)
+print("the cpu execution time is", end_cpu10-start_cpu10)
+
+# def producer():
+# 	for i in range(6):
+# 		print('Produced %s' % i)
+# 		yield i
+#starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(sqrt)(i) for i in producer())
+
+print("************************")
+print("parallel Future version:")
+print("************************")
+async def main():
+
+	print("--(sqrt)(i**2)for i in range(N)")
+	fut1=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
+	res1=await fut1
+	#print(res1)
+
+	print("--(multi)(i,j) for i,j in zip(a,b)")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut2=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b))
+	res2=await fut2
+	#print(res2)
+
+	print("--(scal_arr)((i for i in b), A)")
+	A=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut3=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A))
+	res3=await fut3
+	#print(res3)
+
+	print("--(multi_list)((i,j) for i,j in zip(a,b))")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut4=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b)))
+	res4=await fut4
+	#print(res4)
+
+	print("--(multi_2arr)((i for i in a), (j for j in b))")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut5=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b)))
+	res5=await fut5
+	#print(res5)
+
+	print("--(multi_2arr)(b=B, a=A)")
+	A=np.arange(N)
+	B=np.arange(N, 2*N, 1)
+	print("The input arrays are A", A, "B", B)
+	fut6=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)(b=B, a=A))
+	res6=await fut6
+	print("The return arrays are A", A, "B", B)
+
+
+	print("--(scal)(2, (j for j in a))")
+	a=np.arange(N)
+	fut7=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, (j for j in a)))
+	res7=await fut7
+	#print(res6)
+
+	print("--(scal)(2,t=A)")
+	A=np.arange(N)
+	print("The input array is", A)
+	fut8=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2,t=A))
+	res8=await fut8
+	print("The return array is", A)
+
+	print("--(scal)(2,A,B)")
+	A=np.arange(N)
+	B=np.arange(N)
+	print("The input arrays are A", A, "B", B)
+	fut9=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(2,A,B))
+	res9=await fut9
+	print("The return arrays are A", A, "B", B)
+
+	print("--input is iterable function list")
+	fut10=starpu.joblib.Parallel(mode="future", n_jobs=-1)(g_func)
+	res10=await fut10
+	#print(res9)
+
+asyncio.run(main())
+
+starpu.perfmodel_plot(perfmodel="sqrt",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="scal_arr",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi_list",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi_2arr",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="scal",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="add_scal",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="func",view=displayPlot)
+
+starpu.perfmodel_plot(perfmodel="log_list",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="log_arr",view=displayPlot)

+ 19 - 0
starpupy/examples/starpu_py_parallel.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+STARPU_CALIBRATE=1 $(dirname $0)/execute.sh starpu_py_parallel.py "$@"
+

+ 63 - 0
starpupy/src/Makefile.am

@@ -0,0 +1,63 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu-notests.mk
+
+SUBDIRS =
+
+PYTHON_PY_SRC	=	$(wildcard $(top_srcdir)/starpupy/src/*py)
+PYTHON_PY_BUILD	=	$(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_PY_SRC)))
+
+PYTHON_C_SRC	=	$(wildcard $(top_srcdir)/starpupy/src/*c)
+PYTHON_C_BUILD	=	$(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_C_SRC)))
+
+$(top_builddir)/starpupy/src/starpu/%.py: $(abs_top_srcdir)/starpupy/src/%.py
+	$(MKDIR_P) starpu
+	$(V_ln) $(LN_S) $< $@
+$(top_builddir)/starpupy/src/starpu/%.c: $(abs_top_srcdir)/starpupy/src/%.c
+	@$(MKDIR_P) starpu
+	$(V_ln) $(LN_S) $< $@
+
+all: $(PYTHON_PY_BUILD) $(PYTHON_C_BUILD)
+	$(PYTHON) setup.py build $(PYTHON_SETUP_OPTIONS)
+
+install-exec-local:
+	@if test -d $(prefix)/lib/python* ; \
+	then	\
+		chmod u+w $(prefix)/lib/python* ; \
+		chmod u+w $(prefix)/lib/python*/site-packages ; \
+	fi
+	$(PYTHON) setup.py install
+
+if STARPU_BUILD_STARPUPY
+clean-local:
+	$(PYTHON) setup.py clean
+	rm -f starpu/*py starpu/*c
+endif
+
+distclean-local:
+	rm -rf build
+
+uninstall-local:
+	rm -rf $(prefix)/lib/python*/site-packages/starpu*
+	rm -rf $(prefix)/lib/python*/site-packages/tmp/starpu*
+
+EXTRA_DIST	=		\
+	delay.py		\
+	__init__.py	\
+	intermedia.py	\
+	joblib.py	\
+	starpu_task_wrapper.c

+ 2 - 1
starpupy/src/starpu/__init__.py

@@ -17,4 +17,5 @@
 
 from.starpupy import *
 from .delay import *
-from . import joblib
+#from . import joblib
+from .intermedia import *

+ 29 - 0
starpupy/src/delay.py

@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+from starpu import starpupy
+import starpu
+import asyncio
+from functools import partial
+
+def delayed(f=None,*, name=None, synchronous=0, priority=0, color=None, flops=None, perfmodel=None):
+	# add options of task_submit
+	if f is None:
+		return partial(delayed, name=name, synchronous=synchronous, priority=priority, color=color, flops=flops, perfmodel=perfmodel)
+	def submit(*args):
+		fut = starpu.task_submit(name=name, synchronous=synchronous, priority=priority,\
+								 color=color, flops=flops, perfmodel=perfmodel)(f, *args)
+		return fut
+	return submit

+ 63 - 0
starpupy/src/intermedia.py

@@ -0,0 +1,63 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+from starpu import starpupy
+import os
+
+#class perfmodel
+class Perfmodel(object):
+	def __init__(self, symbol):
+		self.symbol=symbol
+		self.pstruct=starpupy.init_perfmodel(self.symbol)
+
+	def get_struct(self):
+		return self.pstruct
+
+	def __del__(self):
+	#def free_struct(self):
+		starpupy.free_perfmodel(self.pstruct)
+
+# generate the dictionary which contains the perfmodel symbol and its struct pointer
+dict_perf={}
+def dict_perf_generator(perfsymbol):
+	if dict_perf.get(perfsymbol) is None:
+		p=Perfmodel(perfsymbol)
+		dict_perf[perfsymbol]=p
+	else:
+		p=dict_perf[perfsymbol]
+	return p
+
+#add options in function task_submit
+def task_submit(*, name=None, synchronous=0, priority=0, color=None, flops=None, perfmodel=None):
+	if perfmodel is None:
+		dict_option={'name': name, 'synchronous': synchronous, 'priority': priority, 'color': color, 'flops': flops, 'perfmodel': None}
+	else:
+		p=dict_perf_generator(perfmodel)
+		dict_option={'name': name, 'synchronous': synchronous, 'priority': priority, 'color': color, 'flops': flops, 'perfmodel': p.get_struct()}
+
+	def call_task_submit(f, *args):
+		fut=starpupy._task_submit(f, *args, dict_option)
+		return fut
+	return call_task_submit
+
+# dump performance model and show the plot
+def perfmodel_plot(perfmodel, view=True):
+	p=dict_perf[perfmodel]
+	starpupy.save_history_based_model(p.get_struct())
+	if view == True:
+		os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
+		os.system('gnuplot starpu_'+perfmodel+'.gp')
+		os.system('gv starpu_'+perfmodel+'.eps')

+ 324 - 0
starpupy/src/joblib.py

@@ -0,0 +1,324 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+import sys
+import types
+import joblib as jl
+from joblib import logger
+from joblib._parallel_backends import ParallelBackendBase
+from starpu import starpupy
+import starpu
+import asyncio
+import math
+import functools
+import numpy as np
+import inspect
+import threading
+
+BACKENDS={
+	#'loky': LokyBackend,
+}
+_backend = threading.local()
+
+# get the number of CPUs controlled by StarPU
+def cpu_count():
+	n_cpus=starpupy.cpu_worker_get_count()
+	return n_cpus
+
+# split a list ls into n_block numbers of sub-lists 
+def partition(ls, n_block):
+	if len(ls)>=n_block:
+		# there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
+		q1=math.ceil(len(ls)/n_block)
+		q2=math.floor(len(ls)/n_block)
+		n1=len(ls)%n_block
+		#n2=n_block-n1
+		# generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
+		L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
+		L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
+
+		L=L1+L2
+	else:
+		# if the block number is larger than the length of list, each element in the list is a sub-list
+		L=[ls[i:i+1] for i in range (len(ls))]
+	return L
+
+# split a two-dimension numpy matrix into n_block numbers of sub-matrices
+def array2d_split(a, n_block):
+	# decompose number of n_jobs to two integers multiply
+	c_tmp=math.floor(math.sqrt(n_block))
+	for i in range (c_tmp,0,-1):
+		if n_block%i==0:
+			c=i
+			r=int(n_block/c)
+			break
+	# split column
+	arr_split_c=np.array_split(a,c,0)
+	arr_split=[]
+	# split row
+	for i in range(c):
+		arr_split_r=np.array_split(arr_split_c[i],r,1)
+		for j in range(r):
+			arr_split.append(arr_split_r[j])
+	return arr_split
+
+
+def future_generator(iterable, n_jobs, dict_task):
+	# iterable is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
+	#print("iterable type is ", type(iterable))
+	#print("iterable is", iterable)
+	# get the number of block
+	if n_jobs<-cpu_count()-1 or n_jobs>cpu_count():
+		raise SystemExit('Error: n_jobs is out of range')
+		#print("Error: n_jobs is out of range, number of CPUs is", cpu_count())
+	elif n_jobs<0:
+		n_block=cpu_count()+1+n_jobs
+	else:
+		n_block=n_jobs
+
+	# if arguments is tuple format
+	if type(iterable) is tuple:
+		# the function is always the first element
+		f=iterable[0]
+		# get the name of formal arguments of f
+		formal_args=inspect.getfullargspec(f).args
+		# get the arguments list
+		args=[]
+		# argument is arbitrary in iterable[1]
+		args=list(iterable[1])
+		# argument is keyword argument in iterable[2]
+		for i in range(len(formal_args)):
+			for j in iterable[2].keys():
+				if j==formal_args[i]:
+					args.append(iterable[2][j])
+		# check whether all arrays have the same size
+		l_arr=[]
+		# list of Future result
+		L_fut=[]
+		# split the vector
+		args_split=[]
+		for i in range(len(args)):
+			args_split.append([])
+			# if the array is an numpy array
+			if type(args[i]) is np.ndarray:
+				# one-dimension matrix
+				if args[i].ndim==1:
+					# split numpy array
+					args_split[i]=np.array_split(args[i],n_block)
+					# get the length of numpy array
+					l_arr.append(args[i].size)
+				# two-dimension matrix
+				elif args[i].ndim==2:
+					# split numpy 2D array
+					args_split[i]=array2d_split(args[i],n_block)
+			# if the array is a generator
+			elif isinstance(args[i],types.GeneratorType):
+				# split generator
+				args_split[i]=partition(list(args[i]),n_block)
+				# get the length of generator
+				l_arr.append(sum(len(args_split[i][j]) for j in range(len(args_split[i]))))
+		if len(set(l_arr))>1:
+			raise SystemExit('Error: all arrays should have the same size')
+		#print("args list is", args_split)
+		for i in range(n_block):
+			# generate the argument list
+			L_args=[]
+			for j in range(len(args)):
+				if type(args[j]) is np.ndarray or isinstance(args[j],types.GeneratorType):
+					L_args.append(args_split[j][i])
+				else:
+					L_args.append(args[j])
+			#print("L_args is", L_args)
+			fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\
+								   color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'])\
+				                  (f, *L_args)
+			L_fut.append(fut)
+		return L_fut
+
+	# if iterable is a generator or a list of function
+	else:
+		L=list(iterable)
+		#print(L)
+		# generate a list of function according to iterable
+		def lf(ls):
+			L_func=[]
+			for i in range(len(ls)):
+				# the first element is the function
+				f=ls[i][0]
+				# the second element is the args list of a type tuple
+				L_args=list(ls[i][1])
+				# generate a list of function
+				L_func.append(f(*L_args))
+			return L_func
+
+		# generate the split function list
+		L_split=partition(L,n_block)
+		# operation in each split list
+		L_fut=[]
+		for i in range(len(L_split)):
+			fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\
+								   color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'])\
+				                  (lf, L_split[i])
+			L_fut.append(fut)
+		return L_fut
+
+class Parallel(object):
+	def __init__(self, mode="normal", perfmodel=None, end_msg=None,\
+			 name=None, synchronous=0, priority=0, color=None, flops=None,\
+	         n_jobs=None, backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
+	         batch_size='auto', temp_folder=None, max_nbytes='1M',\
+	         mmap_mode='r', prefer=None, require=None):
+		#active_backend= get_active_backend()
+		# nesting_level = active_backend.nesting_level
+
+		# if backend is None:
+		# 	backend = active_backend
+
+		# else:
+		# 	try:
+		# 		backend_factory = BACKENDS[backend]
+		# 	except KeyError as e:
+		# 		raise ValueError("Invalid backend: %s, expected one of %r"
+  #                                % (backend, sorted(BACKENDS.keys()))) from e
+		# 	backend = backend_factory(nesting_level=nesting_level)
+
+		if n_jobs is None:
+			n_jobs = 1
+
+		self.mode=mode
+		self.perfmodel=perfmodel
+		self.end_msg=end_msg
+		self.name=name
+		self.synchronous=synchronous
+		self.priority=priority
+		self.color=color
+		self.flops=flops
+		self.n_jobs=n_jobs
+		self._backend=backend
+
+	def print_progress(self):
+		#pass
+		print("", starpupy.task_nsubmitted())
+
+	def __call__(self,iterable):
+		#generate the dictionary of task_submit
+		dict_task={'name': self.name, 'synchronous': self.synchronous, 'priority': self.priority, 'color': self.color, 'flops': self.flops, 'perfmodel': self.perfmodel}
+		if hasattr(self._backend, 'start_call'):
+			self._backend.start_call()
+		# the mode normal, user can call the function directly without using async
+		if self.mode=="normal":
+			async def asy_main():
+				L_fut=future_generator(iterable, self.n_jobs, dict_task)
+				res=[]
+				for i in range(len(L_fut)):
+					L_res=await L_fut[i]
+					res.extend(L_res)
+				#print(res)
+				#print("type of result is", type(res))
+				return res
+			#asyncio.run(asy_main())
+			#retVal=asy_main
+			loop = asyncio.get_event_loop()
+			results = loop.run_until_complete(asy_main())
+			retVal = results
+		# the mode future, user needs to use asyncio module and await the Future result in main function
+		elif self.mode=="future":
+			L_fut=future_generator(iterable, self.n_jobs, dict_task)
+			fut=asyncio.gather(*L_fut)
+			if self.end_msg!=None:
+				fut.add_done_callback(functools.partial(print, self.end_msg))
+			retVal=fut
+		if hasattr(self._backend, 'stop_call'):
+			self._backend.stop_call()
+		return retVal
+
+def delayed(function):
+	def delayed_function(*args, **kwargs):
+		return function, args, kwargs
+	return delayed_function
+
+
+######################################################################
+__version__ = jl.__version__
+
+class Memory(jl.Memory):
+	def __init__(self,location=None, backend='local', cachedir=None,
+                 mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
+                 backend_options=None):
+		super(Memory, self).__init__(location=None, backend='local', cachedir=None,
+                 mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
+                 backend_options=None)
+
+
+def dump(value, filename, compress=0, protocol=None, cache_size=None):
+	return jl.dump(value, filename, compress, protocol, cache_size)
+
+def load(filename, mmap_mode=None):
+	return jl.load(filename, mmap_mode)
+
+def hash(obj, hash_name='md5', coerce_mmap=False):
+	return jl.hash(obj, hash_name, coerce_mmap)
+
+def register_compressor(compressor_name, compressor, force=False):
+	return jl.register_compressor(compressor_name, compressor, force)
+
+def effective_n_jobs(n_jobs=-1):
+	return cpu_count()
+
+def get_active_backend():
+	backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
+	if backend_and_jobs is not None:
+		backend,n_jobs=backend_and_jobs
+		return backend
+	backend = BACKENDS['loky'](nesting_level=0)
+	return backend
+
+class parallel_backend(object):
+	def __init__(self, backend, n_jobs=-1, inner_max_num_threads=None,
+                 **backend_params):
+		if isinstance(backend, str):
+			backend = BACKENDS[backend](**backend_params)
+
+		current_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
+		if backend.nesting_level is None:
+			if current_backend_and_jobs is None:
+				nesting_level = 0
+			else:
+				nesting_level = current_backend_and_jobs[0].nesting_level
+
+			backend.nesting_level = nesting_level
+
+		# Save the backends info and set the active backend
+		self.old_backend_and_jobs = current_backend_and_jobs
+		self.new_backend_and_jobs = (backend, n_jobs)
+
+		_backend.backend_and_jobs = (backend, n_jobs)
+
+	def __enter__(self):
+		return self.new_backend_and_jobs
+
+	def __exit__(self, type, value, traceback):
+		self.unregister()
+
+	def unregister(self):
+		if self.old_backend_and_jobs is None:
+			if getattr(_backend, 'backend_and_jobs', None) is not None:
+				del _backend.backend_and_jobs
+		else:
+			_backend.backend_and_jobs = self.old_backend_and_jobs
+
+def register_parallel_backend(name, factory):
+	BACKENDS[name] = factory

+ 23 - 0
starpupy/src/setup.cfg.in

@@ -0,0 +1,23 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+[build]
+build-platlib=build
+build-temp=build/tmp
+
+[install]
+prefix=@prefix@
+
+

+ 40 - 0
starpupy/src/setup.py.in

@@ -0,0 +1,40 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+from distutils.core import setup, Extension
+
+numpy_dir = '@PYTHON_NUMPY_DIR@'
+if numpy_dir != '':
+    numpy_include_dir = [numpy_dir]
+else:
+    numpy_include_dir = []
+starpupy = Extension('starpu.starpupy',
+                     include_dirs = ['@STARPU_SRC_DIR@/include', '@STARPU_BUILD_DIR@/include'] + numpy_include_dir,
+                     libraries = ['starpu-@STARPU_EFFECTIVE_VERSION@'],
+                     library_dirs = ['@STARPU_BUILD_DIR@/src/.libs'],
+	             sources = ['starpu/starpu_task_wrapper.c'])
+
+setup(
+    name = 'starpupy',
+    version = '0.5',
+    description = 'Python bindings for StarPU',
+    author = 'StarPU team',
+    author_email = 'starpu-devel@lists.gforge.inria.fr',
+    url = 'https://starpu.gitlabpages.inria.fr/',
+    license = 'GPL',
+    platforms = 'posix',
+    ext_modules = [starpupy],
+    packages = ['starpu'],
+    )

+ 0 - 13
starpupy/src/starpu/Makefile

@@ -1,13 +0,0 @@
-PYTHON ?= python3
-
-CPPFLAGS = $(shell $(PYTHON)-config --includes) -Wall -O2 -g
-CFLAGS += $(shell pkg-config --cflags starpu-1.3)
-LDLIBS += $(shell pkg-config --libs starpu-1.3)
-
-all: starpupy.so
-
-starpupy.so: starpu_task_wrapper.c Makefile
-	$(CC) -fPIC $(CFLAGS) $< -o $@ -shared  $(CPPFLAGS) $(LDLIBS)
-
-clean:
-	rm -f starpupy.so

+ 0 - 147
starpupy/src/starpu/joblib.py

@@ -1,147 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-from starpu import starpupy
-import asyncio
-import math
-import os
-import pickle
-import json
-import functools
-
-# get the number of CPUs controlled by StarPU
-n_cpus=starpupy.cpu_worker_get_count()
-
-#class perfmodel
-class Perfmodel(object):
-	def __init__(self, symbol):
-		self.symbol=symbol
-		self.pstruct=starpupy.init_perfmodel(self.symbol)
-
-	def get_struct(self):
-		return self.pstruct
-
-	def __del__(self):
-	#def free_struct(self):
-		starpupy.free_perfmodel(self.pstruct)
-
-# split a list ls into n_block numbers of sub-lists 
-def partition(ls, n_block):
-	if len(ls)>=n_block:
-		# there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
-		q1=math.ceil(len(ls)/n_block)
-		q2=math.floor(len(ls)/n_block)
-		n1=len(ls)%n_block
-		#n2=n_block-n1
-		# generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
-		L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
-		L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
-
-		L=L1+L2
-	else:
-		# if the block number is larger than the length of list, each element in the list is a sub-list
-		L=[ls[i:i+1] for i in range (len(ls))]
-	return L
-
-# generate the dictionary which contains the perfmodel symbol and its struct pointer
-dict_perf={}
-def dict_perf_generator(perfsymbol):
-	if dict_perf.get(perfsymbol)==None:
-		p=Perfmodel(perfsymbol)
-		dict_perf[perfsymbol]=p
-	else:
-		p=dict_perf[perfsymbol]
-	return p
-
-def future_generator(g, n_jobs, perfsymbol):
-	# g is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
-	L=list(g)
-	# generate a list of function according to g
-	def lf(ls):
-		L_func=[]
-		for i in range(len(ls)):
-			# the first element is the function
-			f=ls[i][0]
-			# the second element is the args list of a type tuple
-			L_args=list(ls[i][1])
-			# generate a list of function
-			L_func.append(f(*L_args))
-		return L_func
-	# get the number of block
-	if n_jobs<-n_cpus-1 or n_jobs>n_cpus:
-		print("Error: n_jobs is out of range, number of CPUs is", n_cpus)
-	elif n_jobs<0:
-		n_block=n_cpus+1+n_jobs
-	else:
-		n_block=n_jobs
-	# generate the split function list
-	L_split=partition(L,n_block)
-	# operation in each split list
-	L_fut=[]
-	for i in range(len(L_split)):
-		if perfsymbol==None:
-			fut=starpupy.task_submit(lf, L_split[i])
-			L_fut.append(fut)
-		else:
-			p=dict_perf_generator(perfsymbol)
-			fut=starpupy.task_submit(lf, L_split[i], p.get_struct())
-			L_fut.append(fut)
-	return L_fut
-
-def parallel(*, mode="normal", n_jobs=1, perfmodel=None, end_msg=None,\
-	         backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
-	         batch_size='auto', temp_folder=None, max_nbytes='1M',\
-	         mmap_mode='r', prefer=None, require=None):
-	# the mode normal, user can call the function directly without using async
-	if mode=="normal":
-		def parallel_normal(g):
-			async def asy_main():
-				L_fut=future_generator(g, n_jobs, perfmodel)
-				res=[]
-				for i in range(len(L_fut)):
-					L_res=await L_fut[i]
-					res.extend(L_res)
-				#print(res)
-				return res
-			asyncio.run(asy_main())
-			return asy_main
-		return parallel_normal
-	# the mode future, user needs to use asyncio module and await the Future result in main function
-	elif mode=="future":
-		def parallel_future(g):
-			L_fut=future_generator(g, n_jobs, perfmodel)
-			fut=asyncio.gather(*L_fut)
-			if end_msg==None:
-				return fut
-			else:
-				fut.add_done_callback(functools.partial(print, end_msg))
-				return fut
-			#return fut
-		return parallel_future
-
-def delayed(f):
-	def delayed_func(*args):
-		return f, args
-	return delayed_func
-
-
-######################################################################
-# dump performance model
-def perfmodel_plot(perfmodel):
-	p=dict_perf[perfmodel]
-	starpupy.save_history_based_model(p.get_struct())
-	os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
-	os.system('gnuplot starpu_'+perfmodel+'.gp')
-	os.system('gv starpu_'+perfmodel+'.eps')

+ 0 - 416
starpupy/src/starpu/starpu_task_wrapper.c

@@ -1,416 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <starpu.h>
-
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-
-/*macro*/
-#if defined(Py_DEBUG) || defined(DEBUG)
-extern void _Py_CountReferences(FILE*);
-#define CURIOUS(x) { fprintf(stderr, __FILE__ ":%d ", __LINE__); x; }
-#else
-#define CURIOUS(x)
-#endif
-#define MARKER()        CURIOUS(fprintf(stderr, "\n"))
-#define DESCRIBE(x)     CURIOUS(fprintf(stderr, "  " #x "=%d\n", x))
-#define DESCRIBE_HEX(x) CURIOUS(fprintf(stderr, "  " #x "=%08x\n", x))
-#define COUNTREFS()     CURIOUS(_Py_CountReferences(stderr))
-/*******/
-
-/*********************Functions passed in task_submit wrapper***********************/
-
-static PyObject* asyncio_module; /*python asyncio library*/
-
-/*structure contains parameters which are passed to starpu_task.cl_arg*/
-struct codelet_struct { 
-    PyObject* f; /*the python function passed in*/
-    PyObject* argList; /*argument list of python function passed in*/
-    PyObject* rv; /*return value when using PyObject_CallObject call the function f*/
-    PyObject* fut; /*asyncio.Future*/
-    PyObject* lp; /*asyncio.Eventloop*/
-};
-typedef struct codelet_struct codelet_st;
-
-/*function passed to starpu_codelet.cpu_func*/
-void codelet_func(void *buffers[], void *cl_arg){
-
-    codelet_st* cst = (codelet_st*) cl_arg;
-
-    /*make sure we own the GIL*/
-    PyGILState_STATE state = PyGILState_Ensure();
-
-    /*verify that the function is a proper callable*/
-    if (!PyCallable_Check(cst->f)) {
-
-        printf("py_callback: expected a callable function\n"); 
-        exit(1);
-    }
-    
-    /*check the arguments of python function passed in*/
-    for (int i=0; i < PyTuple_Size(cst->argList); i++){
-      PyObject* obj=PyTuple_GetItem(cst->argList, i);
-      const char* tp = Py_TYPE(obj)->tp_name;
-      if(strcmp(tp, "_asyncio.Future") == 0){
-        /*if one of arguments is Future, get its result*/
-        PyObject * fut_result = PyObject_CallMethod(obj, "result", NULL);
-        /*replace the Future argument to its result*/
-        PyTuple_SetItem(cst->argList, i, fut_result);
-      }
-    }
-
-    /*call the python function*/
-    PyObject *pRetVal = PyObject_CallObject(cst->f, cst->argList);
-    cst->rv=pRetVal;
-
-    //Py_DECREF(cst->f);
-
-    /*restore previous GIL state*/
-    PyGILState_Release(state);
-
-}
-
-/*function passed to starpu_task.callback_func*/
-void cb_func(void *v){
-
-	struct starpu_task *task=starpu_task_get_current();
-    codelet_st* cst = (codelet_st*) task->cl_arg;
-
-    /*make sure we own the GIL*/
-    PyGILState_STATE state = PyGILState_Ensure();
-
-    /*set the Future result and mark the Future as done*/
-    PyObject * set_result = PyObject_GetAttrString(cst->fut, "set_result");
-    PyObject * loop_callback = PyObject_CallMethod(cst->lp, "call_soon_threadsafe", "(O,O)", set_result, cst->rv);
-
-    Py_DECREF(loop_callback);
-    Py_DECREF(set_result);
-    Py_DECREF(cst->rv);
-    Py_DECREF(cst->fut);
-    Py_DECREF(cst->lp);
-
-    //Py_DECREF(perfmodel);
-    struct starpu_codelet * func_cl=(struct starpu_codelet *) task->cl;
-    if (func_cl->model != NULL){
-      struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
-      PyObject* perfmodel=PyCapsule_New(perf, "Perf", 0);
-      Py_DECREF(perfmodel);
-    }
-
-    for(int i = 0; i < PyTuple_Size(cst->argList); i++){
-        Py_DECREF(PyTuple_GetItem(cst->argList, i));
-    }
-    Py_DECREF(cst->argList);
-
-    /*restore previous GIL state*/
-    PyGILState_Release(state);
-
-    /*deallocate task*/
-    free(task->cl);
-	  free(task->cl_arg);
-
-}
-
-/***********************************************************************************/
-/*PyObject*->struct starpu_task**/
-static struct starpu_task *PyTask_AsTask(PyObject* obj){
-  return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task");
-}
-
-/* destructor function for task */
-static void del_Task(PyObject *obj) {
-  struct starpu_task* obj_task=PyTask_AsTask(obj);
-  obj_task->destroy=1; /*XXX we should call starpu task destroy*/
-}
-
-/*struct starpu_task*->PyObject**/
-static PyObject *PyTask_FromTask(struct starpu_task *task) {
-  return PyCapsule_New(task, "Task", del_Task);
-}
-
-/***********************************************************************************/
-static size_t sizebase (struct starpu_task * task, unsigned nimpl){
-
-  codelet_st* cst = (codelet_st*) task->cl_arg;
-
-  PyObject* obj=PyTuple_GetItem(cst->argList, 0);
-  /*get the length of arguments*/
-  int n = PyList_Size(obj);
-
-  return n;
-}
-
-static void del_Perf(PyObject *obj){
-  struct starpu_perfmodel *perf=(struct starpu_perfmodel*)PyCapsule_GetPointer(obj, "Perf");
-  free(perf);
-}
-/*initialization of perfmodel*/
-static PyObject* init_perfmodel(PyObject *self, PyObject *args){
-
-  char* sym;
-
-  if (!PyArg_ParseTuple(args, "s", &sym))
-    return NULL;
-
-  /*allocate a perfmodel structure*/
-  struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel));
-
-  /*get the perfmodel symbol*/
-  char* p =strdup(sym);
-  perf->symbol=p;
-  perf->type=STARPU_HISTORY_BASED;
-
-  /*struct perfmodel*->PyObject**/
-  PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL);
-
-  return perfmodel;
-}
-
-
-/*free perfmodel*/
-static PyObject* free_perfmodel(PyObject *self, PyObject *args){
-
-  PyObject* perfmodel;
-  if (!PyArg_ParseTuple(args, "O", &perfmodel))
-    return NULL;
-
-  /*PyObject*->struct perfmodel**/
-  struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-
-  starpu_save_history_based_model(perf);
-  //starpu_perfmodel_unload_model(perf);
-  free(perf->symbol);
-  starpu_perfmodel_deinit(perf);
-  free(perf);
-
-  /*return type is void*/
-  Py_INCREF(Py_None);
-  return Py_None;
-}
-
-static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args){
-
-  PyObject* perfmodel;
-  if (!PyArg_ParseTuple(args, "O", &perfmodel))
-    return NULL;
-
-  /*PyObject*->struct perfmodel**/
-  struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-
-  starpu_save_history_based_model(perf);
-
-  /*return type is void*/
-  Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*****************************Wrappers of StarPU methods****************************/
-/*wrapper submit method*/
-static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args){
-
-    /*get the running Event loop*/
-    PyObject* loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL);
-    /*create a asyncio.Future object*/
-    PyObject* fut = PyObject_CallMethod(loop, "create_future", NULL);
-
-    /*first argument in args is always the python function passed in*/
-    PyObject* func_py = PyTuple_GetItem(args, 0);
-    Py_INCREF(func_py);
-
-	  /*allocate a task structure and initialize it with default values*/
-    struct starpu_task *task=starpu_task_create();
-    task->destroy=0;
-
-    PyObject* PyTask=PyTask_FromTask(task);
-
-    /*set one of fut attribute to the task pointer*/
-    PyObject_SetAttrString(fut, "starpu_task", PyTask);
-    /*check the arguments of python function passed in*/
-    for (int i=1; i < PyTuple_Size(args); i++){
-      PyObject* obj=PyTuple_GetItem(args, i);
-      const char* tp = Py_TYPE(obj)->tp_name;
-      if(strcmp(tp, "_asyncio.Future") == 0){
-        /*if one of arguments is Future, get its corresponding task*/
-        PyObject* fut_task=PyObject_GetAttrString(obj, "starpu_task");
-        /*declare task dependencies between the current task and the corresponding task of Future argument*/
-        starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task));
-
-        Py_DECREF(fut_task);
-      }
-    }
-    
-    /*allocate a codelet structure*/
-    struct starpu_codelet *func_cl=(struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
-    /*initialize func_cl with default values*/
-    starpu_codelet_init(func_cl);
-    func_cl->cpu_func=&codelet_func;
-    
-    /*check whether the last argument in args is the perfmodel*/
-    PyObject* perfmodel=PyTuple_GetItem(args, PyTuple_Size(args)-1);
-    const char* tp_perf = Py_TYPE(perfmodel)->tp_name;
-    if (strcmp(tp_perf, "PyCapsule")==0){
-      /*PyObject*->struct perfmodel**/
-      struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-      func_cl->model=perf;
-      Py_INCREF(perfmodel);
-    }
-    
-
-    /*allocate a new codelet structure to pass the python function, asyncio.Future and Event loop*/
-    codelet_st *cst = (codelet_st*)malloc(sizeof(codelet_st));
-    cst->f = func_py;
-    cst->fut = fut;
-    cst->lp = loop;
-    
-    Py_INCREF(fut);
-    Py_INCREF(loop);
-
-    /*pass args in argList*/
-    if (PyTuple_Size(args)==1 || (PyTuple_Size(args)==2 && strcmp(tp_perf, "PyCapsule")==0))/*function no arguments*/
-      cst->argList = PyTuple_New(0);
-    else if(PyTuple_Size(args)>2 && strcmp(tp_perf, "PyCapsule")==0){/*function has arguments and the last argument in args is the perfmodel*/
-      cst->argList = PyTuple_New(PyTuple_Size(args)-2);
-      for (int i=0; i < PyTuple_Size(args)-2; i++){
-        PyObject* tmp=PyTuple_GetItem(args, i+1);
-        PyTuple_SetItem(cst->argList, i, tmp);
-        Py_INCREF(PyTuple_GetItem(cst->argList, i));
-      }
-    }
-    else{/*function has arguments and no perfmodel*/
-      cst->argList = PyTuple_New(PyTuple_Size(args)-1);
-      for (int i=0; i < PyTuple_Size(args)-1; i++){
-        PyObject* tmp=PyTuple_GetItem(args, i+1);
-        PyTuple_SetItem(cst->argList, i, tmp);
-        Py_INCREF(PyTuple_GetItem(cst->argList, i));
-      }
-    }
-
-    task->cl=func_cl;
-    task->cl_arg=cst;
-    /*call starpu_task_submit method*/
-    starpu_task_submit(task);
-    task->callback_func=&cb_func;
-    if (strcmp(tp_perf, "PyCapsule")==0){
-      struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
-      perf->size_base=&sizebase;
-    }
-
-    //printf("the number of reference is %ld\n", Py_REFCNT(func_py));
-    //_Py_PrintReferences(stderr);
-    //COUNTREFS();
-    return fut;
-
-}
-
-/*wrapper wait for all method*/
-static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_task_wait_for_all method*/
-	Py_BEGIN_ALLOW_THREADS
-	starpu_task_wait_for_all();
-	Py_END_ALLOW_THREADS
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper pause method*/
-static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_pause method*/
-	starpu_pause();
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper resume method*/
-static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_resume method*/
-	starpu_resume();
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper get count cpu method*/
-static PyObject* starpu_cpu_worker_get_count_wrapper(PyObject *self, PyObject *args){
-
-  /*call starpu_cpu_worker_get_count method*/
-  int num_cpu=starpu_cpu_worker_get_count();
-
-  /*return type is unsigned*/
-  return Py_BuildValue("I", num_cpu);
-}
-
-/***********************************************************************************/
-
-/***************The module’s method table and initialization function**************/
-/*method table*/
-static PyMethodDef starpupyMethods[] = 
-{ 
-  {"task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/
-  {"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/
-  {"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/
-  {"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/
-  {"cpu_worker_get_count", starpu_cpu_worker_get_count_wrapper, METH_VARARGS, "return the number of CPUs controlled by StarPU"}, /*get count cpu method*/
-  {"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/
-  {"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/
-  {"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/
-  {NULL, NULL}
-};
-
-/*deallocation function*/
-static void starpupyFree(void* self){
-	starpu_shutdown();
-  Py_DECREF(asyncio_module);
-  //COUNTREFS();
-}
-
-/*module definition structure*/
-static struct PyModuleDef starpupymodule={
-  PyModuleDef_HEAD_INIT,
-  "starpupy", /*name of module*/
-  NULL,
-  -1,
-  starpupyMethods, /*method table*/
-  NULL,
-  NULL,
-  NULL,
-  starpupyFree /*deallocation function*/
-};
-
-/*initialization function*/
-PyMODINIT_FUNC
-PyInit_starpupy(void)
-{
-    PyEval_InitThreads();
-    /*starpu initialization*/
-	  starpu_init(NULL);
-    /*python asysncio import*/
-    asyncio_module = PyImport_ImportModule("asyncio");
-    /*module import initialization*/
-    return PyModule_Create(&starpupymodule);
-}
-/***********************************************************************************/

+ 536 - 0
starpupy/src/starpu_task_wrapper.c

@@ -0,0 +1,536 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#undef NDEBUG
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <starpu.h>
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+#include <numpy/arrayobject.h>
+#endif
+
+/*macro*/
+#if defined(Py_DEBUG) || defined(DEBUG)
+extern void _Py_CountReferences(FILE*);
+#define CURIOUS(x) { fprintf(stderr, __FILE__ ":%d ", __LINE__); x; }
+#else
+#define CURIOUS(x)
+#endif
+#define MARKER()        CURIOUS(fprintf(stderr, "\n"))
+#define DESCRIBE(x)     CURIOUS(fprintf(stderr, "  " #x "=%d\n", x))
+#define DESCRIBE_HEX(x) CURIOUS(fprintf(stderr, "  " #x "=%08x\n", x))
+#define COUNTREFS()     CURIOUS(_Py_CountReferences(stderr))
+/*******/
+
+/*********************Functions passed in task_submit wrapper***********************/
+
+static PyObject *asyncio_module; /*python asyncio library*/
+
+/*structure contains parameters which are passed to starpu_task.cl_arg*/
+struct codelet_args
+{
+	PyObject *f; /*the python function passed in*/
+	PyObject *argList; /*argument list of python function passed in*/
+	PyObject *rv; /*return value when using PyObject_CallObject call the function f*/
+	PyObject *fut; /*asyncio.Future*/
+	PyObject *lp; /*asyncio.Eventloop*/
+};
+
+/*function passed to starpu_codelet.cpu_func*/
+void codelet_func(void *buffers[], void *cl_arg)
+{
+	struct codelet_args *cst = (struct codelet_args*) cl_arg;
+
+	/*make sure we own the GIL*/
+	PyGILState_STATE state = PyGILState_Ensure();
+
+	/*verify that the function is a proper callable*/
+	if (!PyCallable_Check(cst->f))
+	{
+		printf("py_callback: expected a callable function\n");
+		exit(1);
+	}
+
+	/*check the arguments of python function passed in*/
+	int i;
+	for(i=0; i < PyTuple_Size(cst->argList); i++)
+	{
+		PyObject *obj = PyTuple_GetItem(cst->argList, i);
+		const char *tp = Py_TYPE(obj)->tp_name;
+		if(strcmp(tp, "_asyncio.Future") == 0)
+		{
+			/*if one of arguments is Future, get its result*/
+			PyObject *fut_result = PyObject_CallMethod(obj, "result", NULL);
+			/*replace the Future argument to its result*/
+			PyTuple_SetItem(cst->argList, i, fut_result);
+		}
+		/*else if (strcmp(tp, "numpy.ndarray")==0)
+		  {
+		  printf("array is %p\n", obj);
+		  }*/
+	}
+
+	/*call the python function*/
+	PyObject *pRetVal = PyObject_CallObject(cst->f, cst->argList);
+	//const char *tp = Py_TYPE(pRetVal)->tp_name;
+	//printf("return value type is %s\n", tp);
+	cst->rv = pRetVal;
+
+	//Py_DECREF(cst->f);
+
+	/*restore previous GIL state*/
+	PyGILState_Release(state);
+}
+
+/*function passed to starpu_task.callback_func*/
+void cb_func(void *v)
+{
+	struct starpu_task *task = starpu_task_get_current();
+	struct codelet_args *cst = (struct codelet_args*) task->cl_arg;
+
+	/*make sure we own the GIL*/
+	PyGILState_STATE state = PyGILState_Ensure();
+
+	/*set the Future result and mark the Future as done*/
+	PyObject *set_result = PyObject_GetAttrString(cst->fut, "set_result");
+	PyObject *loop_callback = PyObject_CallMethod(cst->lp, "call_soon_threadsafe", "(O,O)", set_result, cst->rv);
+
+	Py_DECREF(loop_callback);
+	Py_DECREF(set_result);
+	Py_DECREF(cst->rv);
+	Py_DECREF(cst->fut);
+	Py_DECREF(cst->lp);
+	Py_DECREF(cst->argList);
+
+	//Py_DECREF(perfmodel);
+	struct starpu_codelet *func_cl=(struct starpu_codelet *) task->cl;
+	if (func_cl->model != NULL)
+	{
+		struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
+		PyObject *perfmodel=PyCapsule_New(perf, "Perf", 0);
+		Py_DECREF(perfmodel);
+	}
+
+	/*restore previous GIL state*/
+	PyGILState_Release(state);
+
+	/*deallocate task*/
+	free(task->cl);
+	free(task->cl_arg);
+}
+
+/***********************************************************************************/
+/*PyObject*->struct starpu_task**/
+static struct starpu_task *PyTask_AsTask(PyObject *obj)
+{
+	return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task");
+}
+
+/* destructor function for task */
+static void del_Task(PyObject *obj)
+{
+	struct starpu_task *obj_task=PyTask_AsTask(obj);
+	obj_task->destroy=1; /*XXX we should call starpu task destroy*/
+}
+
+/*struct starpu_task*->PyObject**/
+static PyObject *PyTask_FromTask(struct starpu_task *task)
+{
+	return PyCapsule_New(task, "Task", del_Task);
+}
+
+/***********************************************************************************/
+static size_t sizebase (struct starpu_task *task, unsigned nimpl)
+{
+	int n=0;
+	struct codelet_args *cst = (struct codelet_args*) task->cl_arg;
+
+	/*get the result of function*/
+	PyObject *obj=cst->rv;
+	/*get the length of result*/
+	const char *tp = Py_TYPE(obj)->tp_name;
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+	/*if the result is a numpy array*/
+	if (strcmp(tp, "numpy.ndarray")==0)
+		n = PyArray_SIZE(obj);
+	else
+#endif
+	/*if the result is a list*/
+	if (strcmp(tp, "list")==0)
+		n = PyList_Size(obj);
+	/*else error*/
+	else
+	{
+		printf("starpu_perfmodel::size_base: the type of function result is unrecognized\n");
+		exit(1);
+	}
+	return n;
+}
+
+static void del_Perf(PyObject *obj)
+{
+	struct starpu_perfmodel *perf=(struct starpu_perfmodel*)PyCapsule_GetPointer(obj, "Perf");
+	free(perf);
+}
+
+/*initialization of perfmodel*/
+static PyObject* init_perfmodel(PyObject *self, PyObject *args)
+{
+	char *sym;
+
+	if (!PyArg_ParseTuple(args, "s", &sym))
+		return NULL;
+
+	/*allocate a perfmodel structure*/
+	struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel));
+
+	/*get the perfmodel symbol*/
+	char *p =strdup(sym);
+	perf->symbol=p;
+	perf->type=STARPU_HISTORY_BASED;
+
+	/*struct perfmodel*->PyObject**/
+	PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL);
+
+	return perfmodel;
+}
+
+/*free perfmodel*/
+static PyObject* free_perfmodel(PyObject *self, PyObject *args)
+{
+	PyObject *perfmodel;
+	if (!PyArg_ParseTuple(args, "O", &perfmodel))
+		return NULL;
+
+	/*PyObject*->struct perfmodel**/
+	struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+
+	starpu_save_history_based_model(perf);
+	//starpu_perfmodel_unload_model(perf);
+	//free(perf->symbol);
+	starpu_perfmodel_deinit(perf);
+	free(perf);
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args)
+{
+	PyObject *perfmodel;
+	if (!PyArg_ParseTuple(args, "O", &perfmodel))
+		return NULL;
+
+	/*PyObject*->struct perfmodel**/
+	struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+
+	starpu_save_history_based_model(perf);
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*****************************Wrappers of StarPU methods****************************/
+/*wrapper submit method*/
+static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args)
+{
+	/*get the running Event loop*/
+	PyObject *loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL);
+	/*create a asyncio.Future object*/
+	PyObject *fut = PyObject_CallMethod(loop, "create_future", NULL);
+
+	/*first argument in args is always the python function passed in*/
+	PyObject *func_py = PyTuple_GetItem(args, 0);
+	Py_INCREF(func_py);
+
+	/*allocate a task structure and initialize it with default values*/
+	struct starpu_task *task=starpu_task_create();
+	task->destroy=0;
+
+	PyObject *PyTask=PyTask_FromTask(task);
+
+	/*set one of fut attribute to the task pointer*/
+	PyObject_SetAttrString(fut, "starpu_task", PyTask);
+	/*check the arguments of python function passed in*/
+	int i;
+	for(i=1; i < PyTuple_Size(args)-1; i++)
+	{
+		PyObject *obj=PyTuple_GetItem(args, i);
+		const char* tp = Py_TYPE(obj)->tp_name;
+		/* NOTE(review): 'tp'/'obj' presumably come from a loop over the
+		 * positional arguments declared above this chunk -- confirm. */
+		if(strcmp(tp, "_asyncio.Future") == 0)
+		{
+			/*if one of arguments is Future, get its corresponding task*/
+			PyObject *fut_task=PyObject_GetAttrString(obj, "starpu_task");
+			/*declare task dependencies between the current task and the corresponding task of Future argument*/
+			starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task));
+
+			Py_DECREF(fut_task);
+		}
+	}
+
+	/*allocate a codelet structure*/
+	/* NOTE(review): malloc results are not checked here (nor for codelet_args
+	 * below); on OOM the next line dereferences NULL. */
+	struct starpu_codelet *func_cl=(struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
+	/*initialize func_cl with default values*/
+	starpu_codelet_init(func_cl);
+	func_cl->cpu_funcs[0]=&codelet_func;
+	func_cl->cpu_funcs_name[0]="codelet_func";
+
+	/*check whether the option perfmodel is None*/
+	PyObject *dict_option = PyTuple_GetItem(args, PyTuple_Size(args)-1);/*the last argument is the option dictionary*/
+	PyObject *perfmodel = PyDict_GetItemString(dict_option, "perfmodel");
+	const char *tp_perf = Py_TYPE(perfmodel)->tp_name;
+	if (strcmp(tp_perf, "PyCapsule")==0)
+	{
+		/*PyObject*->struct perfmodel**/
+		struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+		func_cl->model=perf;
+		/* keep the capsule alive while the codelet references the model.
+		 * NOTE(review): there is no matching Py_DECREF anywhere visible, so
+		 * the capsule reference is never released -- confirm intent. */
+		Py_INCREF(perfmodel);
+	}
+
+	/*allocate a new codelet structure to pass the python function, asyncio.Future and Event loop*/
+	struct codelet_args *cst = (struct codelet_args*)malloc(sizeof(struct codelet_args));
+	cst->f = func_py;
+	cst->fut = fut;
+	cst->lp = loop;
+
+	/* references owned on behalf of the completion callback; presumably
+	 * released by cb_func once the task finishes -- confirm against cb_func */
+	Py_INCREF(fut);
+	Py_INCREF(loop);
+
+	/*pass args in argList*/
+	if (PyTuple_Size(args)==2)/*function no arguments*/
+		cst->argList = PyTuple_New(0);
+	else
+	{/*function has arguments*/
+		cst->argList = PyTuple_New(PyTuple_Size(args)-2);
+		int i;
+		for(i=0; i < PyTuple_Size(args)-2; i++)
+		{
+			PyObject *tmp=PyTuple_GetItem(args, i+1);
+			/* NOTE(review): PyTuple_SetItem *steals* a reference, but 'tmp'
+			 * is only borrowed from 'args'; the Py_INCREF on the next line
+			 * compensates after the fact.  Net refcount is correct, but the
+			 * conventional order is INCREF first, then SetItem. */
+			PyTuple_SetItem(cst->argList, i, tmp);
+			Py_INCREF(PyTuple_GetItem(cst->argList, i));
+		}
+	}
+
+	task->cl=func_cl;
+	task->cl_arg=cst;
+
+	/*pass optional values name=None, synchronous=1, priority=0, color=None, flops=None, perfmodel=None*/
+	/*const char * name*/
+	PyObject *PyName = PyDict_GetItemString(dict_option, "name");
+	const char *name_type = Py_TYPE(PyName)->tp_name;
+	if (strcmp(name_type, "NoneType")!=0)
+	{
+		PyObject *pStrObj = PyUnicode_AsUTF8String(PyName);
+		char* name_str = PyBytes_AsString(pStrObj);
+		/* NOTE(review): the strdup'ed copy is never freed in this chunk --
+		 * leaks one allocation per named task unless StarPU frees
+		 * task->name on task destruction; confirm. */
+		char* name = strdup(name_str);
+		//printf("name is %s\n", name);
+		task->name=name;
+		Py_DECREF(pStrObj);
+	}
+
+	/*unsigned synchronous:1*/
+	PyObject *PySync = PyDict_GetItemString(dict_option, "synchronous");
+	unsigned sync=PyLong_AsUnsignedLong(PySync);
+	//printf("sync is %u\n", sync);
+	task->synchronous=sync;
+
+	/*int priority*/
+	PyObject *PyPrio = PyDict_GetItemString(dict_option, "priority");
+	int prio=PyLong_AsLong(PyPrio);
+	//printf("prio is %d\n", prio);
+	task->priority=prio;
+
+	/*unsigned color*/
+	PyObject *PyColor = PyDict_GetItemString(dict_option, "color");
+	const char *color_type = Py_TYPE(PyColor)->tp_name;
+	if (strcmp(color_type, "NoneType")!=0)
+	{
+		unsigned color=PyLong_AsUnsignedLong(PyColor);
+		//printf("color is %u\n", color);
+		task->color=color;
+	}
+
+	/*double flops*/
+	PyObject *PyFlops = PyDict_GetItemString(dict_option, "flops");
+	const char *flops_type = Py_TYPE(PyFlops)->tp_name;
+	if (strcmp(flops_type, "NoneType")!=0)
+	{
+		double flops=PyFloat_AsDouble(PyFlops);
+		//printf("flops is %f\n", flop);
+		task->flops=flops;
+	}
+
+	task->callback_func=&cb_func;
+
+	/*call starpu_task_submit method*/
+	/* release the GIL during submission (a synchronous task blocks here) */
+	Py_BEGIN_ALLOW_THREADS
+		/* NOTE(review): assert() is compiled out under NDEBUG, so a failed
+		 * submission would be silently ignored; returning NULL with a
+		 * Python exception set would be the conventional handling. */
+		int ret = starpu_task_submit(task);
+		assert(ret==0);
+	Py_END_ALLOW_THREADS
+
+	if (strcmp(tp_perf, "PyCapsule")==0)
+	{
+		/* NOTE(review): size_base is installed *after* submission, so a
+		 * fast-starting task could be timed before the hook is in place --
+		 * confirm whether this ordering is deliberate. */
+		struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
+		perf->size_base=&sizebase;
+	}
+
+	//printf("the number of reference is %ld\n", Py_REFCNT(func_py));
+	//_Py_PrintReferences(stderr);
+	//COUNTREFS();
+	return fut;
+}
+
+/*wrapper wait for all method*/
+/* Python: starpu.task_wait_for_all() -> None.  Blocks until every submitted
+ * task has completed; the GIL is released while waiting so callbacks and
+ * other Python threads can keep running. */
+static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_task_wait_for_all method*/
+	Py_BEGIN_ALLOW_THREADS
+		starpu_task_wait_for_all();
+	Py_END_ALLOW_THREADS
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper pause method*/
+/* Python: starpu.pause() -> None.  Suspends the processing of new tasks by
+ * workers.  NOTE(review): unlike the wait wrapper, the GIL is kept held --
+ * presumably starpu_pause() returns immediately; confirm. */
+static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_pause method*/
+	starpu_pause();
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper resume method*/
+/* Python: starpu.resume() -> None.  Counterpart of pause(): lets workers
+ * poll for new tasks again. */
+static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_resume method*/
+	starpu_resume();
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper get count cpu method*/
+/* Python: starpu.cpu_worker_get_count() -> int.
+ * NOTE(review): the StarPU call presumably returns unsigned; it is stored
+ * in a signed int and then formatted with "I" (unsigned) -- harmless for
+ * realistic worker counts, but the types are mixed. */
+static PyObject* starpu_cpu_worker_get_count_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_cpu_worker_get_count method*/
+	int num_cpu=starpu_cpu_worker_get_count();
+
+	/*return type is unsigned*/
+	return Py_BuildValue("I", num_cpu);
+}
+
+/*wrapper get min priority method*/
+/* Python: starpu.sched_get_min_priority() -> int, the scheduler's minimum
+ * task priority value. */
+static PyObject* starpu_sched_get_min_priority_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_sched_get_min_priority*/
+	int min_prio=starpu_sched_get_min_priority();
+
+	/*return type is int*/
+	return Py_BuildValue("i", min_prio);
+}
+
+/*wrapper get max priority method*/
+/* Python: starpu.sched_get_max_priority() -> int, the scheduler's maximum
+ * task priority value. */
+static PyObject* starpu_sched_get_max_priority_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_sched_get_max_priority*/
+	int max_prio=starpu_sched_get_max_priority();
+
+	/*return type is int*/
+	return Py_BuildValue("i", max_prio);
+}
+
+/*wrapper get the number of no completed submitted tasks method*/
+/* Python: starpu.task_nsubmitted() -> int, the count of submitted tasks
+ * that have not yet completed. */
+static PyObject* starpu_task_nsubmitted_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_task_nsubmitted*/
+	int num_task=starpu_task_nsubmitted();
+
+	/*Return the number of submitted tasks which have not completed yet */
+	return Py_BuildValue("i", num_task);
+}
+/***********************************************************************************/
+
+/***************The module's method table and initialization function**************/
+/*method table: maps the Python-visible names to the C wrappers above.
+ * NOTE(review): _task_submit is underscore-prefixed -- presumably wrapped by
+ * a pure-Python task_submit() that builds the trailing option dict; confirm
+ * against the Python side of the package. */
+static PyMethodDef starpupyMethods[] =
+{
+	{"_task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/
+	{"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/
+	{"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/
+	{"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/
+	{"cpu_worker_get_count", starpu_cpu_worker_get_count_wrapper, METH_VARARGS, "return the number of CPUs controlled by StarPU"}, /*get count cpu method*/
+	{"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/
+	{"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/
+	{"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/
+	{"sched_get_min_priority", starpu_sched_get_min_priority_wrapper, METH_VARARGS, "get the number of min priority"}, /*get the number of min priority*/
+	{"sched_get_max_priority", starpu_sched_get_max_priority_wrapper, METH_VARARGS, "get the number of max priority"}, /*get the number of max priority*/
+	{"task_nsubmitted", starpu_task_nsubmitted_wrapper, METH_VARARGS, "get the number of submitted tasks which have not completed yet"}, /*get the number of submitted tasks which have not completed yet*/
+	{NULL, NULL} /*sentinel terminating the table*/
+};
+
+/*deallocation function*/
+/* m_free hook from the module definition below: runs when the module object
+ * is deallocated (normally at interpreter shutdown).  Shuts StarPU down and
+ * drops the cached asyncio module reference.
+ * NOTE(review): if PyImport_ImportModule("asyncio") failed during init,
+ * asyncio_module is NULL and this Py_DECREF would crash -- confirm. */
+static void starpupyFree(void *self)
+{
+	starpu_shutdown();
+	Py_DECREF(asyncio_module);
+	//COUNTREFS();
+}
+
+/*module definition structure*/
+static struct PyModuleDef starpupymodule =
+{
+	PyModuleDef_HEAD_INIT,
+	"starpupy", /*name of module*/
+	NULL, /*m_doc: no module docstring*/
+	-1, /*m_size: state kept in globals; no sub-interpreter support*/
+	starpupyMethods, /*method table*/
+	NULL, /*m_slots*/
+	NULL, /*m_traverse*/
+	NULL, /*m_clear*/
+	starpupyFree /*deallocation function*/
+};
+
+/*initialization function*/
+/* Entry point run by "import starpupy": starts the StarPU runtime, caches
+ * the asyncio module, then creates the extension module object. */
+PyMODINIT_FUNC
+PyInit_starpupy(void)
+{
+	/* no-op since Python 3.7 and deprecated since 3.9: the runtime now
+	 * initializes the GIL machinery itself */
+	PyEval_InitThreads();
+	/*starpu initialization*/
+	/* NOTE(review): assert() is compiled out under NDEBUG, so an init
+	 * failure (e.g. -ENODEV) would go unnoticed; returning NULL with a
+	 * Python exception set is the conventional failure path here. */
+	int ret = starpu_init(NULL);
+	assert(ret==0);
+	/*python asyncio import*/
+	/* NOTE(review): result unchecked -- NULL on failure would later crash
+	 * starpupyFree's Py_DECREF.  'asyncio_module' is a file-scope global
+	 * declared above this chunk. */
+	asyncio_module = PyImport_ImportModule("asyncio");
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+	/*numpy import array*/
+	import_array();
+#endif
+	/*module import initialization*/
+	return PyModule_Create(&starpupymodule);
+}

+ 0 - 6
starpupy/tests/Makefile

@@ -1,6 +0,0 @@
-PYTHON ?= python3
-
-all:
-	PYTHONPATH=../src $(PYTHON) starpu_py.py
-	PYTHONPATH=../src STARPU_CALIBRATE=1 $(PYTHON) starpu_py_parallel.py
-

+ 0 - 101
starpupy/tests/starpu_py_parallel.py

@@ -1,101 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-import starpu
-import time
-import asyncio
-from math import sqrt
-from math import log10
-
-#generate a list to store functions
-g_func=[]
-
-#function no input no output print hello world
-def hello():
-	print ("Example 1: Hello, world!")
-g_func.append(starpu.joblib.delayed(hello)())
-
-#function no input no output
-def func1():
-	print ("Example 2: This is a function no input no output")
-g_func.append(starpu.joblib.delayed(func1)())
-
-#function no input return a value
-def func2():
-	print ("Example 3:")
-	return 12
-g_func.append(starpu.joblib.delayed(func2)())
- 
-#function has 2 int inputs and 1 int output
-def multi(a,b):
-	res_multi=a*b
-	print("Example 4: The result of ",a,"*",b,"is",res_multi)
-	return res_multi
-g_func.append(starpu.joblib.delayed(multi)(2, 3))
-
-#function has 4 float inputs and 1 float output
-def add(a,b,c,d):
-	res_add=a+b+c+d
-	print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add)
-	return res_add
-g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9))
-
-#function has 2 int inputs 1 float input and 1 float output 1 int output
-def sub(a,b,c):
-	res_sub1=a-b-c
-	res_sub2=a-b
-	print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2)
-	return res_sub1, res_sub2
-g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9))
-
-#the size of generator
-N=1000000
-
-print("************************")
-print("parallel Normal version:")
-print("************************")
-print("--input is iterable argument list, example 1")
-starpu.joblib.parallel(mode="normal", n_jobs=-2, perfmodel="first")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
-
-print("--input is iterable argument list, example 2")
-starpu.joblib.parallel(mode="normal", n_jobs=2, perfmodel="second")(starpu.joblib.delayed(log10)(i+1)for i in range(N))
-
-print("--input is iterable function list")
-starpu.joblib.parallel(mode="normal", n_jobs=3, perfmodel="third")(g_func)
-
-
-print("************************")
-print("parallel Future version:")
-print("************************")
-async def main():
-	print("--input is iterable argument list, example 1")
-	fut1=starpu.joblib.parallel(mode="future", n_jobs=-3, perfmodel="first")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
-	res1=await fut1
-	#print(res1)
-
-	print("--input is iterable argument list, example 2")
-	fut2=starpu.joblib.parallel(mode="future", n_jobs=-3, perfmodel="second")(starpu.joblib.delayed(log10)(i+1)for i in range(N))
-	res2=await fut2
-	#print(res2)
-
-	print("--input is iterable function list")
-	fut3=starpu.joblib.parallel(mode="future", n_jobs=2, perfmodel="third")(g_func)
-	res3=await fut3
-	#print(res3)
-asyncio.run(main())
-
-starpu.joblib.perfmodel_plot(perfmodel="first")
-starpu.joblib.perfmodel_plot(perfmodel="second")
-starpu.joblib.perfmodel_plot(perfmodel="third")

+ 2 - 2
tests/Makefile.am

@@ -17,8 +17,8 @@ include $(top_srcdir)/starpu.mk
 
 AM_CFLAGS += -Wno-unused
 AM_CXXFLAGS += -Wno-unused
-AM_FFLAGS += -Wno-unused
-AM_FCFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 2 - 1
tests/datawizard/bcsr.c

@@ -123,7 +123,8 @@ int main(int argc, char **argv)
 	if (starpu_initialize(&conf, &argc, &argv) == -ENODEV)
 		return STARPU_TEST_SKIPPED;
 
-	if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1) {
+	if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1)
+	{
 		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}

+ 1 - 2
tests/datawizard/interfaces/test_interfaces.c

@@ -16,8 +16,7 @@
 
 #include <starpu.h>
 
-/* XXX Why cant we dereference a handle without this one ? */
-#include <core/sched_policy.h>
+#include <datawizard/coherency.h>
 
 #include <assert.h>
 

+ 1 - 0
tests/main/starpu_worker_exists.c

@@ -14,6 +14,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+#define BUILDING_STARPU
 #include <starpu.h>
 #include "core/workers.h"
 #include "../helper.h"

+ 2 - 1
tests/microbenchs/bandwidth.c

@@ -170,7 +170,8 @@ static unsigned interleave(unsigned i)
 		return 0;
 }
 
-enum sleep_type {
+enum sleep_type
+{
 	PAUSE,
 	NOP,
 	SYNC,

+ 2 - 0
tests/microbenchs/tasks_size_overhead.c

@@ -228,6 +228,8 @@ int main(int argc, char **argv)
 		goto error;
 	}
 
+	if (mincpus <= 0)
+		mincpus = 1;
 	/* For each number of cpus, benchmark */
 	for (ncpus= mincpus; ncpus <= maxcpus; ncpus += cpustep)
 	{

+ 8 - 1
tests/perfmodels/regression_based_memset.c

@@ -213,7 +213,7 @@ static int bench_energy(int workerid, int where, enum starpu_worker_archtype arc
 		if ( (retval = starpu_energy_start(workerid, archtype)) != 0)
 		{
 			starpu_data_unregister(handle);
-			_STARPU_DISP("Energy measurement not supported for archtype %d\n", archtype);
+			_STARPU_DISP("Energy measurement not supported for archtype %s\n", starpu_perfmodel_get_archtype_name(archtype));
 			return -1;
 		}
 
@@ -328,6 +328,9 @@ int main(int argc, char **argv)
 	starpu_conf_init(&conf);
 
 	/* Use a scheduler which doesn't choose the implementation */
+#ifdef STARPU_HAVE_UNSETENV
+	unsetenv("STARPU_SCHED");
+#endif
 	conf.sched_policy_name = "eager";
 	conf.calibrate = 1;
 
@@ -345,15 +348,19 @@ int main(int argc, char **argv)
 	{
 		memset_cl.cpu_funcs[1] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &memset_cl);
+#ifdef STARPU_HAVE_UNSETENV
 		memset_cl.cpu_funcs[1] = memset_cpu;
 		memset_cl.cpu_funcs[0] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &memset_cl);
+#endif
 
 		nl_memset_cl.cpu_funcs[1] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &nl_memset_cl);
+#ifdef STARPU_HAVE_UNSETENV
 		nl_memset_cl.cpu_funcs[1] = memset_cpu;
 		nl_memset_cl.cpu_funcs[0] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &nl_memset_cl);
+#endif
 	}
 
 	for (i = 0; i < starpu_cuda_worker_get_count(); i++)

+ 0 - 0
tools/dev/checker/starpu_check_copyright.sh


Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio