Browse Source

Merge branch 'fpga' of gitlab.inria.fr:starpu/starpu into fpga

Samuel Thibault 4 years ago
parent
commit
0465129baa
100 changed files with 4541 additions and 1304 deletions
  1. 4 0
      Makefile.am
  2. 144 62
      configure.ac
  3. 2 2
      contrib/ci.inria.fr/disabled/Jenkinsfile-basic
  4. 1 1
      contrib/ci.inria.fr/disabled/Jenkinsfile-windows
  5. 2 2
      contrib/ci.inria.fr/job-1-check-windows.bat
  6. 1 0
      contrib/ci.inria.fr/job-1-check.sh
  7. 275 68
      doc/doxygen/chapters/400_python.doxy
  8. 1 1
      doc/doxygen/chapters/470_simgrid.doxy
  9. BIN
      doc/doxygen/chapters/images/starpu_log.png
  10. 1854 0
      doc/doxygen/chapters/images/starpu_log_arr.eps
  11. BIN
      doc/doxygen/chapters/images/starpu_log_arr.png
  12. 339 316
      doc/doxygen/chapters/images/starpu_log.eps
  13. BIN
      doc/doxygen/chapters/images/starpu_log_list.png
  14. 1 1
      doc/doxygen/refman.tex
  15. 0 1
      doc/doxygen_dev/refman.tex
  16. 2 2
      examples/Makefile.am
  17. 2 2
      examples/cpp/add_vectors_interface.cpp
  18. 2 1
      examples/tag_example/tag_example.c
  19. 3 3
      include/fstarpu_mod.f90
  20. 2 0
      include/starpu_config.h.in
  21. 2 3
      include/starpu_scheduler.h
  22. 1 1
      libstarpu-mic.pc.in
  23. 1 1
      libstarpu.pc.in
  24. 12 0
      m4/libs.m4
  25. 3 0
      mpi/examples/Makefile.am
  26. 2 2
      mpi/examples/native_fortran/nf_mm_task_build.f90
  27. 0 1
      mpi/src/mpi/starpu_mpi_mpi.c
  28. 0 1
      mpi/src/starpu_mpi.c
  29. 3 0
      mpi/tests/Makefile.am
  30. 1 1
      socl/src/init.c
  31. 5 3
      src/Makefile.am
  32. 4 2
      src/common/rbtree_i.h
  33. 14 2
      src/common/thread.c
  34. 2 0
      src/common/utils.h
  35. 2 1
      src/core/dependencies/cg.c
  36. 15 20
      src/core/jobs.c
  37. 2 4
      src/core/jobs.h
  38. 15 12
      src/core/perfmodel/energy_model.c
  39. 4 3
      src/core/perfmodel/perfmodel_history.c
  40. 1 1
      src/core/perfmodel/perfmodel_print.c
  41. 7 14
      src/core/sched_policy.c
  42. 63 10
      src/core/simgrid.c
  43. 3 0
      src/core/simgrid.h
  44. 1 1
      src/core/task.c
  45. 3 3
      src/core/topology.c
  46. 5 4
      src/core/workers.c
  47. 5 5
      src/core/workers.h
  48. 4 2
      src/datawizard/memalloc.c
  49. 2 1
      src/datawizard/memory_nodes.c
  50. 5 3
      src/debug/traces/starpu_fxt.c
  51. 4 2
      src/drivers/cpu/driver_cpu.c
  52. 4 2
      src/drivers/cuda/driver_cuda_init.c
  53. 2 1
      src/drivers/disk/driver_disk.c
  54. 4 2
      src/drivers/mic/driver_mic_init.c
  55. 4 2
      src/drivers/mpi/driver_mpi_init.c
  56. 4 2
      src/drivers/opencl/driver_opencl_init.c
  57. 2 0
      src/profiling/profiling.c
  58. 6 3
      src/sched_policies/component_heteroprio.c
  59. 4 4
      src/sched_policies/component_worker.c
  60. 5 0
      src/sched_policies/helper_mct.c
  61. 2 1
      src/sched_policies/work_stealing_policy.c
  62. 6 4
      src/util/starpu_data_cpy.c
  63. 1 1
      starpu-1.0-mic.pc.in
  64. 1 1
      starpu-1.0.pc.in
  65. 1 1
      starpu-1.1.pc.in
  66. 1 1
      starpu-1.2.pc.in
  67. 1 1
      starpu-1.3.pc.in
  68. 2 2
      starpufft/src/starpufft-double.h
  69. 2 2
      starpufft/src/starpufft-float.h
  70. 2 2
      starpufft/src/starpufftx.c
  71. 21 0
      starpupy/Makefile.am
  72. 43 0
      starpupy/examples/Makefile.am
  73. 59 0
      starpupy/examples/execute.sh.in
  74. 9 9
      starpupy/tests/starpu_py.py
  75. 19 0
      starpupy/examples/starpu_py.sh
  76. 40 0
      starpupy/examples/starpu_py_np.py
  77. 3 7
      starpupy/src/starpu/delay.py
  78. 350 0
      starpupy/examples/starpu_py_parallel.py
  79. 19 0
      starpupy/examples/starpu_py_parallel.sh
  80. 63 0
      starpupy/src/Makefile.am
  81. 2 1
      starpupy/src/starpu/__init__.py
  82. 29 0
      starpupy/src/delay.py
  83. 63 0
      starpupy/src/intermedia.py
  84. 324 0
      starpupy/src/joblib.py
  85. 23 0
      starpupy/src/setup.cfg.in
  86. 40 0
      starpupy/src/setup.py.in
  87. 0 13
      starpupy/src/starpu/Makefile
  88. 0 147
      starpupy/src/starpu/joblib.py
  89. 0 416
      starpupy/src/starpu/starpu_task_wrapper.c
  90. 536 0
      starpupy/src/starpu_task_wrapper.c
  91. 0 6
      starpupy/tests/Makefile
  92. 0 101
      starpupy/tests/starpu_py_parallel.py
  93. 2 2
      tests/Makefile.am
  94. 2 1
      tests/datawizard/bcsr.c
  95. 1 2
      tests/datawizard/interfaces/test_interfaces.c
  96. 1 0
      tests/main/starpu_worker_exists.c
  97. 2 1
      tests/microbenchs/bandwidth.c
  98. 2 0
      tests/microbenchs/tasks_size_overhead.c
  99. 8 1
      tests/perfmodels/regression_based_memset.c
  100. 0 0
      tools/dev/checker/starpu_check_copyright.sh

+ 4 - 0
Makefile.am

@@ -53,6 +53,10 @@ if STARPU_BUILD_STARPURM
 SUBDIRS += starpurm
 SUBDIRS += starpurm
 endif
 endif
 
 
+if STARPU_BUILD_STARPUPY
+SUBDIRS += starpupy
+endif
+
 if STARPU_BUILD_SC_HYPERVISOR
 if STARPU_BUILD_SC_HYPERVISOR
 SUBDIRS += sc_hypervisor
 SUBDIRS += sc_hypervisor
 endif
 endif

File diff suppressed because it is too large
+ 144 - 62
configure.ac


+ 2 - 2
contrib/ci.inria.fr/disabled/Jenkinsfile-basic

@@ -34,7 +34,7 @@ pipeline
 		{
 		{
 			steps
 			steps
 			{
 			{
-				node('autotools')
+				node('autotools2')
 				{
 				{
 					checkout scm
 					checkout scm
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
@@ -62,7 +62,7 @@ pipeline
 			{
 			{
 				script
 				script
 				{
 				{
-					labelToSelect = 'unix'
+					labelToSelect = 'unix2'
 					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
 					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
 					{
 					{
 						node -> node.getLabelString().contains(labelToSelect) ? node.name : null
 						node -> node.getLabelString().contains(labelToSelect) ? node.name : null

+ 1 - 1
contrib/ci.inria.fr/disabled/Jenkinsfile-windows

@@ -34,7 +34,7 @@ pipeline
 		{
 		{
 			steps
 			steps
 			{
 			{
-				node('autotools')
+				node('autotools2')
 				{
 				{
 					checkout scm
 					checkout scm
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'

+ 2 - 2
contrib/ci.inria.fr/job-1-check-windows.bat

@@ -14,9 +14,9 @@ REM
 REM See the GNU Lesser General Public License in COPYING.LGPL for more details.
 REM See the GNU Lesser General Public License in COPYING.LGPL for more details.
 REM
 REM
 
 
-set PATH=%PATH%;C:\MinGW\msys\1.0\bin;c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin
+set PATH=%PATH%;C:\MinGW\msys\1.0\bin;c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32
 sh -c "./job-1-build-windows.sh"
 sh -c "./job-1-build-windows.sh"
-set PATH=C:\Windows\SysWOW64;C:\Program Files (x86)\Mozilla Firefox;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\SysWOW64;C:\Program Files\Java\jre7\bin;
+set PATH=C:\Windows\SysWOW64;C:\Program Files (x86)\Mozilla Firefox;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\SysWOW64;C:\Program Files\Java\jre7\bin;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32
 set HWLOC=c:\StarPU\hwloc-win32-build-1.11.0
 set HWLOC=c:\StarPU\hwloc-win32-build-1.11.0
 
 
 cd starpu_install
 cd starpu_install

+ 1 - 0
contrib/ci.inria.fr/job-1-check.sh

@@ -41,6 +41,7 @@ env > $PWD/env
 
 
 test -d $basename && chmod -R u+rwX $basename && rm -rf $basename
 test -d $basename && chmod -R u+rwX $basename && rm -rf $basename
 tar xfz ../$tarball
 tar xfz ../$tarball
+touch --date="last hour" $(find $basename)
 cd $basename
 cd $basename
 mkdir build
 mkdir build
 cd build
 cd build

File diff suppressed because it is too large
+ 275 - 68
doc/doxygen/chapters/400_python.doxy


+ 1 - 1
doc/doxygen/chapters/470_simgrid.doxy

@@ -23,7 +23,7 @@
 
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
 StarPU can use Simgrid in order to simulate execution on an arbitrary
 platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
 platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
-3.25. SimGrid versions 3.25 and above need to be configured with -Denable_msg=ON .
+3.26. SimGrid version 3.25 needs to be configured with -Denable_msg=ON .
 Other versions may have compatibility issues. 3.17 notably does not build at
 Other versions may have compatibility issues. 3.17 notably does not build at
 all. MPI simulation does not work with version 3.22.
 all. MPI simulation does not work with version 3.22.
 
 

BIN
doc/doxygen/chapters/images/starpu_log.png


File diff suppressed because it is too large
+ 1854 - 0
doc/doxygen/chapters/images/starpu_log_arr.eps


BIN
doc/doxygen/chapters/images/starpu_log_arr.png


File diff suppressed because it is too large
+ 339 - 316
doc/doxygen/chapters/images/starpu_log.eps


BIN
doc/doxygen/chapters/images/starpu_log_list.png


+ 1 - 1
doc/doxygen/refman.tex

@@ -138,7 +138,7 @@ Documentation License”.
 
 
 \part{StarPU Extensions}
 \part{StarPU Extensions}
 
 
-\chapter{PythonInterface}
+\chapter{Python Interface}
 \label{PythonInterface}
 \label{PythonInterface}
 \hypertarget{PythonInterface}{}
 \hypertarget{PythonInterface}{}
 \input{PythonInterface}
 \input{PythonInterface}

+ 0 - 1
doc/doxygen_dev/refman.tex

@@ -148,7 +148,6 @@ Documentation License”.
 \input{starpu__data__cpy_8h}
 \input{starpu__data__cpy_8h}
 \input{starpu__debug__helpers_8h}
 \input{starpu__debug__helpers_8h}
 \input{starpu__fxt_8h}
 \input{starpu__fxt_8h}
-\input{starpu__parameters_8h}
 \input{starpu__spinlock_8h}
 \input{starpu__spinlock_8h}
 \input{starpu__task__insert__utils_8h}
 \input{starpu__task__insert__utils_8h}
 \input{tags_8h}
 \input{tags_8h}

+ 2 - 2
examples/Makefile.am

@@ -20,8 +20,8 @@ include $(top_srcdir)/starpu.mk
 
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
-AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused
-AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 2 - 2
examples/cpp/add_vectors_interface.cpp

@@ -61,9 +61,9 @@ class my_allocator
 		node = a.get_node();
 		node = a.get_node();
 	}
 	}
 
 
-	explicit my_allocator(const unsigned node)
+	explicit my_allocator(const unsigned thenode)
 	{
 	{
-		this->node = node;
+		this->node = thenode;
 	}
 	}
 
 
 	pointer allocate(size_type n, const void * = 0)
 	pointer allocate(size_type n, const void * = 0)

+ 2 - 1
examples/tag_example/tag_example.c

@@ -223,7 +223,8 @@ int main(int argc, char **argv)
 	int ret;
 	int ret;
 
 
 #ifdef STARPU_HAVE_HELGRIND_H
 #ifdef STARPU_HAVE_HELGRIND_H
-	if (RUNNING_ON_VALGRIND) {
+	if (RUNNING_ON_VALGRIND)
+	{
 		ni /= 2;
 		ni /= 2;
 		nj /= 2;
 		nj /= 2;
 		nk /= 2;
 		nk /= 2;

+ 3 - 3
include/fstarpu_mod.f90

@@ -1054,7 +1054,7 @@ module fstarpu_mod
                 end subroutine fstarpu_vector_data_register
                 end subroutine fstarpu_vector_data_register
 
 
                 ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
                 ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
-                subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) &
                                 bind(C,name="starpu_vector_ptr_register")
                                 bind(C,name="starpu_vector_ptr_register")
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         type(c_ptr), intent(out) :: dh
                         type(c_ptr), intent(out) :: dh
@@ -1092,7 +1092,7 @@ module fstarpu_mod
                 end subroutine fstarpu_variable_data_register
                 end subroutine fstarpu_variable_data_register
 
 
                 ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
                 ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
-                subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) &
                                 bind(C,name="starpu_variable_ptr_register")
                                 bind(C,name="starpu_variable_ptr_register")
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         type(c_ptr), intent(out) :: dh
                         type(c_ptr), intent(out) :: dh
@@ -1758,7 +1758,7 @@ module fstarpu_mod
                 end function fstarpu_data_descr_array_alloc
                 end function fstarpu_data_descr_array_alloc
 
 
                 ! struct starpu_data_descr *fstarpu_data_descr_alloc(void);
                 ! struct starpu_data_descr *fstarpu_data_descr_alloc(void);
-                function fstarpu_data_descr_alloc (nb) bind(C)
+                function fstarpu_data_descr_alloc () bind(C)
                         use iso_c_binding, only: c_ptr
                         use iso_c_binding, only: c_ptr
                         type(c_ptr) :: fstarpu_data_descr_alloc
                         type(c_ptr) :: fstarpu_data_descr_alloc
                 end function fstarpu_data_descr_alloc
                 end function fstarpu_data_descr_alloc

+ 2 - 0
include/starpu_config.h.in

@@ -331,4 +331,6 @@ typedef ssize_t starpu_ssize_t;
 #undef STARPU_HAVE_STATEMENT_EXPRESSIONS
 #undef STARPU_HAVE_STATEMENT_EXPRESSIONS
 #undef STARPU_PERF_MODEL_DIR
 #undef STARPU_PERF_MODEL_DIR
 
 
+#undef STARPU_PYTHON_HAVE_NUMPY
+
 #endif
 #endif

+ 2 - 3
include/starpu_scheduler.h

@@ -294,9 +294,8 @@ int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_t
 /**
 /**
    The scheduling policy may put tasks directly into a worker’s local
    The scheduling policy may put tasks directly into a worker’s local
    queue so that it is not always necessary to create its own queue
    queue so that it is not always necessary to create its own queue
-   when the local queue is sufficient. If \p back is not 0, \p task is
-   put at the back of the queue where the worker will pop tasks first.
-   Setting \p back to 0 therefore ensures a FIFO ordering.
+   when the local queue is sufficient. \p back is ignored: the task priority is
+   used to order tasks in this queue.
 */
 */
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
 
 

+ 1 - 1
libstarpu-mic.pc.in

@@ -22,6 +22,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_API
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
libstarpu.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 12 - 0
m4/libs.m4

@@ -202,3 +202,15 @@ AC_DEFUN([IS_SUPPORTED_FLAG],
 	IS_SUPPORTED_FFLAG($1)
 	IS_SUPPORTED_FFLAG($1)
 	IS_SUPPORTED_FCFLAG($1)
 	IS_SUPPORTED_FCFLAG($1)
 ])
 ])
+
+# AC_PYTHON_MODULE(modulename, [action-if-found], [action-if-not-found])
+# Check if the given python module is available
+AC_DEFUN([AC_PYTHON_MODULE],
+[
+	echo "import $1" | $PYTHON - 2>/dev/null
+	if test $? -ne 0 ; then
+	   	$3
+	else
+		$2
+	fi
+])

+ 3 - 0
mpi/examples/Makefile.am

@@ -108,6 +108,9 @@ endif
 endif
 endif
 
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS)

+ 2 - 2
mpi/examples/native_fortran/nf_mm_task_build.f90

@@ -169,7 +169,7 @@ program nf_mm
         do b_col=1,NB
         do b_col=1,NB
            do b_row=1,NB
            do b_row=1,NB
               task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
               task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
-                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_A(b_row), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 C_NULL_PTR /))
                                                 C_NULL_PTR /))
@@ -177,7 +177,7 @@ program nf_mm
                  ret = fstarpu_task_submit(task)
                  ret = fstarpu_task_submit(task)
               endif
               endif
               call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
               call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
-                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_A(b_row), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 C_NULL_PTR /))
                                                 C_NULL_PTR /))

+ 0 - 1
mpi/src/mpi/starpu_mpi_mpi.c

@@ -41,7 +41,6 @@
 #include <core/simgrid.h>
 #include <core/simgrid.h>
 #include <core/task.h>
 #include <core/task.h>
 #include <core/topology.h>
 #include <core/topology.h>
-#include <core/workers.h>
 
 
 #ifdef STARPU_USE_MPI_MPI
 #ifdef STARPU_USE_MPI_MPI
 
 

+ 0 - 1
mpi/src/starpu_mpi.c

@@ -33,7 +33,6 @@
 #include <core/simgrid.h>
 #include <core/simgrid.h>
 #include <core/task.h>
 #include <core/task.h>
 #include <core/topology.h>
 #include <core/topology.h>
-#include <core/workers.h>
 
 
 static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
 static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
 {
 {

+ 3 - 0
mpi/tests/Makefile.am

@@ -84,6 +84,9 @@ endif
 endif
 endif
 
 
 AM_CFLAGS += -Wno-unused
 AM_CFLAGS += -Wno-unused
+AM_CXXFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(STARPU_CUDA_LDFLAGS)
 LIBS += $(STARPU_CUDA_LDFLAGS)

+ 1 - 1
socl/src/init.c

@@ -16,7 +16,7 @@
  */
  */
 
 
 #include <stdlib.h>
 #include <stdlib.h>
-#include "../src/core/workers.h"
+#include "../src/common/utils.h"
 #include "socl.h"
 #include "socl.h"
 #include "gc.h"
 #include "gc.h"
 #include "mem_objects.h"
 #include "mem_objects.h"

+ 5 - 3
src/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2009-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 # Copyright (C) 2013       Simon Archipoff
 # Copyright (C) 2013       Simon Archipoff
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -21,6 +21,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/include/ -DBUILDING_STARPU -DSTARPU_DATADIR='"$(da
 AM_CPPFLAGS += $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS += $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS += $(FXT_CFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(STARPU_RCCE_CPPFLAGS)
 AM_CPPFLAGS += $(FXT_CFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(STARPU_RCCE_CPPFLAGS)
 LIBS += -lm $(LIBSTARPU_LDFLAGS)
 LIBS += -lm $(LIBSTARPU_LDFLAGS)
+if STARPU_USE_MPI_MASTER_SLAVE
+LIBS += $(MPICC_LDFLAGS)
+endif
 
 
 SUBDIRS =
 SUBDIRS =
 
 
@@ -60,8 +63,7 @@ endif STARPU_HAVE_WINDOWS
 
 
 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 
 
-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined									\
-  -version-info $(libstarpu_so_version)
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined -version-info $(libstarpu_so_version)
 
 
 if STARPU_HAVE_DARWIN
 if STARPU_HAVE_DARWIN
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS += \
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS += \

+ 4 - 2
src/common/rbtree_i.h

@@ -44,7 +44,8 @@
  * architectures, as long as the nodes aren't embedded in structures with
  * architectures, as long as the nodes aren't embedded in structures with
  * special alignment constraints such as member packing.
  * special alignment constraints such as member packing.
  */
  */
-struct starpu_rbtree_node {
+struct starpu_rbtree_node
+{
     uintptr_t parent;
     uintptr_t parent;
     struct starpu_rbtree_node *children[2];
     struct starpu_rbtree_node *children[2];
 };
 };
@@ -52,7 +53,8 @@ struct starpu_rbtree_node {
 /**
 /**
  * Red-black tree structure.
  * Red-black tree structure.
  */
  */
-struct starpu_rbtree {
+struct starpu_rbtree
+{
     struct starpu_rbtree_node *root;
     struct starpu_rbtree_node *root;
 };
 };
 
 

+ 14 - 2
src/common/thread.c

@@ -96,14 +96,22 @@ int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const s
 	if (attr && attr->stacksize)
 	if (attr && attr->stacksize)
 		sg_actor_set_stacksize(*thread, attr->stacksize);
 		sg_actor_set_stacksize(*thread, attr->stacksize);
 #endif
 #endif
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(*thread, tsd);
+#else
 	sg_actor_data_set(*thread, tsd);
 	sg_actor_data_set(*thread, tsd);
+#endif
 	sg_actor_start(*thread, _starpu_simgrid_thread_start, 2, _args);
 	sg_actor_start(*thread, _starpu_simgrid_thread_start, 2, _args);
 #else
 #else
 	*thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args);
 	*thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args);
 #ifdef HAVE_SG_ACTOR_DATA
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(*thread, tsd);
+#else
 	sg_actor_data_set(*thread, tsd);
 	sg_actor_data_set(*thread, tsd);
 #endif
 #endif
 #endif
 #endif
+#endif
 #ifndef HAVE_SG_ACTOR_SET_STACKSIZE
 #ifndef HAVE_SG_ACTOR_SET_STACKSIZE
 	if (attr && attr->stacksize)
 	if (attr && attr->stacksize)
 		_starpu_simgrid_set_stack_size(_starpu_default_stack_size);
 		_starpu_simgrid_set_stack_size(_starpu_default_stack_size);
@@ -328,7 +336,9 @@ extern void *smpi_process_get_user_data();
 int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 {
 {
 	void **array;
 	void **array;
-#ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_GET_DATA
+	array = sg_actor_get_data(sg_actor_self());
+#elif defined(HAVE_SG_ACTOR_DATA)
 	array = sg_actor_data(sg_actor_self());
 	array = sg_actor_data(sg_actor_self());
 #else
 #else
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)
@@ -355,7 +365,9 @@ int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 {
 {
 	void **array;
 	void **array;
-#ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_GET_DATA
+	array = sg_actor_get_data(sg_actor_self());
+#elif defined(HAVE_SG_ACTOR_DATA)
 	array = sg_actor_data(sg_actor_self());
 	array = sg_actor_data(sg_actor_self());
 #else
 #else
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)

+ 2 - 0
src/common/utils.h

@@ -183,4 +183,6 @@ int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex);
 
 
 void _starpu_util_init(void);
 void _starpu_util_init(void);
 
 
+enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
+
 #endif // __COMMON_UTILS_H__
 #endif // __COMMON_UTILS_H__

+ 2 - 1
src/core/dependencies/cg.c

@@ -221,7 +221,8 @@ void _starpu_notify_cg(void *pred STARPU_ATTRIBUTE_UNUSED, struct _starpu_cg *cg
 					tag_successors->ndeps_completed = 0;
 					tag_successors->ndeps_completed = 0;
 					/* This releases the lock */
 					/* This releases the lock */
 					_starpu_tag_set_ready(tag);
 					_starpu_tag_set_ready(tag);
-				} else
+				}
+				else
 					_starpu_spin_unlock(&tag->lock);
 					_starpu_spin_unlock(&tag->lock);
 				break;
 				break;
 			}
 			}

+ 15 - 20
src/core/jobs.c

@@ -347,19 +347,10 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 				_starpu_spin_unlock(&handle->header_lock);
 				_starpu_spin_unlock(&handle->header_lock);
 		}
 		}
 	}
 	}
+
 	/* Check nowhere before releasing the sequential consistency (which may
 	/* Check nowhere before releasing the sequential consistency (which may
 	 * unregister the handle and free its switch_cl, and thus task->cl here.  */
 	 * unregister the handle and free its switch_cl, and thus task->cl here.  */
 	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE;
 	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE;
-	/* If this is a continuation, we do not release task dependencies now.
-	 * Task dependencies will be released only when the continued task
-	 * fully completes */
-	if (!continuation)
-	{
-		/* Tell other tasks that we don't exist any more, thus no need for
-		 * implicit dependencies any more.  */
-		_starpu_release_task_enforce_sequential_consistency(j);
-	}
-
 	/* If the job was executed on a combined worker there is no need for the
 	/* If the job was executed on a combined worker there is no need for the
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * data as it's not linked to an actual worker */
 	 * data as it's not linked to an actual worker */
@@ -395,6 +386,16 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	if (!callback && task->cl)
 	if (!callback && task->cl)
 		callback = task->cl->callback_func;
 		callback = task->cl->callback_func;
 
 
+	/* If this is a continuation, we do not release task dependencies now.
+	 * Task dependencies will be released only when the continued task
+	 * fully completes */
+	if (!continuation)
+	{
+		/* Tell other tasks that we don't exist any more, thus no need for
+		 * implicit dependencies any more.  */
+		_starpu_release_task_enforce_sequential_consistency(j);
+	}
+
 	/* Task does not have a cl, but has explicit data dependencies, we need
 	/* Task does not have a cl, but has explicit data dependencies, we need
 	 * to tell them that we will not exist any more before notifying the
 	 * to tell them that we will not exist any more before notifying the
 	 * tasks waiting for us
 	 * tasks waiting for us
@@ -764,14 +765,14 @@ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker)
 		}
 		}
 	}
 	}
 
 
-	if (!starpu_task_list_empty(&worker->local_tasks))
-		task = starpu_task_list_pop_front(&worker->local_tasks);
+	if (!starpu_task_prio_list_empty(&worker->local_tasks))
+		task = starpu_task_prio_list_pop_front_highest(&worker->local_tasks);
 
 
 	_starpu_pop_task_end(task);
 	_starpu_pop_task_end(task);
 	return task;
 	return task;
 }
 }
 
 
-int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int prio)
+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task)
 {
 {
 	/* Check that the worker is able to execute the task ! */
 	/* Check that the worker is able to execute the task ! */
 	STARPU_ASSERT(task && task->cl);
 	STARPU_ASSERT(task && task->cl);
@@ -814,13 +815,7 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 	}
 	}
 	else
 	else
 	{
 	{
-#ifdef STARPU_DEVEL
-#warning FIXME use a prio_list
-#endif
-		if (prio)
-			starpu_task_list_push_front(&worker->local_tasks, task);
-		else
-			starpu_task_list_push_back(&worker->local_tasks, task);
+		starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	}
 	}
 
 
 	starpu_wake_worker_locked(worker->workerid);
 	starpu_wake_worker_locked(worker->workerid);

+ 2 - 4
src/core/jobs.h

@@ -269,10 +269,8 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_p
 struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
 struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
 
 
 /** Put a task into the pool of tasks that are explicitly attributed to the
 /** Put a task into the pool of tasks that are explicitly attributed to the
- * specified worker. If "back" is set, the task is put at the back of the list.
- * Considering the tasks are popped from the back, this value should be 0 to
- * enforce a FIFO ordering. */
-int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int prio);
+ * specified worker. */
+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task);
 
 
 #define _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].index : job->ordered_buffers[i].index)
 #define _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].index : job->ordered_buffers[i].index)
 #define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].handle : job->ordered_buffers[i].handle)
 #define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].handle : job->ordered_buffers[i].handle)

+ 15 - 12
src/core/perfmodel/energy_model.c

@@ -56,8 +56,11 @@ static const int N_EVTS = 2;
 
 
 static int nsockets;
 static int nsockets;
 
 
-static const char* event_names[] = { "rapl::RAPL_ENERGY_PKG:cpu=%d",
-				     "rapl::RAPL_ENERGY_DRAM:cpu=%d"};
+static const char* event_names[] =
+{
+	"rapl::RAPL_ENERGY_PKG:cpu=%d",
+	"rapl::RAPL_ENERGY_DRAM:cpu=%d"
+};
 
 
 static int add_event(int EventSet, int socket);
 static int add_event(int EventSet, int socket);
 
 
@@ -66,9 +69,6 @@ static int add_event(int EventSet, int socket);
 /*must be initialized to PAPI_NULL before calling PAPI_create_event*/
 /*must be initialized to PAPI_NULL before calling PAPI_create_event*/
 static int EventSet = PAPI_NULL;
 static int EventSet = PAPI_NULL;
 
 
-/*This is where we store the values we read from the eventset */
-static long long *values;
-
 #endif
 #endif
 
 
 static double t1;
 static double t1;
@@ -99,9 +99,6 @@ int starpu_energy_start(int workerid, enum starpu_worker_archtype archi)
 
 
 		nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
 		nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
 
 
-		values=calloc(nsockets * N_EVTS,sizeof(long long));
-		STARPU_ASSERT(values);
-
 		if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
 		if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
 			ERROR_RETURN(retval);
 			ERROR_RETURN(retval);
 
 
@@ -178,6 +175,9 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 	{
 	{
 		STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
 		STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
 
 
+		/*This is where we store the values we read from the eventset */
+		long long values[nsockets*N_EVTS];
+
 		/* Stop counting and store the values into the array */
 		/* Stop counting and store the values into the array */
 		if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
 		if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
 			ERROR_RETURN(retval);
 			ERROR_RETURN(retval);
@@ -196,9 +196,6 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 				      delta, t, delta/(t*1.0E-6));
 				      delta, t, delta/(t*1.0E-6));
 			}
 			}
 		}
 		}
-		free(values);
-
-		energy = energy * 0.23 / 1.0e9 / ntasks;
 
 
 		/*removes all events from a PAPI event set */
 		/*removes all events from a PAPI event set */
 		if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
 		if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
@@ -242,7 +239,7 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 
 
 	arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
 	arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
 
 
-	starpu_perfmodel_update_history(model, task, arch, cpuid, nimpl, energy);
+	starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks);
 
 
 	return retval;
 	return retval;
 }
 }
@@ -266,6 +263,12 @@ static int add_event(int eventSet, int socket)
 		retval = PAPI_add_named_event(eventSet, buf);
 		retval = PAPI_add_named_event(eventSet, buf);
 		if (retval != PAPI_OK)
 		if (retval != PAPI_OK)
 		{
 		{
+			if (!strcmp(event_names[i], "rapl::RAPL_ENERGY_DRAM:cpu=%d"))
+			{
+				/* Ok, too bad */
+				_STARPU_DISP("Note: DRAM energy measurement not available\n");
+				return PAPI_OK;
+			}
 			_STARPU_DISP("cannot add event '%s': %d\n", buf, retval);
 			_STARPU_DISP("cannot add event '%s': %d\n", buf, retval);
 			return retval;
 			return retval;
 		}
 		}

+ 4 - 3
src/core/perfmodel/perfmodel_history.c

@@ -1243,7 +1243,8 @@ void _starpu_initialize_registered_performance_models(void)
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
 
 
-	for (archtype = 0; archtype < STARPU_NARCH; archtype++) {
+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
+	{
 		char name[128];
 		char name[128];
 		const char *arch = starpu_worker_get_type_as_env_var(archtype);
 		const char *arch = starpu_worker_get_type_as_env_var(archtype);
 		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
 		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
@@ -1518,8 +1519,8 @@ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
 	return 0;
 	return 0;
 }
 }
 
 
-int starpu_perfmodel_deinit(struct starpu_perfmodel *model){
-
+int starpu_perfmodel_deinit(struct starpu_perfmodel *model)
+{
 	_starpu_deinitialize_performance_model(model);
 	_starpu_deinitialize_performance_model(model);
 	free(model->state);
 	free(model->state);
 	model->state = NULL;
 	model->state = NULL;

+ 1 - 1
src/core/perfmodel/perfmodel_print.c

@@ -30,7 +30,7 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 	ptr = per_arch_model->list;
 	ptr = per_arch_model->list;
 
 
 	if (!parameter && ptr)
 	if (!parameter && ptr)
-		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us)\tstddev (us)\t\tn\n");
+		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tstddev (us or J)\t\tn\n");
 
 
 	while (ptr)
 	while (ptr)
 	{
 	{

+ 7 - 14
src/core/sched_policy.c

@@ -372,10 +372,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 		}
 		}
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 
 
-		if(task->priority > 0)
-			return _starpu_push_local_task(worker, task, 1);
-		else
-			return _starpu_push_local_task(worker, task, 0);
+		return _starpu_push_local_task(worker, task);
 	}
 	}
 	else
 	else
 	{
 	{
@@ -406,7 +403,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
 
 			_STARPU_TRACE_JOB_PUSH(alias, alias->priority);
 			_STARPU_TRACE_JOB_PUSH(alias, alias->priority);
 			worker = _starpu_get_worker_struct(combined_workerid[j]);
 			worker = _starpu_get_worker_struct(combined_workerid[j]);
-			ret |= _starpu_push_local_task(worker, alias, 0);
+			ret |= _starpu_push_local_task(worker, alias);
 		}
 		}
 
 
 		return ret;
 		return ret;
@@ -632,7 +629,8 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 				enum starpu_worker_archtype type;
 				enum starpu_worker_archtype type;
 				for (type = 0; type < STARPU_NARCH; type++)
 				for (type = 0; type < STARPU_NARCH; type++)
 				{
 				{
-					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) {
+					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type))
+					{
 						if (config->arch_nodeid[type] >= 0)
 						if (config->arch_nodeid[type] >= 0)
 							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
 							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
 						break;
 						break;
@@ -1032,7 +1030,7 @@ pick:
 	}
 	}
 
 
 	task->mf_skip = 1;
 	task->mf_skip = 1;
-	starpu_task_list_push_back(&worker->local_tasks, task);
+	starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	goto pick;
 	goto pick;
 
 
 profiling:
 profiling:
@@ -1174,16 +1172,11 @@ void _starpu_wait_on_sched_event(void)
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 }
 }
 
 
-/* The scheduling policy may put tasks directly into a worker's local queue so
- * that it is not always necessary to create its own queue when the local queue
- * is sufficient. If "back" not null, the task is put at the back of the queue
- * where the worker will pop tasks first. Setting "back" to 0 therefore ensures
- * a FIFO ordering. */
-int starpu_push_local_task(int workerid, struct starpu_task *task, int prio)
+int starpu_push_local_task(int workerid, struct starpu_task *task, int back STARPU_ATTRIBUTE_UNUSED)
 {
 {
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 
 
-	return  _starpu_push_local_task(worker, task, prio);
+	return  _starpu_push_local_task(worker, task);
 }
 }
 
 
 void _starpu_print_idle_time()
 void _starpu_print_idle_time()

+ 63 - 10
src/core/simgrid.c

@@ -357,11 +357,16 @@ void _starpu_start_simgrid(int *argc, char **argv)
 	int limit_bandwidth = starpu_get_env_number("STARPU_LIMIT_BANDWIDTH");
 	int limit_bandwidth = starpu_get_env_number("STARPU_LIMIT_BANDWIDTH");
 	if (limit_bandwidth >= 0)
 	if (limit_bandwidth >= 0)
 	{
 	{
-#ifdef HAVE_SG_LINK_BANDWIDTH_SET
+#if defined(HAVE_SG_LINK_BANDWIDTH_SET) || defined(HAVE_SG_LINK_SET_BANDWIDTH)
 		sg_link_t *links = sg_link_list();
 		sg_link_t *links = sg_link_list();
 		int count = sg_link_count(), i;
 		int count = sg_link_count(), i;
-		for (i = 0; i < count; i++) {
+		for (i = 0; i < count; i++)
+		{
+#ifdef HAVE_SG_LINK_SET_BANDWIDTH
+			sg_link_set_bandwidth(links[i], limit_bandwidth * 1000000.);
+#else
 			sg_link_bandwidth_set(links[i], limit_bandwidth * 1000000.);
 			sg_link_bandwidth_set(links[i], limit_bandwidth * 1000000.);
+#endif
 		}
 		}
 #else
 #else
 		_STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26, thus ignored\n", limit_bandwidth);
 		_STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26, thus ignored\n", limit_bandwidth);
@@ -492,7 +497,11 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 
 
 #if defined(HAVE_SG_ACTOR_ATTACH) && defined (HAVE_SG_ACTOR_DATA)
 #if defined(HAVE_SG_ACTOR_ATTACH) && defined (HAVE_SG_ACTOR_DATA)
 		sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
 		sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
+#ifdef HAVE_SG_ACTOR_SET_DATA
+		sg_actor_set_data(actor, tsd);
+#else
 		sg_actor_data_set(actor, tsd);
 		sg_actor_data_set(actor, tsd);
+#endif
 #else
 #else
 		MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
 		MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
 #endif
 #endif
@@ -519,7 +528,11 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 		void **tsd;
 		void **tsd;
 		_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 		_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_DATA
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+		sg_actor_set_data(sg_actor_self(), tsd);
+#else
 		sg_actor_data_set(sg_actor_self(), tsd);
 		sg_actor_data_set(sg_actor_self(), tsd);
+#endif
 #else
 #else
 		smpi_process_set_user_data(tsd);
 		smpi_process_set_user_data(tsd);
 #endif
 #endif
@@ -735,6 +748,9 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 		 * to be able to easily check scheduling robustness */
 		 * to be able to easily check scheduling robustness */
 	}
 	}
 
 
+#ifdef HAVE_SG_HOST_GET_SPEED
+	flops = length/1000000.0*sg_host_get_speed(sg_host_self());
+#else
 #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed)
 #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed)
 #  if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self)
 #  if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self)
 	flops = length/1000000.0*sg_host_speed(sg_host_self());
 	flops = length/1000000.0*sg_host_speed(sg_host_self());
@@ -746,6 +762,7 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 #else
 #else
 	flops = length/1000000.0*MSG_get_host_speed(MSG_host_self());
 	flops = length/1000000.0*MSG_get_host_speed(MSG_host_self());
 #endif
 #endif
+#endif
 
 
 #ifndef HAVE_SG_ACTOR_SELF_EXECUTE
 #ifndef HAVE_SG_ACTOR_SELF_EXECUTE
 	simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL);
 	simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL);
@@ -1210,14 +1227,22 @@ starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t
 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_INIT
 #ifdef HAVE_SG_ACTOR_INIT
 	actor = sg_actor_init(name, host);
 	actor = sg_actor_init(name, host);
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(actor, tsd);
+#else
 	sg_actor_data_set(actor, tsd);
 	sg_actor_data_set(actor, tsd);
+#endif
 	sg_actor_start(actor, code, argc, argv);
 	sg_actor_start(actor, code, argc, argv);
 #else
 #else
 	actor = MSG_process_create_with_arguments(name, code, tsd, host, argc, argv);
 	actor = MSG_process_create_with_arguments(name, code, tsd, host, argc, argv);
 #ifdef HAVE_SG_ACTOR_DATA
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(actor, tsd);
+#else
 	sg_actor_data_set(actor, tsd);
 	sg_actor_data_set(actor, tsd);
 #endif
 #endif
 #endif
 #endif
+#endif
 	return actor;
 	return actor;
 }
 }
 
 
@@ -1251,7 +1276,7 @@ starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node)
 
 
 void _starpu_simgrid_count_ngpus(void)
 void _starpu_simgrid_count_ngpus(void)
 {
 {
-#if (defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
+#if (defined(HAVE_SG_LINK_GET_NAME) || defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
 	unsigned src, dst;
 	unsigned src, dst;
 	starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM");
 	starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM");
 
 
@@ -1261,7 +1286,7 @@ void _starpu_simgrid_count_ngpus(void)
 		{
 		{
 			int busid;
 			int busid;
 			starpu_sg_host_t srchost, dsthost;
 			starpu_sg_host_t srchost, dsthost;
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(SD_link_t), NULL);
 			xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(SD_link_t), NULL);
 			SD_link_t *route;
 			SD_link_t *route;
 #else
 #else
@@ -1281,8 +1306,12 @@ void _starpu_simgrid_count_ngpus(void)
 
 
 			srchost = _starpu_simgrid_get_memnode_host(src);
 			srchost = _starpu_simgrid_get_memnode_host(src);
 			dsthost = _starpu_simgrid_get_memnode_host(dst);
 			dsthost = _starpu_simgrid_get_memnode_host(dst);
-#if defined(HAVE_SG_HOST_ROUTE)  || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE)  || defined(sg_host_route)
+#ifdef HAVE_SG_HOST_GET_ROUTE
+			sg_host_get_route(srchost, dsthost, route_dynar);
+#else
 			sg_host_route(srchost, dsthost, route_dynar);
 			sg_host_route(srchost, dsthost, route_dynar);
+#endif
 			routesize = xbt_dynar_length(route_dynar);
 			routesize = xbt_dynar_length(route_dynar);
 			route = xbt_dynar_to_array(route_dynar);
 			route = xbt_dynar_to_array(route_dynar);
 #else
 #else
@@ -1293,7 +1322,13 @@ void _starpu_simgrid_count_ngpus(void)
 			/* If it goes through "Host", do not care, there is no
 			/* If it goes through "Host", do not care, there is no
 			 * direct transfer support */
 			 * direct transfer support */
 			for (i = 0; i < routesize; i++)
 			for (i = 0; i < routesize; i++)
-				if (!strcmp(sg_link_name(route[i]), "Host"))
+				if (
+#ifdef HAVE_SG_LINK_GET_NAME
+					!strcmp(sg_link_get_name(route[i]), "Host")
+#else
+					!strcmp(sg_link_name(route[i]), "Host")
+#endif
+					)
 					break;
 					break;
 			if (i < routesize)
 			if (i < routesize)
 				continue;
 				continue;
@@ -1302,7 +1337,11 @@ void _starpu_simgrid_count_ngpus(void)
 			through = -1;
 			through = -1;
 			for (i = 0; i < routesize; i++)
 			for (i = 0; i < routesize; i++)
 			{
 			{
+#ifdef HAVE_SG_LINK_GET_NAME
+				name = sg_link_get_name(route[i]);
+#else
 				name = sg_link_name(route[i]);
 				name = sg_link_name(route[i]);
+#endif
 				size_t len = strlen(name);
 				size_t len = strlen(name);
 				if (!strcmp(" through", name+len-8))
 				if (!strcmp(" through", name+len-8))
 					through = i;
 					through = i;
@@ -1315,7 +1354,11 @@ void _starpu_simgrid_count_ngpus(void)
 				_STARPU_DEBUG("Didn't find through-link for %d->%d\n", src, dst);
 				_STARPU_DEBUG("Didn't find through-link for %d->%d\n", src, dst);
 				continue;
 				continue;
 			}
 			}
+#ifdef HAVE_SG_LINK_GET_NAME
+			name = sg_link_get_name(route[through]);
+#else
 			name = sg_link_name(route[through]);
 			name = sg_link_name(route[through]);
+#endif
 
 
 			/*
 			/*
 			 * count how many direct routes go through it between
 			 * count how many direct routes go through it between
@@ -1339,10 +1382,14 @@ void _starpu_simgrid_count_ngpus(void)
 
 
 				starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2);
 				starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2);
 				int routesize2;
 				int routesize2;
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(SD_link_t), NULL);
 				xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(SD_link_t), NULL);
 				SD_link_t *route2;
 				SD_link_t *route2;
+#ifdef HAVE_SG_HOST_GET_ROUTE
+				sg_host_get_route(srchost2, ramhost, route_dynar2);
+#else
 				sg_host_route(srchost2, ramhost, route_dynar2);
 				sg_host_route(srchost2, ramhost, route_dynar2);
+#endif
 				routesize2 = xbt_dynar_length(route_dynar2);
 				routesize2 = xbt_dynar_length(route_dynar2);
 				route2 = xbt_dynar_to_array(route_dynar2);
 				route2 = xbt_dynar_to_array(route_dynar2);
 #else
 #else
@@ -1351,19 +1398,25 @@ void _starpu_simgrid_count_ngpus(void)
 #endif
 #endif
 
 
 				for (i = 0; i < routesize2; i++)
 				for (i = 0; i < routesize2; i++)
-					if (!strcmp(name, sg_link_name(route2[i])))
+					if (
+#ifdef HAVE_SG_LINK_GET_NAME
+						!strcmp(name, sg_link_get_name(route2[i]))
+#else
+						!strcmp(name, sg_link_name(route2[i]))
+#endif
+						)
 					{
 					{
 						/* This GPU goes through this PCI bridge to access RAM */
 						/* This GPU goes through this PCI bridge to access RAM */
 						ngpus++;
 						ngpus++;
 						break;
 						break;
 					}
 					}
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				free(route2);
 				free(route2);
 #endif
 #endif
 			}
 			}
 			_STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus);
 			_STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus);
 			starpu_bus_set_ngpus(busid, ngpus);
 			starpu_bus_set_ngpus(busid, ngpus);
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			free(route);
 			free(route);
 #endif
 #endif
 		}
 		}

+ 3 - 0
src/core/simgrid.h

@@ -24,6 +24,9 @@
 extern "C"
 extern "C"
 {
 {
 #endif
 #endif
+
+/* Note: when changing something here, update the include list in configure.ac
+ * in the part that tries to enable stdc++11 */
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #include <simgrid/msg.h>
 #include <simgrid/msg.h>

+ 1 - 1
src/core/task.c

@@ -1084,7 +1084,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
 
 	struct _starpu_worker *worker;
 	struct _starpu_worker *worker;
 	worker = _starpu_get_worker_struct(workerid);
 	worker = _starpu_get_worker_struct(workerid);
-	starpu_task_list_push_back(&worker->local_tasks, task);
+	starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	starpu_wake_worker_locked(worker->workerid);
 	starpu_wake_worker_locked(worker->workerid);
 
 
 	_starpu_profiling_set_task_push_end_time(task);
 	_starpu_profiling_set_task_push_end_time(task);

+ 3 - 3
src/core/topology.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2009-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  * Copyright (C) 2013       Thibaut Lambert
  * Copyright (C) 2013       Thibaut Lambert
  * Copyright (C) 2016       Uppsala University
  * Copyright (C) 2016       Uppsala University
  *
  *
@@ -464,7 +464,7 @@ struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  * Discover the topology of the machine
  * Discover the topology of the machine
  */
  */
 
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID)
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 						int *current, int *workers_gpuid,
 						int *current, int *workers_gpuid,
 						const char *varname, unsigned nhwgpus,
 						const char *varname, unsigned nhwgpus,
@@ -1817,7 +1817,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
 
 	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
 	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
 	for (i = 0; i < nopencl; i++)
 	for (i = 0; i < nopencl; i++)
-		topology->nworker[STARPU_CUDA_WORKER][i] = 1;
+		topology->nworker[STARPU_OPENCL_WORKER][i] = 1;
 	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
 	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
 
 
 	_starpu_initialize_workers_opencl_gpuid(config);
 	_starpu_initialize_workers_opencl_gpuid(config);

+ 5 - 4
src/core/workers.c

@@ -688,7 +688,7 @@ void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machin
 	/* memory_node initialized by topology.c */
 	/* memory_node initialized by topology.c */
 	STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
 	STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
-	starpu_task_list_init(&workerarg->local_tasks);
+	starpu_task_prio_list_init(&workerarg->local_tasks);
 	_starpu_ctx_change_list_init(&workerarg->ctx_change_list);
 	_starpu_ctx_change_list_init(&workerarg->ctx_change_list);
 	workerarg->local_ordered_tasks = NULL;
 	workerarg->local_ordered_tasks = NULL;
 	workerarg->local_ordered_tasks_size = 0;
 	workerarg->local_ordered_tasks_size = 0;
@@ -1039,7 +1039,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
         if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
         if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
         {
         {
                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
-                struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
+                struct _starpu_worker * worker_zero STARPU_ATTRIBUTE_UNUSED = &worker_set_zero->workers[0];
                 STARPU_PTHREAD_CREATE_ON(
                 STARPU_PTHREAD_CREATE_ON(
                                 "zero",
                                 "zero",
                                 &worker_set_zero->worker_thread,
                                 &worker_set_zero->worker_thread,
@@ -1445,7 +1445,8 @@ int _starpu_get_catch_signals(void)
 	return _starpu_config.conf.catch_signals;
 	return _starpu_config.conf.catch_signals;
 }
 }
 
 
-void starpu_drivers_preinit(void) {
+void starpu_drivers_preinit(void)
+{
 	_starpu_cpu_preinit();
 	_starpu_cpu_preinit();
 	_starpu_cuda_preinit();
 	_starpu_cuda_preinit();
 	_starpu_opencl_preinit();
 	_starpu_opencl_preinit();
@@ -1828,7 +1829,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
 		}
 		}
 
 
 out:
 out:
-		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
+		STARPU_ASSERT(starpu_task_prio_list_empty(&worker->local_tasks));
 		for (n = 0; n < worker->local_ordered_tasks_size; n++)
 		for (n = 0; n < worker->local_ordered_tasks_size; n++)
 			STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL);
 			STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL);
 		_starpu_sched_ctx_list_delete(&worker->sched_ctx_list);
 		_starpu_sched_ctx_list_delete(&worker->sched_ctx_list);

+ 5 - 5
src/core/workers.h

@@ -61,8 +61,6 @@
 
 
 #define STARPU_MAX_PIPELINE 4
 #define STARPU_MAX_PIPELINE 4
 
 
-enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
-
 struct _starpu_ctx_change_list;
 struct _starpu_ctx_change_list;
 
 
 /** This is initialized by _starpu_worker_init() */
 /** This is initialized by _starpu_worker_init() */
@@ -125,7 +123,7 @@ LIST_TYPE(_starpu_worker,
 	     * subsequent processing once worker completes the ongoing scheduling
 	     * subsequent processing once worker completes the ongoing scheduling
 	     * operation */
 	     * operation */
 	struct _starpu_ctx_change_list ctx_change_list;
 	struct _starpu_ctx_change_list ctx_change_list;
-	struct starpu_task_list local_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue */
+	struct starpu_task_prio_list local_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue */
 	struct starpu_task **local_ordered_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue with an explicit order */
 	struct starpu_task **local_ordered_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue with an explicit order */
 	unsigned local_ordered_tasks_size; /**< this records the size of local_ordered_tasks */
 	unsigned local_ordered_tasks_size; /**< this records the size of local_ordered_tasks */
 	unsigned current_ordered_task; /**< this records the index (within local_ordered_tasks) of the next ordered task to be executed */
 	unsigned current_ordered_task; /**< this records the index (within local_ordered_tasks) of the next ordered task to be executed */
@@ -427,7 +425,8 @@ struct _starpu_machine_config
 };
 };
 
 
 /** Provides information for a device driver */
 /** Provides information for a device driver */
-struct starpu_driver_info {
+struct starpu_driver_info
+{
 	const char *name_upper;	/**< Name of worker type in upper case */
 	const char *name_upper;	/**< Name of worker type in upper case */
 	const char *name_var;	/**< Name of worker type for environment variables */
 	const char *name_var;	/**< Name of worker type for environment variables */
 	const char *name_lower;	/**< Name of worker type in lower case */
 	const char *name_lower;	/**< Name of worker type in lower case */
@@ -441,7 +440,8 @@ extern struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
 void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
 void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
 
 
 /** Provides information for a memory node driver */
 /** Provides information for a memory node driver */
-struct starpu_memory_driver_info {
+struct starpu_memory_driver_info
+{
 	const char *name_upper;	/**< Name of memory in upper case */
 	const char *name_upper;	/**< Name of memory in upper case */
 	enum starpu_worker_archtype worker_archtype;	/**< Kind of device */
 	enum starpu_worker_archtype worker_archtype;	/**< Kind of device */
 };
 };

+ 4 - 2
src/datawizard/memalloc.c

@@ -1513,7 +1513,8 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			/* First try to flush data explicitly marked for freeing */
 			/* First try to flush data explicitly marked for freeing */
 			size_t freed = flush_memchunk_cache(dst_node, reclaim);
 			size_t freed = flush_memchunk_cache(dst_node, reclaim);
 
 
-			if (freed >= reclaim) {
+			if (freed >= reclaim)
+			{
 				/* That freed enough data, retry allocating */
 				/* That freed enough data, retry allocating */
 				prefetch_out_of_memory[dst_node] = 0;
 				prefetch_out_of_memory[dst_node] = 0;
 				continue;
 				continue;
@@ -1550,7 +1551,8 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 			_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
 			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
 			prefetch_out_of_memory[dst_node] = 0;
 			prefetch_out_of_memory[dst_node] = 0;
-		} else
+		}
+		else
 			prefetch_out_of_memory[dst_node] = 0;
 			prefetch_out_of_memory[dst_node] = 0;
 	}
 	}
 	while((allocated_memory == -ENOMEM) && attempts++ < 2);
 	while((allocated_memory == -ENOMEM) && attempts++ < 2);

+ 2 - 1
src/datawizard/memory_nodes.c

@@ -180,7 +180,8 @@ int starpu_memory_node_get_devid(unsigned node)
 	return _starpu_descr.devid[node];
 	return _starpu_descr.devid[node];
 }
 }
 
 
-enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) {
+enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind)
+{
 	enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype;
 	enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype;
 	STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind);
 	STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind);
 	return archtype;
 	return archtype;

+ 5 - 3
src/debug/traces/starpu_fxt.c

@@ -193,7 +193,8 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		fprintf(tasks_file, "Name: %s\n", task->name);
 		fprintf(tasks_file, "Name: %s\n", task->name);
 	if (task->model_name)
 	if (task->model_name)
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
-	if (task->file) {
+	if (task->file)
+	{
 		fprintf(tasks_file, "File: %s\n", task->file);
 		fprintf(tasks_file, "File: %s\n", task->file);
 		fprintf(tasks_file, "Line: %d\n", task->line);
 		fprintf(tasks_file, "Line: %d\n", task->line);
 	}
 	}
@@ -4129,7 +4130,8 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 
 
 	if (out_paje_file && !options->no_bus)
 	if (out_paje_file && !options->no_bus)
 	{
 	{
-		while (!_starpu_communication_list_empty(&communication_list)) {
+		while (!_starpu_communication_list_empty(&communication_list))
+		{
 			struct _starpu_communication*itor;
 			struct _starpu_communication*itor;
 			itor = _starpu_communication_list_pop_front(&communication_list);
 			itor = _starpu_communication_list_pop_front(&communication_list);
 
 
@@ -4423,7 +4425,7 @@ void _starpu_fxt_number_events_file_init(struct starpu_fxt_options *options)
 			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno));
 			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno));
 
 
 		/* FUT_SETUP_CODE is the event with the maximal value */
 		/* FUT_SETUP_CODE is the event with the maximal value */
-		number_events = calloc(FUT_SETUP_CODE+1, sizeof(uint64_t));
+		_STARPU_CALLOC(number_events, FUT_SETUP_CODE+1, sizeof(uint64_t));
 	}
 	}
 	else
 	else
 		number_events_file = NULL;
 		number_events_file = NULL;

+ 4 - 2
src/drivers/cpu/driver_cpu.c

@@ -60,7 +60,8 @@
 #include <windows.h>
 #include <windows.h>
 #endif
 #endif
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "CPU",
 	.name_upper = "CPU",
 	.name_var = "CPU",
 	.name_var = "CPU",
 	.name_lower = "cpu",
 	.name_lower = "cpu",
@@ -68,7 +69,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 0.5f,
 	.alpha = 0.5f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "NUMA",
 	.name_upper = "NUMA",
 	.worker_archtype = STARPU_CPU_WORKER,
 	.worker_archtype = STARPU_CPU_WORKER,
 };
 };

+ 4 - 2
src/drivers/cuda/driver_cuda_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <drivers/cuda/driver_cuda.h>
 #include <drivers/cuda/driver_cuda.h>
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "CUDA",
 	.name_upper = "CUDA",
 	.name_var = "CUDA",
 	.name_var = "CUDA",
 	.name_lower = "cuda",
 	.name_lower = "cuda",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 13.33f,
 	.alpha = 13.33f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "CUDA",
 	.name_upper = "CUDA",
 	.worker_archtype = STARPU_CUDA_WORKER,
 	.worker_archtype = STARPU_CUDA_WORKER,
 };
 };

+ 2 - 1
src/drivers/disk/driver_disk.c

@@ -23,7 +23,8 @@
 #include <datawizard/coherency.h>
 #include <datawizard/coherency.h>
 #include <datawizard/memory_nodes.h>
 #include <datawizard/memory_nodes.h>
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "Disk",
 	.name_upper = "Disk",
 	.worker_archtype = (enum starpu_worker_archtype) -1,
 	.worker_archtype = (enum starpu_worker_archtype) -1,
 };
 };

+ 4 - 2
src/drivers/mic/driver_mic_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <drivers/mic/driver_mic_source.h>
 #include <drivers/mic/driver_mic_source.h>
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "MIC",
 	.name_upper = "MIC",
 	.name_var = "MIC",
 	.name_var = "MIC",
 	.name_lower = "mic",
 	.name_lower = "mic",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 0.5f,
 	.alpha = 0.5f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "MIC",
 	.name_upper = "MIC",
 	.worker_archtype = STARPU_MIC_WORKER,
 	.worker_archtype = STARPU_MIC_WORKER,
 };
 };

+ 4 - 2
src/drivers/mpi/driver_mpi_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <drivers/mpi/driver_mpi_source.h>
 #include <drivers/mpi/driver_mpi_source.h>
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "MPI_MS",
 	.name_upper = "MPI_MS",
 	.name_var = "MPI_MS",
 	.name_var = "MPI_MS",
 	.name_lower = "mpi_ms",
 	.name_lower = "mpi_ms",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 1.0f,
 	.alpha = 1.0f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "MPI_MS",
 	.name_upper = "MPI_MS",
 	.worker_archtype = STARPU_MPI_MS_WORKER,
 	.worker_archtype = STARPU_MPI_MS_WORKER,
 };
 };

+ 4 - 2
src/drivers/opencl/driver_opencl_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <drivers/opencl/driver_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
 
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "OpenCL",
 	.name_upper = "OpenCL",
 	.name_var = "OPENCL",
 	.name_var = "OPENCL",
 	.name_lower = "opencl",
 	.name_lower = "opencl",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 12.22f,
 	.alpha = 12.22f,
 };
 };
 
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "OpenCL",
 	.name_upper = "OpenCL",
 	.worker_archtype = STARPU_OPENCL_WORKER,
 	.worker_archtype = STARPU_OPENCL_WORKER,
 };
 };

+ 2 - 0
src/profiling/profiling.c

@@ -201,11 +201,13 @@ void _starpu_profiling_papi_task_start_counters(struct starpu_task *task)
 		for(i=0; i<papi_nevents; i++)
 		for(i=0; i<papi_nevents; i++)
 		{
 		{
 			int ret = PAPI_add_event(profiling_info->papi_event_set, papi_events[i]);
 			int ret = PAPI_add_event(profiling_info->papi_event_set, papi_events[i]);
+#ifdef PAPI_ECMP_DISABLED
 			if (ret == PAPI_ECMP_DISABLED && !warned_component_unavailable)
 			if (ret == PAPI_ECMP_DISABLED && !warned_component_unavailable)
 			{
 			{
 				_STARPU_MSG("Error while registering Papi event: Component containing event is disabled. Try running `papi_component_avail` to get more information.\n");
 				_STARPU_MSG("Error while registering Papi event: Component containing event is disabled. Try running `papi_component_avail` to get more information.\n");
 				warned_component_unavailable = 1;
 				warned_component_unavailable = 1;
 			}
 			}
+#endif
 			profiling_info->papi_values[i]=0;
 			profiling_info->papi_values[i]=0;
 		}
 		}
 		PAPI_reset(profiling_info->papi_event_set);
 		PAPI_reset(profiling_info->papi_event_set);

+ 6 - 3
src/sched_policies/component_heteroprio.c

@@ -434,9 +434,12 @@ static int heteroprio_push_task(struct starpu_sched_component * component, struc
 			/* Didn't find it, add one */
 			/* Didn't find it, add one */
 			data->naccel++;
 			data->naccel++;
 
 
-			float *newaccel = malloc(data->naccel * sizeof(*newaccel));
-			struct _starpu_prio_deque **newbuckets = malloc(data->naccel * sizeof(*newbuckets));
-			struct _starpu_prio_deque *newbucket = malloc(sizeof(*newbucket));
+			float *newaccel;
+			_STARPU_MALLOC(newaccel, data->naccel * sizeof(*newaccel));
+			struct _starpu_prio_deque **newbuckets;
+			_STARPU_MALLOC(newbuckets, data->naccel * sizeof(*newbuckets));
+			struct _starpu_prio_deque *newbucket;
+			_STARPU_MALLOC(newbucket, sizeof(*newbucket));
 			_starpu_prio_deque_init(newbucket);
 			_starpu_prio_deque_init(newbucket);
 			int inserted = 0;
 			int inserted = 0;
 
 

+ 4 - 4
src/sched_policies/component_worker.c

@@ -510,11 +510,11 @@ static double simple_worker_estimated_load(struct starpu_sched_component * compo
 	struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
 	struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
 	int nb_task = 0;
 	int nb_task = 0;
 	STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex);
 	STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex);
-	struct starpu_task_list list = worker->local_tasks;
+	struct starpu_task_prio_list *list = &worker->local_tasks;
 	struct starpu_task * task;
 	struct starpu_task * task;
-	for(task = starpu_task_list_front(&list);
-	    task != starpu_task_list_end(&list);
-	    task = starpu_task_list_next(task))
+	for(task = starpu_task_prio_list_begin(list);
+	    task != starpu_task_prio_list_end(list);
+	    task = starpu_task_prio_list_next(list, task))
 		nb_task++;
 		nb_task++;
 	STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex);
 	STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex);
 	struct _starpu_worker_component_data * d = component->data;
 	struct _starpu_worker_component_data * d = component->data;

+ 5 - 0
src/sched_policies/helper_mct.c

@@ -88,6 +88,11 @@ static double compute_expected_time(double now, double predicted_end, double pre
 
 
 double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy)
 double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy)
 {
 {
+	if(isnan(local_energy))
+		/* Energy not calibrated yet, but we cannot do this
+		 * automatically anyway, so ignoring this for now */
+		local_energy = 0.;
+
 	/* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */
 	/* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */
 	
 	
 	/* min_exp_end_of_task is the minimum end time of the task over all workers */
 	/* min_exp_end_of_task is the minimum end time of the task over all workers */

+ 2 - 1
src/sched_policies/work_stealing_policy.c

@@ -145,7 +145,8 @@ static int select_victim_round_robin(struct _starpu_work_stealing_data *ws, unsi
 		if (!ws->per_worker[workerids[worker]].notask)
 		if (!ws->per_worker[workerids[worker]].notask)
 		{
 		{
 			if (ws->per_worker[workerids[worker]].busy
 			if (ws->per_worker[workerids[worker]].busy
-						   || starpu_worker_is_blocked_in_parallel(workerids[worker])) {
+			    || starpu_worker_is_blocked_in_parallel(workerids[worker]))
+			{
 				ntasks = 1;
 				ntasks = 1;
 				break;
 				break;
 			}
 			}

+ 6 - 4
src/util/starpu_data_cpy.c

@@ -86,7 +86,7 @@ void mp_cpy_kernel(void *descr[], void *cl_arg)
 
 
 	const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
 	const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
 	const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
 	const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
-	
+
 	void *dst_interface = descr[0];
 	void *dst_interface = descr[0];
 	void *src_interface = descr[1];
 	void *src_interface = descr[1];
 
 
@@ -151,7 +151,7 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 
 
 	unsigned *interface_id;
 	unsigned *interface_id;
 	_STARPU_MALLOC(interface_id, sizeof(*interface_id));
 	_STARPU_MALLOC(interface_id, sizeof(*interface_id));
-	*interface_id = dst_handle->ops->interfaceid; 
+	*interface_id = dst_handle->ops->interfaceid;
 	task->cl_arg = interface_id;
 	task->cl_arg = interface_id;
 	task->cl_arg_size = sizeof(*interface_id);
 	task->cl_arg_size = sizeof(*interface_id);
 	task->cl_arg_free = 1;
 	task->cl_arg_free = 1;
@@ -181,7 +181,8 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous)
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous)
 {
 {
 	_starpu_spin_lock(&src_handle->header_lock);
 	_starpu_spin_lock(&src_handle->header_lock);
-	if (src_handle->readonly_dup) {
+	if (src_handle->readonly_dup)
+	{
 		/* Already a ro duplicate, just return it with one more ref */
 		/* Already a ro duplicate, just return it with one more ref */
 		*dst_handle = src_handle->readonly_dup;
 		*dst_handle = src_handle->readonly_dup;
 		_starpu_spin_unlock(&src_handle->header_lock);
 		_starpu_spin_unlock(&src_handle->header_lock);
@@ -190,7 +191,8 @@ int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t sr
 		_starpu_spin_unlock(&(*dst_handle)->header_lock);
 		_starpu_spin_unlock(&(*dst_handle)->header_lock);
 		return 0;
 		return 0;
 	}
 	}
-	if (src_handle->readonly) {
+	if (src_handle->readonly)
+	{
 		src_handle->aliases++;
 		src_handle->aliases++;
 		_starpu_spin_unlock(&src_handle->header_lock);
 		_starpu_spin_unlock(&src_handle->header_lock);
 		*dst_handle = src_handle;
 		*dst_handle = src_handle;

+ 1 - 1
starpu-1.0-mic.pc.in

@@ -23,7 +23,7 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@
 Requires.private: @GORDON_REQUIRES@
 Requires.private: @GORDON_REQUIRES@

+ 1 - 1
starpu-1.0.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.1.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.2.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.3.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 2 - 2
starpufft/src/starpufft-double.h

@@ -25,8 +25,8 @@
 #include <cufft.h>
 #include <cufft.h>
 #endif
 #endif
 
 
-#undef  FLOAT
-#define DOUBLE
+#undef  STARPUFFT_FLOAT
+#define STARPUFFT_DOUBLE
 
 
 typedef double real;
 typedef double real;
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 

+ 2 - 2
starpufft/src/starpufft-float.h

@@ -25,8 +25,8 @@
 #include <cufft.h>
 #include <cufft.h>
 #endif
 #endif
 
 
-#undef  DOUBLE
-#define FLOAT
+#undef  STARPUFFT_DOUBLE
+#define STARPUFFT_FLOAT
 
 
 typedef float real;
 typedef float real;
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 

+ 2 - 2
starpufft/src/starpufftx.c

@@ -28,7 +28,7 @@
 #define _externC extern
 #define _externC extern
 #include "cudax_kernels.h"
 #include "cudax_kernels.h"
 
 
-#if defined(FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
+#if defined(STARPUFFT_FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
 #  define __STARPU_USE_CUDA
 #  define __STARPU_USE_CUDA
 #else
 #else
 #  undef __STARPU_USE_CUDA
 #  undef __STARPU_USE_CUDA
@@ -172,7 +172,7 @@ compute_roots(STARPUFFT(plan) plan)
 }
 }
 
 
 /* Only CUDA capability >= 1.3 supports doubles, rule old card out.  */
 /* Only CUDA capability >= 1.3 supports doubles, rule old card out.  */
-#ifdef DOUBLE
+#ifdef STARPUFFT_DOUBLE
 static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED) {
 static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED) {
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 		return 1;
 		return 1;

+ 21 - 0
starpupy/Makefile.am

@@ -0,0 +1,21 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu-subdirtests.mk
+
+SUBDIRS  = src
+SUBDIRS += examples
+

+ 43 - 0
starpupy/examples/Makefile.am

@@ -0,0 +1,43 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu.mk
+
+SUBDIRS =
+
+CLEANFILES = *.gcno *.gcda *.linkinfo
+
+TESTS	=
+TESTS	+=	starpu_py.sh
+TESTS	+=	starpu_py_parallel.sh
+
+if STARPU_STARPUPY_NUMPY
+TESTS	+=	starpu_py_np.sh
+endif
+
+EXTRA_DIST	=		\
+	starpu_py_parallel.py	\
+	starpu_py_parallel.sh	\
+	starpu_py.py		\
+	starpu_py.sh		\
+	starpu_py_np.py		\
+	starpu_py_np.sh
+
+python_sourcesdir = $(libdir)/starpu/python
+dist_python_sources_DATA	=	\
+	starpu_py_parallel.py	\
+	starpu_py.py
+

+ 59 - 0
starpupy/examples/execute.sh.in

@@ -0,0 +1,59 @@
+#!@REALBASH@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+exampledir=@STARPU_SRC_DIR@/starpupy/examples
+
+modpath=@STARPU_BUILD_DIR@/src/.libs:
+pypath=@STARPU_BUILD_DIR@/starpupy/src/build:$PYTHONPATH
+
+valgrind=""
+gdb=""
+if test "$1" == "--valgrind"
+then
+    valgrind=1
+    shift
+fi
+if test "$1" == "--gdb"
+then
+    gdb=1
+    shift
+fi
+
+examplefile=$1
+if test -f $examplefile
+then
+    pythonscript=$examplefile
+elif test -f $exampledir/$examplefile
+then
+    pythonscript=$exampledir/$examplefile
+else
+    echo "Error. Python script $examplefile not found in current directory or in $exampledir"
+    exit 1
+fi
+shift
+
+set -x
+if test "$valgrind" == "1"
+then
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath PYTHONMALLOC=malloc valgrind --track-origins=yes @PYTHON@ $pythonscript $*
+elif test "$gdb" == "1"
+then
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath gdb --args @PYTHON@ $pythonscript $*
+else
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath @PYTHON@ $pythonscript $*
+fi
+

+ 9 - 9
starpupy/tests/starpu_py.py

@@ -73,7 +73,7 @@ def sub(a,b,c):
 ###############################################################################
 ###############################################################################
 
 
 #using decorator wrap the function with input
 #using decorator wrap the function with input
-@starpu.delayed
+@starpu.delayed(name="test")
 def add_deco(a,b,c):
 def add_deco(a,b,c):
 	#time.sleep(1)
 	#time.sleep(1)
 	print ("Example 8:")
 	print ("Example 8:")
@@ -83,7 +83,7 @@ def add_deco(a,b,c):
 ###############################################################################
 ###############################################################################
 
 
 #using decorator wrap the function with input
 #using decorator wrap the function with input
-@starpu.delayed
+@starpu.delayed(color=1)
 def sub_deco(x,a):
 def sub_deco(x,a):
 	print ("Example 9:")
 	print ("Example 9:")
 	print ("This is a function with input and output wrapped by the decorator function:")
 	print ("This is a function with input and output wrapped by the decorator function:")
@@ -93,34 +93,34 @@ def sub_deco(x,a):
 
 
 async def main():
 async def main():
 	#submit function "hello"
 	#submit function "hello"
-    fut = starpu.task_submit(hello)
+    fut = starpu.task_submit()(hello)
     await fut
     await fut
 
 
     #submit function "func1"
     #submit function "func1"
-    fut1 = starpu.task_submit(func1)
+    fut1 = starpu.task_submit()(func1)
     await fut1
     await fut1
 
 
     #apply starpu.delayed(func1_deco())
     #apply starpu.delayed(func1_deco())
     await func1_deco()
     await func1_deco()
 
 
 	#submit function "func2"
 	#submit function "func2"
-    fut2 = starpu.task_submit(func2)
+    fut2 = starpu.task_submit()(func2)
     res2 = await fut2
     res2 = await fut2
 	#print the result of function
 	#print the result of function
     print("This is a function no input and the return value is", res2)
     print("This is a function no input and the return value is", res2)
 
 
     #submit function "multi"
     #submit function "multi"
-    fut3 = starpu.task_submit(multi, 2, 3)
+    fut3 = starpu.task_submit()(multi, 2, 3)
     res3 = await fut3
     res3 = await fut3
     print("The result of function multi is :", res3)
     print("The result of function multi is :", res3)
 
 
 	#submit function "add"
 	#submit function "add"
-    fut4 = starpu.task_submit(add, 1.2, 2.5, 3.6, 4.9)
+    fut4 = starpu.task_submit()(add, 1.2, 2.5, 3.6, 4.9)
     res4 = await fut4
     res4 = await fut4
     print("The result of function add is :", res4)
     print("The result of function add is :", res4)
 
 
 	#submit function "sub"
 	#submit function "sub"
-    fut5 = starpu.task_submit(sub, 6, 2, 5.9)
+    fut5 = starpu.task_submit()(sub, 6, 2, 5.9)
     res5 = await fut5
     res5 = await fut5
     print("The result of function sub is:", res5)
     print("The result of function sub is:", res5)
 
 
@@ -138,4 +138,4 @@ async def main():
 asyncio.run(main())
 asyncio.run(main())
 
 
 
 
-#starpu.task_wait_for_all()
+#starpu.task_wait_for_all()

+ 19 - 0
starpupy/examples/starpu_py.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+$(dirname $0)/execute.sh starpu_py.py $*
+

+ 40 - 0
starpupy/examples/starpu_py_np.py

@@ -0,0 +1,40 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+import asyncio
+import numpy as np
+
+
+###############################################################################
+
+def scal(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a
+	return t
+
+t=np.array([1,2,3,4,5,6,7,8,9,10])
+
+async def main():
+    fut8 = starpu.task_submit()(scal, 2, t)
+    res8 = await fut8
+    print("The result of Example 10 is", res8)
+    print("The return array is", t)
+    #print("The result type is", type(res8))
+
+asyncio.run(main())
+
+
+#starpu.task_wait_for_all()

+ 3 - 7
starpupy/src/starpu/delay.py

@@ -1,3 +1,4 @@
+#!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 # Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
@@ -13,11 +14,6 @@
 #
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 #
-from starpu import starpupy
-import asyncio
 
 
-def delayed(f):
-	def submit(*args,**kwargs):
-		fut = starpupy.task_submit(f, *args,**kwargs)
-		return fut
-	return submit
+$(dirname $0)/execute.sh starpu_py_np.py $*
+

+ 350 - 0
starpupy/examples/starpu_py_parallel.py

@@ -0,0 +1,350 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+import starpu.joblib
+import time
+import asyncio
+from math import sqrt
+from math import log10
+import numpy as np
+import sys
+
+#generate a list to store functions
+g_func=[]
+
+#function no input no output print hello world
+def hello():
+	print ("Example 1: Hello, world!")
+g_func.append(starpu.joblib.delayed(hello)())
+
+#function no input no output
+def func1():
+	print ("Example 2: This is a function no input no output")
+g_func.append(starpu.joblib.delayed(func1)())
+
+#function no input return a value
+def func2():
+	print ("Example 3:")
+	return 12
+g_func.append(starpu.joblib.delayed(func2)())
+
+#function has 2 int inputs and 1 int output
+def exp(a,b):
+	res_exp=a**b
+	print("Example 4: The result of ",a,"^",b,"is",res_exp)
+	return res_exp
+g_func.append(starpu.joblib.delayed(exp)(2, 3))
+
+#function has 4 float inputs and 1 float output
+def add(a,b,c,d):
+	res_add=a+b+c+d
+	print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add)
+	return res_add
+g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9))
+
+#function has 2 int inputs 1 float input and 1 float output 1 int output
+def sub(a,b,c):
+	res_sub1=a-b-c
+	res_sub2=a-b
+	print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2)
+	return res_sub1, res_sub2
+g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9))
+
+##########functions of array calculation###############
+
+def scal(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a
+	return t
+
+def add_scal(a, t1, t2):
+	for i in range(len(t1)):
+		t1[i]=t1[i]*a+t2[i]
+	return t1
+
+def scal_arr(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a[i]
+	return t
+
+def multi(a,b):
+	res_multi=a*b
+	return res_multi
+
+def multi_2arr(a, b):
+        for i in range(len(a)):
+                a[i]=a[i]*b[i]
+        return a
+
+def multi_list(l):
+	res = []
+	for (a,b) in l:
+		res.append(a*b)
+	return res
+
+def log10_arr(t):
+	for i in range(len(t)):
+		t[i]=log10(t[i])
+	return t
+########################################################
+
+#################scikit test###################
+# DEFAULT_JOBLIB_BACKEND = starpu.joblib.get_active_backend()[0].__class__
+# class MyBackend(DEFAULT_JOBLIB_BACKEND):  # type: ignore
+#         def __init__(self, *args, **kwargs):
+#                 self.count = 0
+#                 super().__init__(*args, **kwargs)
+
+#         def start_call(self):
+#                 self.count += 1
+#                 return super().start_call()
+
+# starpu.joblib.register_parallel_backend('testing', MyBackend)
+
+# with starpu.joblib.parallel_backend("testing") as (ba, n_jobs):
+# 	print("backend and n_jobs is", ba, n_jobs)
+###############################################
+
+N=100
+# A=np.arange(N)
+# B=np.arange(N)
+# a=np.arange(N)
+# b=np.arange(N, 2*N, 1)
+
+displayPlot=False
+listX=[10, 100, 1000, 10000]
+for arg in sys.argv[1:]:
+        if arg == "-long":
+                listX = [10, 100, 1000, 10000, 100000, 1000000, 10000000]
+        if arg == "-plot":
+                displayPlot=True
+
+for x in listX:
+	for X in range(x, x*10, x):
+		print("X=",X)
+		starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_list")(starpu.joblib.delayed(log10)(i+1)for i in range(X))
+		A=np.arange(1,X+1,1)
+		starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_arr")(starpu.joblib.delayed(log10_arr)(A))
+
+print("************************")
+print("parallel Normal version:")
+print("************************")
+print("--(sqrt)(i**2)for i in range(N)")
+start_exec1=time.time()
+start_cpu1=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
+end_exec1=time.time()
+end_cpu1=time.process_time()
+print("the program execution time is", end_exec1-start_exec1)
+print("the cpu execution time is", end_cpu1-start_cpu1)
+
+print("--(multi)(i,j) for i,j in zip(a,b)")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec2=time.time()
+start_cpu2=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b))
+end_exec2=time.time()
+end_cpu2=time.process_time()
+print("the program execution time is", end_exec2-start_exec2)
+print("the cpu execution time is", end_cpu2-start_cpu2)
+
+print("--(scal_arr)((i for i in b), A)")
+A=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec3=time.time()
+start_cpu3=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A))
+end_exec3=time.time()
+end_cpu3=time.process_time()
+print("the program execution time is", end_exec3-start_exec3)
+print("the cpu execution time is", end_cpu3-start_cpu3)
+
+print("--(multi_list)((i,j) for i,j in zip(a,b))")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec4=time.time()
+start_cpu4=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b)))
+end_exec4=time.time()
+end_cpu4=time.process_time()
+print("the program execution time is", end_exec4-start_exec4)
+print("the cpu execution time is", end_cpu4-start_cpu4)
+
+print("--(multi_2arr)((i for i in a), (j for j in b))")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec5=time.time()
+start_cpu5=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b)))
+end_exec5=time.time()
+end_cpu5=time.process_time()
+print("the program execution time is", end_exec5-start_exec5)
+print("the cpu execution time is", end_cpu5-start_cpu5)
+
+print("--(multi_2arr)(A, B)")
+# A=np.arange(N)
+# B=np.arange(N, 2*N, 1)
+n, m = 4, 5
+A = np.arange(n*m).reshape(n, m)
+B = np.arange(n*m, 2*n*m, 1).reshape(n, m)
+print("The input arrays are A", A, "B", B)
+start_exec6=time.time()
+start_cpu6=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)(A, B))
+end_exec6=time.time()
+end_cpu6=time.process_time()
+print("the program execution time is", end_exec6-start_exec6)
+print("the cpu execution time is", end_cpu6-start_cpu6)
+print("The return arrays are A", A, "B", B)
+
+print("--(scal)(2, t=(j for j in a))")
+a=np.arange(N)
+start_exec7=time.time()
+start_cpu7=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, t=(j for j in a)))
+end_exec7=time.time()
+end_cpu7=time.process_time()
+print("the program execution time is", end_exec7-start_exec7)
+print("the cpu execution time is", end_cpu7-start_cpu7)
+
+print("--(scal)(2,A)")
+A=np.arange(N)
+print("The input array is", A)
+start_exec8=time.time()
+start_cpu8=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2,A))
+end_exec8=time.time()
+end_cpu8=time.process_time()
+print("the program execution time is", end_exec8-start_exec8)
+print("the cpu execution time is", end_cpu8-start_cpu8)
+print("The return array is", A)
+
+print("--(add_scal)(t1=A,t2=B,a=2)")
+A=np.arange(N)
+B=np.arange(N)
+print("The input arrays are A", A, "B", B)
+start_exec9=time.time()
+start_cpu9=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(t1=A,t2=B,a=2))
+end_exec9=time.time()
+end_cpu9=time.process_time()
+print("the program execution time is", end_exec9-start_exec9)
+print("the cpu execution time is", end_cpu9-start_cpu9)
+print("The return arrays are A", A, "B", B)
+
+
+print("--input is iterable function list")
+start_exec10=time.time()
+start_cpu10=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="func")(g_func)
+end_exec10=time.time()
+end_cpu10=time.process_time()
+print("the program execution time is", end_exec10-start_exec10)
+print("the cpu execution time is", end_cpu10-start_cpu10)
+
+# def producer():
+# 	for i in range(6):
+# 		print('Produced %s' % i)
+# 		yield i
+#starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(sqrt)(i) for i in producer())
+
+print("************************")
+print("parallel Future version:")
+print("************************")
+async def main():
+
+	print("--(sqrt)(i**2)for i in range(N)")
+	fut1=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
+	res1=await fut1
+	#print(res1)
+
+	print("--(multi)(i,j) for i,j in zip(a,b)")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut2=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b))
+	res2=await fut2
+	#print(res2)
+
+	print("--(scal_arr)((i for i in b), A)")
+	A=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut3=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A))
+	res3=await fut3
+	#print(res3)
+
+	print("--(multi_list)((i,j) for i,j in zip(a,b))")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut4=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b)))
+	res4=await fut4
+	#print(res4)
+
+	print("--(multi_2arr)((i for i in a), (j for j in b))")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut5=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b)))
+	res5=await fut5
+	#print(res5)
+
+	print("--(multi_2arr)(b=B, a=A)")
+	A=np.arange(N)
+	B=np.arange(N, 2*N, 1)
+	print("The input arrays are A", A, "B", B)
+	fut6=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)(b=B, a=A))
+	res6=await fut6
+	print("The return arrays are A", A, "B", B)
+
+
+	print("--(scal)(2, (j for j in a))")
+	a=np.arange(N)
+	fut7=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, (j for j in a)))
+	res7=await fut7
+	#print(res6)
+
+	print("--(scal)(2,t=A)")
+	A=np.arange(N)
+	print("The input array is", A)
+	fut8=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2,t=A))
+	res8=await fut8
+	print("The return array is", A)
+
+	print("--(scal)(2,A,B)")
+	A=np.arange(N)
+	B=np.arange(N)
+	print("The input arrays are A", A, "B", B)
+	fut9=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(2,A,B))
+	res9=await fut9
+	print("The return arrays are A", A, "B", B)
+
+	print("--input is iterable function list")
+	fut10=starpu.joblib.Parallel(mode="future", n_jobs=-1)(g_func)
+	res10=await fut10
+	#print(res9)
+
+asyncio.run(main())
+
+starpu.perfmodel_plot(perfmodel="sqrt",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="scal_arr",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi_list",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi_2arr",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="scal",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="add_scal",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="func",view=displayPlot)
+
+starpu.perfmodel_plot(perfmodel="log_list",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="log_arr",view=displayPlot)

+ 19 - 0
starpupy/examples/starpu_py_parallel.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+STARPU_CALIBRATE=1 $(dirname $0)/execute.sh starpu_py_parallel.py $*
+

+ 63 - 0
starpupy/src/Makefile.am

@@ -0,0 +1,63 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu-notests.mk
+
+SUBDIRS =
+
+PYTHON_PY_SRC	=	$(wildcard $(top_srcdir)/starpupy/src/*py)
+PYTHON_PY_BUILD	=	$(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_PY_SRC)))
+
+PYTHON_C_SRC	=	$(wildcard $(top_srcdir)/starpupy/src/*c)
+PYTHON_C_BUILD	=	$(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_C_SRC)))
+
+$(top_builddir)/starpupy/src/starpu/%.py: $(abs_top_srcdir)/starpupy/src/%.py
+	$(MKDIR_P) starpu
+	$(V_ln) $(LN_S) $< $@
+$(top_builddir)/starpupy/src/starpu/%.c: $(abs_top_srcdir)/starpupy/src/%.c
+	@$(MKDIR_P) starpu
+	$(V_ln) $(LN_S) $< $@
+
+all: $(PYTHON_PY_BUILD) $(PYTHON_C_BUILD)
+	$(PYTHON) setup.py build $(PYTHON_SETUP_OPTIONS)
+
+install-exec-local:
+	@if test -d $(prefix)/lib/python* ; \
+	then	\
+		chmod u+w $(prefix)/lib/python* ; \
+		chmod u+w $(prefix)/lib/python*/site-packages ; \
+	fi
+	$(PYTHON) setup.py install
+
+if STARPU_BUILD_STARPUPY
+clean-local:
+	$(PYTHON) setup.py clean
+	rm -f starpu/*py starpu/*c
+endif
+
+distclean-local:
+	rm -rf build
+
+uninstall-local:
+	rm -rf $(prefix)/lib/python*/site-packages/starpu*
+	rm -rf $(prefix)/lib/python*/site-packages/tmp/starpu*
+
+EXTRA_DIST	=		\
+	delay.py		\
+	__init__.py	\
+	intermedia.py	\
+	joblib.py	\
+	starpu_task_wrapper.c

+ 2 - 1
starpupy/src/starpu/__init__.py

@@ -17,4 +17,5 @@
 
 
 from.starpupy import *
 from.starpupy import *
 from .delay import *
 from .delay import *
-from . import joblib
+#from . import joblib
+from .intermedia import *

+ 29 - 0
starpupy/src/delay.py

@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+from starpu import starpupy
+import starpu
+import asyncio
+from functools import partial
+
+def delayed(f=None,*, name=None, synchronous=0, priority=0, color=None, flops=None, perfmodel=None):
+	# add options of task_submit
+	if f is None:
+		return partial(delayed, name=name, synchronous=synchronous, priority=priority, color=color, flops=flops, perfmodel=perfmodel)
+	def submit(*args):
+		fut = starpu.task_submit(name=name, synchronous=synchronous, priority=priority,\
+								 color=color, flops=flops, perfmodel=perfmodel)(f, *args)
+		return fut
+	return submit

+ 63 - 0
starpupy/src/intermedia.py

@@ -0,0 +1,63 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+from starpu import starpupy
+import os
+
+#class perfmodel
+class Perfmodel(object):
+	def __init__(self, symbol):
+		self.symbol=symbol
+		self.pstruct=starpupy.init_perfmodel(self.symbol)
+
+	def get_struct(self):
+		return self.pstruct
+
+	def __del__(self):
+	#def free_struct(self):
+		starpupy.free_perfmodel(self.pstruct)
+
+# generate the dictionary which contains the perfmodel symbol and its struct pointer
+dict_perf={}
+def dict_perf_generator(perfsymbol):
+	if dict_perf.get(perfsymbol)==None:
+		p=Perfmodel(perfsymbol)
+		dict_perf[perfsymbol]=p
+	else:
+		p=dict_perf[perfsymbol]
+	return p
+
+#add options in function task_submit
+def task_submit(*, name=None, synchronous=0, priority=0, color=None, flops=None, perfmodel=None):
+	if perfmodel==None:
+		dict_option={'name': name, 'synchronous': synchronous, 'priority': priority, 'color': color, 'flops': flops, 'perfmodel': None}
+	else:
+		p=dict_perf_generator(perfmodel)
+		dict_option={'name': name, 'synchronous': synchronous, 'priority': priority, 'color': color, 'flops': flops, 'perfmodel': p.get_struct()}
+
+	def call_task_submit(f, *args):
+		fut=starpupy._task_submit(f, *args, dict_option)
+		return fut
+	return call_task_submit
+
+# dump performance model and show the plot
+def perfmodel_plot(perfmodel, view=True):
+	p=dict_perf[perfmodel]
+	starpupy.save_history_based_model(p.get_struct())
+	if view == True:
+		os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
+		os.system('gnuplot starpu_'+perfmodel+'.gp')
+		os.system('gv starpu_'+perfmodel+'.eps')

+ 324 - 0
starpupy/src/joblib.py

@@ -0,0 +1,324 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+import sys
+import types
+import joblib as jl
+from joblib import logger
+from joblib._parallel_backends import ParallelBackendBase
+from starpu import starpupy
+import starpu
+import asyncio
+import math
+import functools
+import numpy as np
+import inspect
+import threading
+
+BACKENDS={
+	#'loky': LokyBackend,
+}
+_backend = threading.local()
+
+# get the number of CPUs controlled by StarPU
+def cpu_count():
+	n_cpus=starpupy.cpu_worker_get_count()
+	return n_cpus
+
+# split a list ls into n_block numbers of sub-lists 
+def partition(ls, n_block):
+	if len(ls)>=n_block:
+		# there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
+		q1=math.ceil(len(ls)/n_block)
+		q2=math.floor(len(ls)/n_block)
+		n1=len(ls)%n_block
+		#n2=n_block-n1
+		# generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
+		L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
+		L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
+
+		L=L1+L2
+	else:
+		# if the block number is larger than the length of list, each element in the list is a sub-list
+		L=[ls[i:i+1] for i in range (len(ls))]
+	return L
+
+# split a two-dimension numpy matrix into n_block numbers of sub-matrices
+def array2d_split(a, n_block):
+	# decompose number of n_jobs to two integers multiply
+	c_tmp=math.floor(math.sqrt(n_block))
+	for i in range (c_tmp,0,-1):
+		if n_block%i==0:
+			c=i
+			r=int(n_block/c)
+			break
+	# split column
+	arr_split_c=np.array_split(a,c,0)
+	arr_split=[]
+	# split row
+	for i in range(c):
+		arr_split_r=np.array_split(arr_split_c[i],r,1)
+		for j in range(r):
+			arr_split.append(arr_split_r[j])
+	return arr_split
+
+
+def future_generator(iterable, n_jobs, dict_task):
+	# iterable is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
+	#print("iterable type is ", type(iterable))
+	#print("iterable is", iterable)
+	# get the number of block
+	if n_jobs<-cpu_count()-1 or n_jobs>cpu_count():
+		raise SystemExit('Error: n_jobs is out of range')
+		#print("Error: n_jobs is out of range, number of CPUs is", cpu_count())
+	elif n_jobs<0:
+		n_block=cpu_count()+1+n_jobs
+	else:
+		n_block=n_jobs
+
+	# if arguments is tuple format
+	if type(iterable) is tuple:
+		# the function is always the first element
+		f=iterable[0]
+		# get the name of formal arguments of f
+		formal_args=inspect.getargspec(f).args
+		# get the arguments list
+		args=[]
+		# argument is arbitrary in iterable[1]
+		args=list(iterable[1])
+		# argument is keyword argument in iterable[2]
+		for i in range(len(formal_args)):
+			for j in iterable[2].keys():
+				if j==formal_args[i]:
+					args.append(iterable[2][j])
+		# check whether all arrays have the same size
+		l_arr=[]
+		# list of Future result
+		L_fut=[]
+		# split the vector
+		args_split=[]
+		for i in range(len(args)):
+			args_split.append([])
+			# if the array is an numpy array
+			if type(args[i]) is np.ndarray:
+				# one-dimension matrix
+				if args[i].ndim==1:
+					# split numpy array
+					args_split[i]=np.array_split(args[i],n_block)
+					# get the length of numpy array
+					l_arr.append(args[i].size)
+				# two-dimension matrix
+				elif args[i].ndim==2:
+					# split numpy 2D array
+					args_split[i]=array2d_split(args[i],n_block)
+			# if the array is a generator
+			elif isinstance(args[i],types.GeneratorType):
+				# split generator
+				args_split[i]=partition(list(args[i]),n_block)
+				# get the length of generator
+				l_arr.append(sum(len(args_split[i][j]) for j in range(len(args_split[i]))))
+		if len(set(l_arr))>1:
+			raise SystemExit('Error: all arrays should have the same size')
+		#print("args list is", args_split)
+		for i in range(n_block):
+			# generate the argument list
+			L_args=[]
+			for j in range(len(args)):
+				if type(args[j]) is np.ndarray or isinstance(args[j],types.GeneratorType):
+					L_args.append(args_split[j][i])
+				else:
+					L_args.append(args[j])
+			#print("L_args is", L_args)
+			fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\
+								   color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'])\
+				                  (f, *L_args)
+			L_fut.append(fut)
+		return L_fut
+
+	# if iterable is a generator or a list of function
+	else:
+		L=list(iterable)
+		#print(L)
+		# generate a list of function according to iterable
+		def lf(ls):
+			L_func=[]
+			for i in range(len(ls)):
+				# the first element is the function
+				f=ls[i][0]
+				# the second element is the args list of a type tuple
+				L_args=list(ls[i][1])
+				# generate a list of function
+				L_func.append(f(*L_args))
+			return L_func
+
+		# generate the split function list
+		L_split=partition(L,n_block)
+		# operation in each split list
+		L_fut=[]
+		for i in range(len(L_split)):
+			fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\
+								   color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'])\
+				                  (lf, L_split[i])
+			L_fut.append(fut)
+		return L_fut
+
+class Parallel(object):
+	def __init__(self, mode="normal", perfmodel=None, end_msg=None,\
+			 name=None, synchronous=0, priority=0, color=None, flops=None,\
+	         n_jobs=None, backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
+	         batch_size='auto', temp_folder=None, max_nbytes='1M',\
+	         mmap_mode='r', prefer=None, require=None):
+		#active_backend= get_active_backend()
+		# nesting_level = active_backend.nesting_level
+
+		# if backend is None:
+		# 	backend = active_backend
+
+		# else:
+		# 	try:
+		# 		backend_factory = BACKENDS[backend]
+		# 	except KeyError as e:
+		# 		raise ValueError("Invalid backend: %s, expected one of %r"
+  #                                % (backend, sorted(BACKENDS.keys()))) from e
+		# 	backend = backend_factory(nesting_level=nesting_level)
+
+		if n_jobs is None:
+			n_jobs = 1
+
+		self.mode=mode
+		self.perfmodel=perfmodel
+		self.end_msg=end_msg
+		self.name=name
+		self.synchronous=synchronous
+		self.priority=priority
+		self.color=color
+		self.flops=flops
+		self.n_jobs=n_jobs
+		self._backend=backend
+
+	def print_progress(self):
+		#pass
+		print("", starpupy.task_nsubmitted())
+
+	def __call__(self,iterable):
+		#generate the dictionary of task_submit
+		dict_task={'name': self.name, 'synchronous': self.synchronous, 'priority': self.priority, 'color': self.color, 'flops': self.flops, 'perfmodel': self.perfmodel}
+		if hasattr(self._backend, 'start_call'):
+			self._backend.start_call()
+		# the mode normal, user can call the function directly without using async
+		if self.mode=="normal":
+			async def asy_main():
+				L_fut=future_generator(iterable, self.n_jobs, dict_task)
+				res=[]
+				for i in range(len(L_fut)):
+					L_res=await L_fut[i]
+					res.extend(L_res)
+				#print(res)
+				#print("type of result is", type(res))
+				return res
+			#asyncio.run(asy_main())
+			#retVal=asy_main
+			loop = asyncio.get_event_loop()
+			results = loop.run_until_complete(asy_main())
+			retVal = results
+		# the mode future, user needs to use asyncio module and await the Future result in main function
+		elif self.mode=="future":
+			L_fut=future_generator(iterable, self.n_jobs, dict_task)
+			fut=asyncio.gather(*L_fut)
+			if self.end_msg!=None:
+				fut.add_done_callback(functools.partial(print, self.end_msg))
+			retVal=fut
+		if hasattr(self._backend, 'stop_call'):
+			self._backend.stop_call()
+		return retVal
+
+def delayed(function):
+	def delayed_function(*args, **kwargs):
+		return function, args, kwargs
+	return delayed_function
+
+
+######################################################################
+__version__ = jl.__version__
+
+class Memory(jl.Memory):
+	def __init__(self,location=None, backend='local', cachedir=None,
+                 mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
+                 backend_options=None):
+		super(Memory, self).__init__(location=None, backend='local', cachedir=None,
+                 mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
+                 backend_options=None)
+
+
+def dump(value, filename, compress=0, protocol=None, cache_size=None):
+	return jl.dump(value, filename, compress, protocol, cache_size)
+
+def load(filename, mmap_mode=None):
+	return jl.load(filename, mmap_mode)
+
+def hash(obj, hash_name='md5', coerce_mmap=False):
+	return jl.hash(obj, hash_name, coerce_mmap)
+
+def register_compressor(compressor_name, compressor, force=False):
+	return jl.register_compressor(compressor_name, compressor, force)
+
+def effective_n_jobs(n_jobs=-1):
+	return cpu_count()
+
+def get_active_backend():
+	backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
+	if backend_and_jobs is not None:
+		backend,n_jobs=backend_and_jobs
+		return backend
+	backend = BACKENDS[loky](nesting_level=0)
+	return backend
+
+class parallel_backend(object):
+	def __init__(self, backend, n_jobs=-1, inner_max_num_threads=None,
+                 **backend_params):
+		if isinstance(backend, str):
+			backend = BACKENDS[backend](**backend_params)
+
+		current_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
+		if backend.nesting_level is None:
+			if current_backend_and_jobs is None:
+				nesting_level = 0
+			else:
+				nesting_level = current_backend_and_jobs[0].nesting_level
+
+			backend.nesting_level = nesting_level
+
+		# Save the backends info and set the active backend
+		self.old_backend_and_jobs = current_backend_and_jobs
+		self.new_backend_and_jobs = (backend, n_jobs)
+
+		_backend.backend_and_jobs = (backend, n_jobs)
+
+	def __enter__(self):
+		return self.new_backend_and_jobs
+
+	def __exit__(self, type, value, traceback):
+		self.unregister()
+
+	def unregister(self):
+		if self.old_backend_and_jobs is None:
+			if getattr(_backend, 'backend_and_jobs', None) is not None:
+				del _backend.backend_and_jobs
+		else:
+			_backend.backend_and_jobs = self.old_backend_and_jobs
+
+def register_parallel_backend(name, factory):
+	BACKENDS[name] = factory

+ 23 - 0
starpupy/src/setup.cfg.in

@@ -0,0 +1,23 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Universit'e de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+[build]
+build-platlib=build
+build-temp=build/tmp
+
+[install]
+prefix=@prefix@
+
+

+ 40 - 0
starpupy/src/setup.py.in

@@ -0,0 +1,40 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Universit'e de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+from distutils.core import setup, Extension
+
+numpy_dir = '@PYTHON_NUMPY_DIR@'
+if numpy_dir != '':
+    numpy_include_dir = [numpy_dir]
+else:
+    numpy_include_dir = []
+starpupy = Extension('starpu.starpupy',
+                     include_dirs = ['@STARPU_SRC_DIR@/include', '@STARPU_BUILD_DIR@/include'] + numpy_include_dir,
+                     libraries = ['starpu-@STARPU_EFFECTIVE_VERSION@'],
+                     library_dirs = ['@STARPU_BUILD_DIR@/src/.libs'],
+	             sources = ['starpu/starpu_task_wrapper.c'])
+
+setup(
+    name = 'starpupy',
+    version = '0.5',
+    description = 'Python bindings for StarPU',
+    author = 'StarPU team',
+    author_email = 'starpu-devel@lists.gforge.inria.fr',
+    url = 'https://starpu.gitlabpages.inria.fr/',
+    license = 'GPL',
+    platforms = 'posix',
+    ext_modules = [starpupy],
+    packages = ['starpu'],
+    )

+ 0 - 13
starpupy/src/starpu/Makefile

@@ -1,13 +0,0 @@
-PYTHON ?= python3
-
-CPPFLAGS = $(shell $(PYTHON)-config --includes) -Wall -O2 -g
-CFLAGS += $(shell pkg-config --cflags starpu-1.3)
-LDLIBS += $(shell pkg-config --libs starpu-1.3)
-
-all: starpupy.so
-
-starpupy.so: starpu_task_wrapper.c Makefile
-	$(CC) -fPIC $(CFLAGS) $< -o $@ -shared  $(CPPFLAGS) $(LDLIBS)
-
-clean:
-	rm -f starpupy.so

+ 0 - 147
starpupy/src/starpu/joblib.py

@@ -1,147 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-from starpu import starpupy
-import asyncio
-import math
-import os
-import pickle
-import json
-import functools
-
-# get the number of CPUs controlled by StarPU
-n_cpus=starpupy.cpu_worker_get_count()
-
-#class perfmodel
-class Perfmodel(object):
-	def __init__(self, symbol):
-		self.symbol=symbol
-		self.pstruct=starpupy.init_perfmodel(self.symbol)
-
-	def get_struct(self):
-		return self.pstruct
-
-	def __del__(self):
-	#def free_struct(self):
-		starpupy.free_perfmodel(self.pstruct)
-
-# split a list ls into n_block numbers of sub-lists 
-def partition(ls, n_block):
-	if len(ls)>=n_block:
-		# there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
-		q1=math.ceil(len(ls)/n_block)
-		q2=math.floor(len(ls)/n_block)
-		n1=len(ls)%n_block
-		#n2=n_block-n1
-		# generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
-		L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
-		L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
-
-		L=L1+L2
-	else:
-		# if the block number is larger than the length of list, each element in the list is a sub-list
-		L=[ls[i:i+1] for i in range (len(ls))]
-	return L
-
-# generate the dictionary which contains the perfmodel symbol and its struct pointer
-dict_perf={}
-def dict_perf_generator(perfsymbol):
-	if dict_perf.get(perfsymbol)==None:
-		p=Perfmodel(perfsymbol)
-		dict_perf[perfsymbol]=p
-	else:
-		p=dict_perf[perfsymbol]
-	return p
-
-def future_generator(g, n_jobs, perfsymbol):
-	# g is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
-	L=list(g)
-	# generate a list of function according to g
-	def lf(ls):
-		L_func=[]
-		for i in range(len(ls)):
-			# the first element is the function
-			f=ls[i][0]
-			# the second element is the args list of a type tuple
-			L_args=list(ls[i][1])
-			# generate a list of function
-			L_func.append(f(*L_args))
-		return L_func
-	# get the number of block
-	if n_jobs<-n_cpus-1 or n_jobs>n_cpus:
-		print("Error: n_jobs is out of range, number of CPUs is", n_cpus)
-	elif n_jobs<0:
-		n_block=n_cpus+1+n_jobs
-	else:
-		n_block=n_jobs
-	# generate the split function list
-	L_split=partition(L,n_block)
-	# operation in each split list
-	L_fut=[]
-	for i in range(len(L_split)):
-		if perfsymbol==None:
-			fut=starpupy.task_submit(lf, L_split[i])
-			L_fut.append(fut)
-		else:
-			p=dict_perf_generator(perfsymbol)
-			fut=starpupy.task_submit(lf, L_split[i], p.get_struct())
-			L_fut.append(fut)
-	return L_fut
-
-def parallel(*, mode="normal", n_jobs=1, perfmodel=None, end_msg=None,\
-	         backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
-	         batch_size='auto', temp_folder=None, max_nbytes='1M',\
-	         mmap_mode='r', prefer=None, require=None):
-	# the mode normal, user can call the function directly without using async
-	if mode=="normal":
-		def parallel_normal(g):
-			async def asy_main():
-				L_fut=future_generator(g, n_jobs, perfmodel)
-				res=[]
-				for i in range(len(L_fut)):
-					L_res=await L_fut[i]
-					res.extend(L_res)
-				#print(res)
-				return res
-			asyncio.run(asy_main())
-			return asy_main
-		return parallel_normal
-	# the mode future, user needs to use asyncio module and await the Future result in main function
-	elif mode=="future":
-		def parallel_future(g):
-			L_fut=future_generator(g, n_jobs, perfmodel)
-			fut=asyncio.gather(*L_fut)
-			if end_msg==None:
-				return fut
-			else:
-				fut.add_done_callback(functools.partial(print, end_msg))
-				return fut
-			#return fut
-		return parallel_future
-
-def delayed(f):
-	def delayed_func(*args):
-		return f, args
-	return delayed_func
-
-
-######################################################################
-# dump performance model
-def perfmodel_plot(perfmodel):
-	p=dict_perf[perfmodel]
-	starpupy.save_history_based_model(p.get_struct())
-	os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
-	os.system('gnuplot starpu_'+perfmodel+'.gp')
-	os.system('gv starpu_'+perfmodel+'.eps')

+ 0 - 416
starpupy/src/starpu/starpu_task_wrapper.c

@@ -1,416 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <starpu.h>
-
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-
-/*macro*/
-#if defined(Py_DEBUG) || defined(DEBUG)
-extern void _Py_CountReferences(FILE*);
-#define CURIOUS(x) { fprintf(stderr, __FILE__ ":%d ", __LINE__); x; }
-#else
-#define CURIOUS(x)
-#endif
-#define MARKER()        CURIOUS(fprintf(stderr, "\n"))
-#define DESCRIBE(x)     CURIOUS(fprintf(stderr, "  " #x "=%d\n", x))
-#define DESCRIBE_HEX(x) CURIOUS(fprintf(stderr, "  " #x "=%08x\n", x))
-#define COUNTREFS()     CURIOUS(_Py_CountReferences(stderr))
-/*******/
-
-/*********************Functions passed in task_submit wrapper***********************/
-
-static PyObject* asyncio_module; /*python asyncio library*/
-
-/*structure contains parameters which are passed to starpu_task.cl_arg*/
-struct codelet_struct { 
-    PyObject* f; /*the python function passed in*/
-    PyObject* argList; /*argument list of python function passed in*/
-    PyObject* rv; /*return value when using PyObject_CallObject call the function f*/
-    PyObject* fut; /*asyncio.Future*/
-    PyObject* lp; /*asyncio.Eventloop*/
-};
-typedef struct codelet_struct codelet_st;
-
-/*function passed to starpu_codelet.cpu_func*/
-void codelet_func(void *buffers[], void *cl_arg){
-
-    codelet_st* cst = (codelet_st*) cl_arg;
-
-    /*make sure we own the GIL*/
-    PyGILState_STATE state = PyGILState_Ensure();
-
-    /*verify that the function is a proper callable*/
-    if (!PyCallable_Check(cst->f)) {
-
-        printf("py_callback: expected a callable function\n"); 
-        exit(1);
-    }
-    
-    /*check the arguments of python function passed in*/
-    for (int i=0; i < PyTuple_Size(cst->argList); i++){
-      PyObject* obj=PyTuple_GetItem(cst->argList, i);
-      const char* tp = Py_TYPE(obj)->tp_name;
-      if(strcmp(tp, "_asyncio.Future") == 0){
-        /*if one of arguments is Future, get its result*/
-        PyObject * fut_result = PyObject_CallMethod(obj, "result", NULL);
-        /*replace the Future argument to its result*/
-        PyTuple_SetItem(cst->argList, i, fut_result);
-      }
-    }
-
-    /*call the python function*/
-    PyObject *pRetVal = PyObject_CallObject(cst->f, cst->argList);
-    cst->rv=pRetVal;
-
-    //Py_DECREF(cst->f);
-
-    /*restore previous GIL state*/
-    PyGILState_Release(state);
-
-}
-
-/*function passed to starpu_task.callback_func*/
-void cb_func(void *v){
-
-	struct starpu_task *task=starpu_task_get_current();
-    codelet_st* cst = (codelet_st*) task->cl_arg;
-
-    /*make sure we own the GIL*/
-    PyGILState_STATE state = PyGILState_Ensure();
-
-    /*set the Future result and mark the Future as done*/
-    PyObject * set_result = PyObject_GetAttrString(cst->fut, "set_result");
-    PyObject * loop_callback = PyObject_CallMethod(cst->lp, "call_soon_threadsafe", "(O,O)", set_result, cst->rv);
-
-    Py_DECREF(loop_callback);
-    Py_DECREF(set_result);
-    Py_DECREF(cst->rv);
-    Py_DECREF(cst->fut);
-    Py_DECREF(cst->lp);
-
-    //Py_DECREF(perfmodel);
-    struct starpu_codelet * func_cl=(struct starpu_codelet *) task->cl;
-    if (func_cl->model != NULL){
-      struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
-      PyObject* perfmodel=PyCapsule_New(perf, "Perf", 0);
-      Py_DECREF(perfmodel);
-    }
-
-    for(int i = 0; i < PyTuple_Size(cst->argList); i++){
-        Py_DECREF(PyTuple_GetItem(cst->argList, i));
-    }
-    Py_DECREF(cst->argList);
-
-    /*restore previous GIL state*/
-    PyGILState_Release(state);
-
-    /*deallocate task*/
-    free(task->cl);
-	  free(task->cl_arg);
-
-}
-
-/***********************************************************************************/
-/*PyObject*->struct starpu_task**/
-static struct starpu_task *PyTask_AsTask(PyObject* obj){
-  return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task");
-}
-
-/* destructor function for task */
-static void del_Task(PyObject *obj) {
-  struct starpu_task* obj_task=PyTask_AsTask(obj);
-  obj_task->destroy=1; /*XXX we should call starpu task destroy*/
-}
-
-/*struct starpu_task*->PyObject**/
-static PyObject *PyTask_FromTask(struct starpu_task *task) {
-  return PyCapsule_New(task, "Task", del_Task);
-}
-
-/***********************************************************************************/
-static size_t sizebase (struct starpu_task * task, unsigned nimpl){
-
-  codelet_st* cst = (codelet_st*) task->cl_arg;
-
-  PyObject* obj=PyTuple_GetItem(cst->argList, 0);
-  /*get the length of arguments*/
-  int n = PyList_Size(obj);
-
-  return n;
-}
-
-static void del_Perf(PyObject *obj){
-  struct starpu_perfmodel *perf=(struct starpu_perfmodel*)PyCapsule_GetPointer(obj, "Perf");
-  free(perf);
-}
-/*initialization of perfmodel*/
-static PyObject* init_perfmodel(PyObject *self, PyObject *args){
-
-  char* sym;
-
-  if (!PyArg_ParseTuple(args, "s", &sym))
-    return NULL;
-
-  /*allocate a perfmodel structure*/
-  struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel));
-
-  /*get the perfmodel symbol*/
-  char* p =strdup(sym);
-  perf->symbol=p;
-  perf->type=STARPU_HISTORY_BASED;
-
-  /*struct perfmodel*->PyObject**/
-  PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL);
-
-  return perfmodel;
-}
-
-
-/*free perfmodel*/
-static PyObject* free_perfmodel(PyObject *self, PyObject *args){
-
-  PyObject* perfmodel;
-  if (!PyArg_ParseTuple(args, "O", &perfmodel))
-    return NULL;
-
-  /*PyObject*->struct perfmodel**/
-  struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-
-  starpu_save_history_based_model(perf);
-  //starpu_perfmodel_unload_model(perf);
-  free(perf->symbol);
-  starpu_perfmodel_deinit(perf);
-  free(perf);
-
-  /*return type is void*/
-  Py_INCREF(Py_None);
-  return Py_None;
-}
-
-static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args){
-
-  PyObject* perfmodel;
-  if (!PyArg_ParseTuple(args, "O", &perfmodel))
-    return NULL;
-
-  /*PyObject*->struct perfmodel**/
-  struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-
-  starpu_save_history_based_model(perf);
-
-  /*return type is void*/
-  Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*****************************Wrappers of StarPU methods****************************/
-/*wrapper submit method*/
-static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args){
-
-    /*get the running Event loop*/
-    PyObject* loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL);
-    /*create a asyncio.Future object*/
-    PyObject* fut = PyObject_CallMethod(loop, "create_future", NULL);
-
-    /*first argument in args is always the python function passed in*/
-    PyObject* func_py = PyTuple_GetItem(args, 0);
-    Py_INCREF(func_py);
-
-	  /*allocate a task structure and initialize it with default values*/
-    struct starpu_task *task=starpu_task_create();
-    task->destroy=0;
-
-    PyObject* PyTask=PyTask_FromTask(task);
-
-    /*set one of fut attribute to the task pointer*/
-    PyObject_SetAttrString(fut, "starpu_task", PyTask);
-    /*check the arguments of python function passed in*/
-    for (int i=1; i < PyTuple_Size(args); i++){
-      PyObject* obj=PyTuple_GetItem(args, i);
-      const char* tp = Py_TYPE(obj)->tp_name;
-      if(strcmp(tp, "_asyncio.Future") == 0){
-        /*if one of arguments is Future, get its corresponding task*/
-        PyObject* fut_task=PyObject_GetAttrString(obj, "starpu_task");
-        /*declare task dependencies between the current task and the corresponding task of Future argument*/
-        starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task));
-
-        Py_DECREF(fut_task);
-      }
-    }
-    
-    /*allocate a codelet structure*/
-    struct starpu_codelet *func_cl=(struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
-    /*initialize func_cl with default values*/
-    starpu_codelet_init(func_cl);
-    func_cl->cpu_func=&codelet_func;
-    
-    /*check whether the last argument in args is the perfmodel*/
-    PyObject* perfmodel=PyTuple_GetItem(args, PyTuple_Size(args)-1);
-    const char* tp_perf = Py_TYPE(perfmodel)->tp_name;
-    if (strcmp(tp_perf, "PyCapsule")==0){
-      /*PyObject*->struct perfmodel**/
-      struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-      func_cl->model=perf;
-      Py_INCREF(perfmodel);
-    }
-    
-
-    /*allocate a new codelet structure to pass the python function, asyncio.Future and Event loop*/
-    codelet_st *cst = (codelet_st*)malloc(sizeof(codelet_st));
-    cst->f = func_py;
-    cst->fut = fut;
-    cst->lp = loop;
-    
-    Py_INCREF(fut);
-    Py_INCREF(loop);
-
-    /*pass args in argList*/
-    if (PyTuple_Size(args)==1 || (PyTuple_Size(args)==2 && strcmp(tp_perf, "PyCapsule")==0))/*function no arguments*/
-      cst->argList = PyTuple_New(0);
-    else if(PyTuple_Size(args)>2 && strcmp(tp_perf, "PyCapsule")==0){/*function has arguments and the last argument in args is the perfmodel*/
-      cst->argList = PyTuple_New(PyTuple_Size(args)-2);
-      for (int i=0; i < PyTuple_Size(args)-2; i++){
-        PyObject* tmp=PyTuple_GetItem(args, i+1);
-        PyTuple_SetItem(cst->argList, i, tmp);
-        Py_INCREF(PyTuple_GetItem(cst->argList, i));
-      }
-    }
-    else{/*function has arguments and no perfmodel*/
-      cst->argList = PyTuple_New(PyTuple_Size(args)-1);
-      for (int i=0; i < PyTuple_Size(args)-1; i++){
-        PyObject* tmp=PyTuple_GetItem(args, i+1);
-        PyTuple_SetItem(cst->argList, i, tmp);
-        Py_INCREF(PyTuple_GetItem(cst->argList, i));
-      }
-    }
-
-    task->cl=func_cl;
-    task->cl_arg=cst;
-    /*call starpu_task_submit method*/
-    starpu_task_submit(task);
-    task->callback_func=&cb_func;
-    if (strcmp(tp_perf, "PyCapsule")==0){
-      struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
-      perf->size_base=&sizebase;
-    }
-
-    //printf("the number of reference is %ld\n", Py_REFCNT(func_py));
-    //_Py_PrintReferences(stderr);
-    //COUNTREFS();
-    return fut;
-
-}
-
-/*wrapper wait for all method*/
-static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_task_wait_for_all method*/
-	Py_BEGIN_ALLOW_THREADS
-	starpu_task_wait_for_all();
-	Py_END_ALLOW_THREADS
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper pause method*/
-static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_pause method*/
-	starpu_pause();
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper resume method*/
-static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_resume method*/
-	starpu_resume();
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper get count cpu method*/
-static PyObject* starpu_cpu_worker_get_count_wrapper(PyObject *self, PyObject *args){
-
-  /*call starpu_cpu_worker_get_count method*/
-  int num_cpu=starpu_cpu_worker_get_count();
-
-  /*return type is unsigned*/
-  return Py_BuildValue("I", num_cpu);
-}
-
-/***********************************************************************************/
-
-/***************The module’s method table and initialization function**************/
-/*method table*/
-static PyMethodDef starpupyMethods[] = 
-{ 
-  {"task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/
-  {"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/
-  {"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/
-  {"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/
-  {"cpu_worker_get_count", starpu_cpu_worker_get_count_wrapper, METH_VARARGS, "return the number of CPUs controlled by StarPU"}, /*get count cpu method*/
-  {"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/
-  {"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/
-  {"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/
-  {NULL, NULL}
-};
-
-/*deallocation function*/
-static void starpupyFree(void* self){
-	starpu_shutdown();
-  Py_DECREF(asyncio_module);
-  //COUNTREFS();
-}
-
-/*module definition structure*/
-static struct PyModuleDef starpupymodule={
-  PyModuleDef_HEAD_INIT,
-  "starpupy", /*name of module*/
-  NULL,
-  -1,
-  starpupyMethods, /*method table*/
-  NULL,
-  NULL,
-  NULL,
-  starpupyFree /*deallocation function*/
-};
-
-/*initialization function*/
-PyMODINIT_FUNC
-PyInit_starpupy(void)
-{
-    PyEval_InitThreads();
-    /*starpu initialization*/
-	  starpu_init(NULL);
-    /*python asysncio import*/
-    asyncio_module = PyImport_ImportModule("asyncio");
-    /*module import initialization*/
-    return PyModule_Create(&starpupymodule);
-}
-/***********************************************************************************/

+ 536 - 0
starpupy/src/starpu_task_wrapper.c

@@ -0,0 +1,536 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#undef NDEBUG
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <starpu.h>
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+#include <numpy/arrayobject.h>
+#endif
+
+/*macro*/
+#if defined(Py_DEBUG) || defined(DEBUG)
+extern void _Py_CountReferences(FILE*);
+#define CURIOUS(x) { fprintf(stderr, __FILE__ ":%d ", __LINE__); x; }
+#else
+#define CURIOUS(x)
+#endif
+#define MARKER()        CURIOUS(fprintf(stderr, "\n"))
+#define DESCRIBE(x)     CURIOUS(fprintf(stderr, "  " #x "=%d\n", x))
+#define DESCRIBE_HEX(x) CURIOUS(fprintf(stderr, "  " #x "=%08x\n", x))
+#define COUNTREFS()     CURIOUS(_Py_CountReferences(stderr))
+/*******/
+
+/*********************Functions passed in task_submit wrapper***********************/
+
+static PyObject *asyncio_module; /*python asyncio library*/
+
+/*structure contains parameters which are passed to starpu_task.cl_arg*/
+struct codelet_args
+{
+	PyObject *f; /*the python function passed in*/
+	PyObject *argList; /*argument list of python function passed in*/
+	PyObject *rv; /*return value when using PyObject_CallObject call the function f*/
+	PyObject *fut; /*asyncio.Future*/
+	PyObject *lp; /*asyncio.Eventloop*/
+};
+
+/*function passed to starpu_codelet.cpu_func*/
+void codelet_func(void *buffers[], void *cl_arg)
+{
+	struct codelet_args *cst = (struct codelet_args*) cl_arg;
+
+	/*make sure we own the GIL*/
+	PyGILState_STATE state = PyGILState_Ensure();
+
+	/*verify that the function is a proper callable*/
+	if (!PyCallable_Check(cst->f))
+	{
+		printf("py_callback: expected a callable function\n");
+		exit(1);
+	}
+
+	/*check the arguments of python function passed in*/
+	int i;
+	for(i=0; i < PyTuple_Size(cst->argList); i++)
+	{
+		PyObject *obj = PyTuple_GetItem(cst->argList, i);
+		const char *tp = Py_TYPE(obj)->tp_name;
+		if(strcmp(tp, "_asyncio.Future") == 0)
+		{
+			/*if one of arguments is Future, get its result*/
+			PyObject *fut_result = PyObject_CallMethod(obj, "result", NULL);
+			/*replace the Future argument to its result*/
+			PyTuple_SetItem(cst->argList, i, fut_result);
+		}
+		/*else if (strcmp(tp, "numpy.ndarray")==0)
+		  {
+		  printf("array is %p\n", obj);
+		  }*/
+	}
+
+	/*call the python function*/
+	PyObject *pRetVal = PyObject_CallObject(cst->f, cst->argList);
+	//const char *tp = Py_TYPE(pRetVal)->tp_name;
+	//printf("return value type is %s\n", tp);
+	cst->rv = pRetVal;
+
+	//Py_DECREF(cst->f);
+
+	/*restore previous GIL state*/
+	PyGILState_Release(state);
+}
+
+/*function passed to starpu_task.callback_func*/
+void cb_func(void *v)
+{
+	struct starpu_task *task = starpu_task_get_current();
+	struct codelet_args *cst = (struct codelet_args*) task->cl_arg;
+
+	/*make sure we own the GIL*/
+	PyGILState_STATE state = PyGILState_Ensure();
+
+	/*set the Future result and mark the Future as done*/
+	PyObject *set_result = PyObject_GetAttrString(cst->fut, "set_result");
+	PyObject *loop_callback = PyObject_CallMethod(cst->lp, "call_soon_threadsafe", "(O,O)", set_result, cst->rv);
+
+	Py_DECREF(loop_callback);
+	Py_DECREF(set_result);
+	Py_DECREF(cst->rv);
+	Py_DECREF(cst->fut);
+	Py_DECREF(cst->lp);
+	Py_DECREF(cst->argList);
+
+	//Py_DECREF(perfmodel);
+	struct starpu_codelet *func_cl=(struct starpu_codelet *) task->cl;
+	if (func_cl->model != NULL)
+	{
+		struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
+		PyObject *perfmodel=PyCapsule_New(perf, "Perf", 0);
+		Py_DECREF(perfmodel);
+	}
+
+	/*restore previous GIL state*/
+	PyGILState_Release(state);
+
+	/*deallocate task*/
+	free(task->cl);
+	free(task->cl_arg);
+}
+
+/***********************************************************************************/
+/*PyObject*->struct starpu_task**/
+static struct starpu_task *PyTask_AsTask(PyObject *obj)
+{
+	return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task");
+}
+
+/* destructor function for task */
+static void del_Task(PyObject *obj)
+{
+	struct starpu_task *obj_task=PyTask_AsTask(obj);
+	obj_task->destroy=1; /*XXX we should call starpu task destroy*/
+}
+
+/*struct starpu_task*->PyObject**/
+static PyObject *PyTask_FromTask(struct starpu_task *task)
+{
+	return PyCapsule_New(task, "Task", del_Task);
+}
+
+/***********************************************************************************/
+static size_t sizebase (struct starpu_task *task, unsigned nimpl)
+{
+	int n=0;
+	struct codelet_args *cst = (struct codelet_args*) task->cl_arg;
+
+	/*get the result of function*/
+	PyObject *obj=cst->rv;
+	/*get the length of result*/
+	const char *tp = Py_TYPE(obj)->tp_name;
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+	/*if the result is a numpy array*/
+	if (strcmp(tp, "numpy.ndarray")==0)
+		n = PyArray_SIZE(obj);
+	else
+#endif
+	/*if the result is a list*/
+	if (strcmp(tp, "list")==0)
+		n = PyList_Size(obj);
+	/*else error*/
+	else
+	{
+		printf("starpu_perfmodel::size_base: the type of function result is unrecognized\n");
+		exit(1);
+	}
+	return n;
+}
+
+static void del_Perf(PyObject *obj)
+{
+	struct starpu_perfmodel *perf=(struct starpu_perfmodel*)PyCapsule_GetPointer(obj, "Perf");
+	free(perf);
+}
+
+/*initialization of perfmodel*/
+static PyObject* init_perfmodel(PyObject *self, PyObject *args)
+{
+	char *sym;
+
+	if (!PyArg_ParseTuple(args, "s", &sym))
+		return NULL;
+
+	/*allocate a perfmodel structure*/
+	struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel));
+
+	/*get the perfmodel symbol*/
+	char *p =strdup(sym);
+	perf->symbol=p;
+	perf->type=STARPU_HISTORY_BASED;
+
+	/*struct perfmodel*->PyObject**/
+	PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL);
+
+	return perfmodel;
+}
+
+/*free perfmodel*/
+static PyObject* free_perfmodel(PyObject *self, PyObject *args)
+{
+	PyObject *perfmodel;
+	if (!PyArg_ParseTuple(args, "O", &perfmodel))
+		return NULL;
+
+	/*PyObject*->struct perfmodel**/
+	struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+
+	starpu_save_history_based_model(perf);
+	//starpu_perfmodel_unload_model(perf);
+	//free(perf->symbol);
+	starpu_perfmodel_deinit(perf);
+	free(perf);
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args)
+{
+	PyObject *perfmodel;
+	if (!PyArg_ParseTuple(args, "O", &perfmodel))
+		return NULL;
+
+	/*PyObject*->struct perfmodel**/
+	struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+
+	starpu_save_history_based_model(perf);
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*****************************Wrappers of StarPU methods****************************/
+/*wrapper submit method*/
+static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args)
+{
+	/*get the running Event loop*/
+	PyObject *loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL);
+	/*create a asyncio.Future object*/
+	PyObject *fut = PyObject_CallMethod(loop, "create_future", NULL);
+
+	/*first argument in args is always the python function passed in*/
+	PyObject *func_py = PyTuple_GetItem(args, 0);
+	Py_INCREF(func_py);
+
+	/*allocate a task structure and initialize it with default values*/
+	struct starpu_task *task=starpu_task_create();
+	task->destroy=0;
+
+	PyObject *PyTask=PyTask_FromTask(task);
+
+	/*set one of fut attribute to the task pointer*/
+	PyObject_SetAttrString(fut, "starpu_task", PyTask);
+	/*check the arguments of python function passed in*/
+	int i;
+	for(i=1; i < PyTuple_Size(args)-1; i++)
+	{
+		PyObject *obj=PyTuple_GetItem(args, i);
+		const char* tp = Py_TYPE(obj)->tp_name;
+		if(strcmp(tp, "_asyncio.Future") == 0)
+		{
+			/*if one of arguments is Future, get its corresponding task*/
+			PyObject *fut_task=PyObject_GetAttrString(obj, "starpu_task");
+			/*declare task dependencies between the current task and the corresponding task of Future argument*/
+			starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task));
+
+			Py_DECREF(fut_task);
+		}
+	}
+
+	/*allocate a codelet structure*/
+	struct starpu_codelet *func_cl=(struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
+	/*initialize func_cl with default values*/
+	starpu_codelet_init(func_cl);
+	func_cl->cpu_funcs[0]=&codelet_func;
+	func_cl->cpu_funcs_name[0]="codelet_func";
+
+	/*check whether the option perfmodel is None*/
+	PyObject *dict_option = PyTuple_GetItem(args, PyTuple_Size(args)-1);/*the last argument is the option dictionary*/
+	PyObject *perfmodel = PyDict_GetItemString(dict_option, "perfmodel");
+	const char *tp_perf = Py_TYPE(perfmodel)->tp_name;
+	if (strcmp(tp_perf, "PyCapsule")==0)
+	{
+		/*PyObject*->struct perfmodel**/
+		struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+		func_cl->model=perf;
+		Py_INCREF(perfmodel);
+	}
+
+	/*allocate a new codelet structure to pass the python function, asyncio.Future and Event loop*/
+	struct codelet_args *cst = (struct codelet_args*)malloc(sizeof(struct codelet_args));
+	cst->f = func_py;
+	cst->fut = fut;
+	cst->lp = loop;
+
+	Py_INCREF(fut);
+	Py_INCREF(loop);
+
+	/*pass args in argList*/
+	if (PyTuple_Size(args)==2)/*function no arguments*/
+		cst->argList = PyTuple_New(0);
+	else
+	{/*function has arguments*/
+		cst->argList = PyTuple_New(PyTuple_Size(args)-2);
+		int i;
+		for(i=0; i < PyTuple_Size(args)-2; i++)
+		{
+			PyObject *tmp=PyTuple_GetItem(args, i+1);
+			PyTuple_SetItem(cst->argList, i, tmp);
+			Py_INCREF(PyTuple_GetItem(cst->argList, i));
+		}
+	}
+
+	task->cl=func_cl;
+	task->cl_arg=cst;
+
+	/*pass optional values name=None, synchronous=1, priority=0, color=None, flops=None, perfmodel=None*/
+	/*const char * name*/
+	PyObject *PyName = PyDict_GetItemString(dict_option, "name");
+	const char *name_type = Py_TYPE(PyName)->tp_name;
+	if (strcmp(name_type, "NoneType")!=0)
+	{
+		PyObject *pStrObj = PyUnicode_AsUTF8String(PyName);
+		char* name_str = PyBytes_AsString(pStrObj);
+		char* name = strdup(name_str);
+		//printf("name is %s\n", name);
+		task->name=name;
+		Py_DECREF(pStrObj);
+	}
+
+	/*unsigned synchronous:1*/
+	PyObject *PySync = PyDict_GetItemString(dict_option, "synchronous");
+	unsigned sync=PyLong_AsUnsignedLong(PySync);
+	//printf("sync is %u\n", sync);
+	task->synchronous=sync;
+
+	/*int priority*/
+	PyObject *PyPrio = PyDict_GetItemString(dict_option, "priority");
+	int prio=PyLong_AsLong(PyPrio);
+	//printf("prio is %d\n", prio);
+	task->priority=prio;
+
+	/*unsigned color*/
+	PyObject *PyColor = PyDict_GetItemString(dict_option, "color");
+	const char *color_type = Py_TYPE(PyColor)->tp_name;
+	if (strcmp(color_type, "NoneType")!=0)
+	{
+		unsigned color=PyLong_AsUnsignedLong(PyColor);
+		//printf("color is %u\n", color);
+		task->color=color;
+	}
+
+	/*double flops*/
+	PyObject *PyFlops = PyDict_GetItemString(dict_option, "flops");
+	const char *flops_type = Py_TYPE(PyFlops)->tp_name;
+	if (strcmp(flops_type, "NoneType")!=0)
+	{
+		double flops=PyFloat_AsDouble(PyFlops);
+		//printf("flops is %f\n", flop);
+		task->flops=flops;
+	}
+
+	task->callback_func=&cb_func;
+
+	/*call starpu_task_submit method*/
+	Py_BEGIN_ALLOW_THREADS
+		int ret = starpu_task_submit(task);
+		assert(ret==0);
+	Py_END_ALLOW_THREADS
+
+	if (strcmp(tp_perf, "PyCapsule")==0)
+	{
+		struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
+		perf->size_base=&sizebase;
+	}
+
+	//printf("the number of reference is %ld\n", Py_REFCNT(func_py));
+	//_Py_PrintReferences(stderr);
+	//COUNTREFS();
+	return fut;
+}
+
+/*wrapper wait for all method*/
+static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_task_wait_for_all method*/
+	Py_BEGIN_ALLOW_THREADS
+		starpu_task_wait_for_all();
+	Py_END_ALLOW_THREADS
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper pause method*/
+static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_pause method*/
+	starpu_pause();
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper resume method*/
+static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_resume method*/
+	starpu_resume();
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper for the get-CPU-worker-count method*/
+static PyObject* starpu_cpu_worker_get_count_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_cpu_worker_get_count method*/
+	int num_cpu=starpu_cpu_worker_get_count();
+
+	/*return type is unsigned*/
+	return Py_BuildValue("I", num_cpu);
+}
+
+/*wrapper get min priority method*/
+static PyObject* starpu_sched_get_min_priority_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_sched_get_min_priority*/
+	int min_prio=starpu_sched_get_min_priority();
+
+	/*return type is int*/
+	return Py_BuildValue("i", min_prio);
+}
+
+/*wrapper get max priority method*/
+static PyObject* starpu_sched_get_max_priority_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_sched_get_max_priority*/
+	int max_prio=starpu_sched_get_max_priority();
+
+	/*return type is int*/
+	return Py_BuildValue("i", max_prio);
+}
+
+/*wrapper to get the number of submitted tasks that have not completed yet*/
+static PyObject* starpu_task_nsubmitted_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_task_nsubmitted*/
+	int num_task=starpu_task_nsubmitted();
+
+	/*Return the number of submitted tasks which have not completed yet */
+	return Py_BuildValue("i", num_task);
+}
+/***********************************************************************************/
+
+/***************The module’s method table and initialization function**************/
+/*method table*/
+static PyMethodDef starpupyMethods[] =
+{
+	{"_task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/
+	{"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/
+	{"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/
+	{"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/
+	{"cpu_worker_get_count", starpu_cpu_worker_get_count_wrapper, METH_VARARGS, "return the number of CPUs controlled by StarPU"}, /*get count cpu method*/
+	{"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/
+	{"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/
+	{"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/
+	{"sched_get_min_priority", starpu_sched_get_min_priority_wrapper, METH_VARARGS, "get the number of min priority"}, /*get the number of min priority*/
+	{"sched_get_max_priority", starpu_sched_get_max_priority_wrapper, METH_VARARGS, "get the number of max priority"}, /*get the number of max priority*/
+	{"task_nsubmitted", starpu_task_nsubmitted_wrapper, METH_VARARGS, "get the number of submitted tasks which have not completed yet"}, /*get the number of submitted tasks which have not completed yet*/
+	{NULL, NULL}
+};
+
+/*deallocation function*/
+static void starpupyFree(void *self)
+{
+	starpu_shutdown();
+	Py_DECREF(asyncio_module);
+	//COUNTREFS();
+}
+
+/*module definition structure*/
+static struct PyModuleDef starpupymodule =
+{
+	PyModuleDef_HEAD_INIT,
+	"starpupy", /*name of module*/
+	NULL,
+	-1,
+	starpupyMethods, /*method table*/
+	NULL,
+	NULL,
+	NULL,
+	starpupyFree /*deallocation function*/
+};
+
+/*initialization function*/
+PyMODINIT_FUNC
+PyInit_starpupy(void)
+{
+	PyEval_InitThreads();
+	/*starpu initialization*/
+	int ret = starpu_init(NULL);
+	assert(ret==0);
+	/*python asyncio import*/
+	asyncio_module = PyImport_ImportModule("asyncio");
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+	/*numpy import array*/
+	import_array();
+#endif
+	/*module import initialization*/
+	return PyModule_Create(&starpupymodule);
+}
+/***********************************************************************************/

+ 0 - 6
starpupy/tests/Makefile

@@ -1,6 +0,0 @@
-PYTHON ?= python3
-
-all:
-	PYTHONPATH=../src $(PYTHON) starpu_py.py
-	PYTHONPATH=../src STARPU_CALIBRATE=1 $(PYTHON) starpu_py_parallel.py
-

+ 0 - 101
starpupy/tests/starpu_py_parallel.py

@@ -1,101 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-import starpu
-import time
-import asyncio
-from math import sqrt
-from math import log10
-
-#generate a list to store functions
-g_func=[]
-
-#function no input no output print hello world
-def hello():
-	print ("Example 1: Hello, world!")
-g_func.append(starpu.joblib.delayed(hello)())
-
-#function no input no output
-def func1():
-	print ("Example 2: This is a function no input no output")
-g_func.append(starpu.joblib.delayed(func1)())
-
-#function no input return a value
-def func2():
-	print ("Example 3:")
-	return 12
-g_func.append(starpu.joblib.delayed(func2)())
- 
-#function has 2 int inputs and 1 int output
-def multi(a,b):
-	res_multi=a*b
-	print("Example 4: The result of ",a,"*",b,"is",res_multi)
-	return res_multi
-g_func.append(starpu.joblib.delayed(multi)(2, 3))
-
-#function has 4 float inputs and 1 float output
-def add(a,b,c,d):
-	res_add=a+b+c+d
-	print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add)
-	return res_add
-g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9))
-
-#function has 2 int inputs 1 float input and 1 float output 1 int output
-def sub(a,b,c):
-	res_sub1=a-b-c
-	res_sub2=a-b
-	print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2)
-	return res_sub1, res_sub2
-g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9))
-
-#the size of generator
-N=1000000
-
-print("************************")
-print("parallel Normal version:")
-print("************************")
-print("--input is iterable argument list, example 1")
-starpu.joblib.parallel(mode="normal", n_jobs=-2, perfmodel="first")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
-
-print("--input is iterable argument list, example 2")
-starpu.joblib.parallel(mode="normal", n_jobs=2, perfmodel="second")(starpu.joblib.delayed(log10)(i+1)for i in range(N))
-
-print("--input is iterable function list")
-starpu.joblib.parallel(mode="normal", n_jobs=3, perfmodel="third")(g_func)
-
-
-print("************************")
-print("parallel Future version:")
-print("************************")
-async def main():
-	print("--input is iterable argument list, example 1")
-	fut1=starpu.joblib.parallel(mode="future", n_jobs=-3, perfmodel="first")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
-	res1=await fut1
-	#print(res1)
-
-	print("--input is iterable argument list, example 2")
-	fut2=starpu.joblib.parallel(mode="future", n_jobs=-3, perfmodel="second")(starpu.joblib.delayed(log10)(i+1)for i in range(N))
-	res2=await fut2
-	#print(res2)
-
-	print("--input is iterable function list")
-	fut3=starpu.joblib.parallel(mode="future", n_jobs=2, perfmodel="third")(g_func)
-	res3=await fut3
-	#print(res3)
-asyncio.run(main())
-
-starpu.joblib.perfmodel_plot(perfmodel="first")
-starpu.joblib.perfmodel_plot(perfmodel="second")
-starpu.joblib.perfmodel_plot(perfmodel="third")

+ 2 - 2
tests/Makefile.am

@@ -17,8 +17,8 @@ include $(top_srcdir)/starpu.mk
 
 
 AM_CFLAGS += -Wno-unused
 AM_CFLAGS += -Wno-unused
 AM_CXXFLAGS += -Wno-unused
 AM_CXXFLAGS += -Wno-unused
-AM_FFLAGS += -Wno-unused
-AM_FCFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 2 - 1
tests/datawizard/bcsr.c

@@ -123,7 +123,8 @@ int main(int argc, char **argv)
 	if (starpu_initialize(&conf, &argc, &argv) == -ENODEV)
 	if (starpu_initialize(&conf, &argc, &argv) == -ENODEV)
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 
 
-	if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1) {
+	if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1)
+	{
 		starpu_shutdown();
 		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}

+ 1 - 2
tests/datawizard/interfaces/test_interfaces.c

@@ -16,8 +16,7 @@
 
 
 #include <starpu.h>
 #include <starpu.h>
 
 
-/* XXX Why cant we dereference a handle without this one ? */
-#include <core/sched_policy.h>
+#include <datawizard/coherency.h>
 
 
 #include <assert.h>
 #include <assert.h>
 
 

+ 1 - 0
tests/main/starpu_worker_exists.c

@@ -14,6 +14,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#define BUILDING_STARPU
 #include <starpu.h>
 #include <starpu.h>
 #include "core/workers.h"
 #include "core/workers.h"
 #include "../helper.h"
 #include "../helper.h"

+ 2 - 1
tests/microbenchs/bandwidth.c

@@ -170,7 +170,8 @@ static unsigned interleave(unsigned i)
 		return 0;
 		return 0;
 }
 }
 
 
-enum sleep_type {
+enum sleep_type
+{
 	PAUSE,
 	PAUSE,
 	NOP,
 	NOP,
 	SYNC,
 	SYNC,

+ 2 - 0
tests/microbenchs/tasks_size_overhead.c

@@ -228,6 +228,8 @@ int main(int argc, char **argv)
 		goto error;
 		goto error;
 	}
 	}
 
 
+	if (mincpus <= 0)
+		mincpus = 1;
 	/* For each number of cpus, benchmark */
 	/* For each number of cpus, benchmark */
 	for (ncpus= mincpus; ncpus <= maxcpus; ncpus += cpustep)
 	for (ncpus= mincpus; ncpus <= maxcpus; ncpus += cpustep)
 	{
 	{

+ 8 - 1
tests/perfmodels/regression_based_memset.c

@@ -213,7 +213,7 @@ static int bench_energy(int workerid, int where, enum starpu_worker_archtype arc
 		if ( (retval = starpu_energy_start(workerid, archtype)) != 0)
 		if ( (retval = starpu_energy_start(workerid, archtype)) != 0)
 		{
 		{
 			starpu_data_unregister(handle);
 			starpu_data_unregister(handle);
-			_STARPU_DISP("Energy measurement not supported for archtype %d\n", archtype);
+			_STARPU_DISP("Energy measurement not supported for archtype %s\n", starpu_perfmodel_get_archtype_name(archtype));
 			return -1;
 			return -1;
 		}
 		}
 
 
@@ -328,6 +328,9 @@ int main(int argc, char **argv)
 	starpu_conf_init(&conf);
 	starpu_conf_init(&conf);
 
 
 	/* Use a scheduler which doesn't choose the implementation */
 	/* Use a scheduler which doesn't choose the implementation */
+#ifdef STARPU_HAVE_UNSETENV
+	unsetenv("STARPU_SCHED");
+#endif
 	conf.sched_policy_name = "eager";
 	conf.sched_policy_name = "eager";
 	conf.calibrate = 1;
 	conf.calibrate = 1;
 
 
@@ -345,15 +348,19 @@ int main(int argc, char **argv)
 	{
 	{
 		memset_cl.cpu_funcs[1] = NULL;
 		memset_cl.cpu_funcs[1] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &memset_cl);
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &memset_cl);
+#ifdef STARPU_HAVE_UNSETENV
 		memset_cl.cpu_funcs[1] = memset_cpu;
 		memset_cl.cpu_funcs[1] = memset_cpu;
 		memset_cl.cpu_funcs[0] = NULL;
 		memset_cl.cpu_funcs[0] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &memset_cl);
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &memset_cl);
+#endif
 
 
 		nl_memset_cl.cpu_funcs[1] = NULL;
 		nl_memset_cl.cpu_funcs[1] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &nl_memset_cl);
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &nl_memset_cl);
+#ifdef STARPU_HAVE_UNSETENV
 		nl_memset_cl.cpu_funcs[1] = memset_cpu;
 		nl_memset_cl.cpu_funcs[1] = memset_cpu;
 		nl_memset_cl.cpu_funcs[0] = NULL;
 		nl_memset_cl.cpu_funcs[0] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &nl_memset_cl);
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &nl_memset_cl);
+#endif
 	}
 	}
 
 
 	for (i = 0; i < starpu_cuda_worker_get_count(); i++)
 	for (i = 0; i < starpu_cuda_worker_get_count(); i++)

+ 0 - 0
tools/dev/checker/starpu_check_copyright.sh


Some files were not shown because too many files changed in this diff