Explorar el Código

Merge branch 'fpga' of gitlab.inria.fr:starpu/starpu into fpga

Samuel Thibault hace 4 años
padre
commit
0465129baa
Se han modificado 100 ficheros con 4541 adiciones y 1304 borrados
  1. 4 0
      Makefile.am
  2. 144 62
      configure.ac
  3. 2 2
      contrib/ci.inria.fr/disabled/Jenkinsfile-basic
  4. 1 1
      contrib/ci.inria.fr/disabled/Jenkinsfile-windows
  5. 2 2
      contrib/ci.inria.fr/job-1-check-windows.bat
  6. 1 0
      contrib/ci.inria.fr/job-1-check.sh
  7. 275 68
      doc/doxygen/chapters/400_python.doxy
  8. 1 1
      doc/doxygen/chapters/470_simgrid.doxy
  9. BIN
      doc/doxygen/chapters/images/starpu_log.png
  10. 1854 0
      doc/doxygen/chapters/images/starpu_log_arr.eps
  11. BIN
      doc/doxygen/chapters/images/starpu_log_arr.png
  12. 339 316
      doc/doxygen/chapters/images/starpu_log.eps
  13. BIN
      doc/doxygen/chapters/images/starpu_log_list.png
  14. 1 1
      doc/doxygen/refman.tex
  15. 0 1
      doc/doxygen_dev/refman.tex
  16. 2 2
      examples/Makefile.am
  17. 2 2
      examples/cpp/add_vectors_interface.cpp
  18. 2 1
      examples/tag_example/tag_example.c
  19. 3 3
      include/fstarpu_mod.f90
  20. 2 0
      include/starpu_config.h.in
  21. 2 3
      include/starpu_scheduler.h
  22. 1 1
      libstarpu-mic.pc.in
  23. 1 1
      libstarpu.pc.in
  24. 12 0
      m4/libs.m4
  25. 3 0
      mpi/examples/Makefile.am
  26. 2 2
      mpi/examples/native_fortran/nf_mm_task_build.f90
  27. 0 1
      mpi/src/mpi/starpu_mpi_mpi.c
  28. 0 1
      mpi/src/starpu_mpi.c
  29. 3 0
      mpi/tests/Makefile.am
  30. 1 1
      socl/src/init.c
  31. 5 3
      src/Makefile.am
  32. 4 2
      src/common/rbtree_i.h
  33. 14 2
      src/common/thread.c
  34. 2 0
      src/common/utils.h
  35. 2 1
      src/core/dependencies/cg.c
  36. 15 20
      src/core/jobs.c
  37. 2 4
      src/core/jobs.h
  38. 15 12
      src/core/perfmodel/energy_model.c
  39. 4 3
      src/core/perfmodel/perfmodel_history.c
  40. 1 1
      src/core/perfmodel/perfmodel_print.c
  41. 7 14
      src/core/sched_policy.c
  42. 63 10
      src/core/simgrid.c
  43. 3 0
      src/core/simgrid.h
  44. 1 1
      src/core/task.c
  45. 3 3
      src/core/topology.c
  46. 5 4
      src/core/workers.c
  47. 5 5
      src/core/workers.h
  48. 4 2
      src/datawizard/memalloc.c
  49. 2 1
      src/datawizard/memory_nodes.c
  50. 5 3
      src/debug/traces/starpu_fxt.c
  51. 4 2
      src/drivers/cpu/driver_cpu.c
  52. 4 2
      src/drivers/cuda/driver_cuda_init.c
  53. 2 1
      src/drivers/disk/driver_disk.c
  54. 4 2
      src/drivers/mic/driver_mic_init.c
  55. 4 2
      src/drivers/mpi/driver_mpi_init.c
  56. 4 2
      src/drivers/opencl/driver_opencl_init.c
  57. 2 0
      src/profiling/profiling.c
  58. 6 3
      src/sched_policies/component_heteroprio.c
  59. 4 4
      src/sched_policies/component_worker.c
  60. 5 0
      src/sched_policies/helper_mct.c
  61. 2 1
      src/sched_policies/work_stealing_policy.c
  62. 6 4
      src/util/starpu_data_cpy.c
  63. 1 1
      starpu-1.0-mic.pc.in
  64. 1 1
      starpu-1.0.pc.in
  65. 1 1
      starpu-1.1.pc.in
  66. 1 1
      starpu-1.2.pc.in
  67. 1 1
      starpu-1.3.pc.in
  68. 2 2
      starpufft/src/starpufft-double.h
  69. 2 2
      starpufft/src/starpufft-float.h
  70. 2 2
      starpufft/src/starpufftx.c
  71. 21 0
      starpupy/Makefile.am
  72. 43 0
      starpupy/examples/Makefile.am
  73. 59 0
      starpupy/examples/execute.sh.in
  74. 9 9
      starpupy/tests/starpu_py.py
  75. 19 0
      starpupy/examples/starpu_py.sh
  76. 40 0
      starpupy/examples/starpu_py_np.py
  77. 3 7
      starpupy/src/starpu/delay.py
  78. 350 0
      starpupy/examples/starpu_py_parallel.py
  79. 19 0
      starpupy/examples/starpu_py_parallel.sh
  80. 63 0
      starpupy/src/Makefile.am
  81. 2 1
      starpupy/src/starpu/__init__.py
  82. 29 0
      starpupy/src/delay.py
  83. 63 0
      starpupy/src/intermedia.py
  84. 324 0
      starpupy/src/joblib.py
  85. 23 0
      starpupy/src/setup.cfg.in
  86. 40 0
      starpupy/src/setup.py.in
  87. 0 13
      starpupy/src/starpu/Makefile
  88. 0 147
      starpupy/src/starpu/joblib.py
  89. 0 416
      starpupy/src/starpu/starpu_task_wrapper.c
  90. 536 0
      starpupy/src/starpu_task_wrapper.c
  91. 0 6
      starpupy/tests/Makefile
  92. 0 101
      starpupy/tests/starpu_py_parallel.py
  93. 2 2
      tests/Makefile.am
  94. 2 1
      tests/datawizard/bcsr.c
  95. 1 2
      tests/datawizard/interfaces/test_interfaces.c
  96. 1 0
      tests/main/starpu_worker_exists.c
  97. 2 1
      tests/microbenchs/bandwidth.c
  98. 2 0
      tests/microbenchs/tasks_size_overhead.c
  99. 8 1
      tests/perfmodels/regression_based_memset.c
  100. 0 0
      tools/dev/checker/starpu_check_copyright.sh

+ 4 - 0
Makefile.am

@@ -53,6 +53,10 @@ if STARPU_BUILD_STARPURM
 SUBDIRS += starpurm
 endif
 
+if STARPU_BUILD_STARPUPY
+SUBDIRS += starpupy
+endif
+
 if STARPU_BUILD_SC_HYPERVISOR
 SUBDIRS += sc_hypervisor
 endif

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 144 - 62
configure.ac


+ 2 - 2
contrib/ci.inria.fr/disabled/Jenkinsfile-basic

@@ -34,7 +34,7 @@ pipeline
 		{
 			steps
 			{
-				node('autotools')
+				node('autotools2')
 				{
 					checkout scm
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
@@ -62,7 +62,7 @@ pipeline
 			{
 				script
 				{
-					labelToSelect = 'unix'
+					labelToSelect = 'unix2'
 					listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
 					{
 						node -> node.getLabelString().contains(labelToSelect) ? node.name : null

+ 1 - 1
contrib/ci.inria.fr/disabled/Jenkinsfile-windows

@@ -34,7 +34,7 @@ pipeline
 		{
 			steps
 			{
-				node('autotools')
+				node('autotools2')
 				{
 					checkout scm
 					sh 'contrib/ci.inria.fr/job-0-tarball.sh'

+ 2 - 2
contrib/ci.inria.fr/job-1-check-windows.bat

@@ -14,9 +14,9 @@ REM
 REM See the GNU Lesser General Public License in COPYING.LGPL for more details.
 REM
 
-set PATH=%PATH%;C:\MinGW\msys\1.0\bin;c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin
+set PATH=%PATH%;C:\MinGW\msys\1.0\bin;c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32
 sh -c "./job-1-build-windows.sh"
-set PATH=C:\Windows\SysWOW64;C:\Program Files (x86)\Mozilla Firefox;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\SysWOW64;C:\Program Files\Java\jre7\bin;
+set PATH=C:\Windows\SysWOW64;C:\Program Files (x86)\Mozilla Firefox;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\SysWOW64;C:\Program Files\Java\jre7\bin;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32
 set HWLOC=c:\StarPU\hwloc-win32-build-1.11.0
 
 cd starpu_install

+ 1 - 0
contrib/ci.inria.fr/job-1-check.sh

@@ -41,6 +41,7 @@ env > $PWD/env
 
 test -d $basename && chmod -R u+rwX $basename && rm -rf $basename
 tar xfz ../$tarball
+touch --date="last hour" $(find $basename)
 cd $basename
 mkdir build
 cd build

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 275 - 68
doc/doxygen/chapters/400_python.doxy


+ 1 - 1
doc/doxygen/chapters/470_simgrid.doxy

@@ -23,7 +23,7 @@
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
 platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
-3.25. SimGrid versions 3.25 and above need to be configured with -Denable_msg=ON .
+3.26. SimGrid version 3.25 needs to be configured with -Denable_msg=ON .
 Other versions may have compatibility issues. 3.17 notably does not build at
 all. MPI simulation does not work with version 3.22.
 

BIN
doc/doxygen/chapters/images/starpu_log.png


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 1854 - 0
doc/doxygen/chapters/images/starpu_log_arr.eps


BIN
doc/doxygen/chapters/images/starpu_log_arr.png


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 339 - 316
doc/doxygen/chapters/images/starpu_log.eps


BIN
doc/doxygen/chapters/images/starpu_log_list.png


+ 1 - 1
doc/doxygen/refman.tex

@@ -138,7 +138,7 @@ Documentation License”.
 
 \part{StarPU Extensions}
 
-\chapter{PythonInterface}
+\chapter{Python Interface}
 \label{PythonInterface}
 \hypertarget{PythonInterface}{}
 \input{PythonInterface}

+ 0 - 1
doc/doxygen_dev/refman.tex

@@ -148,7 +148,6 @@ Documentation License”.
 \input{starpu__data__cpy_8h}
 \input{starpu__debug__helpers_8h}
 \input{starpu__fxt_8h}
-\input{starpu__parameters_8h}
 \input{starpu__spinlock_8h}
 \input{starpu__task__insert__utils_8h}
 \input{tags_8h}

+ 2 - 2
examples/Makefile.am

@@ -20,8 +20,8 @@ include $(top_srcdir)/starpu.mk
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
-AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused
-AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 2 - 2
examples/cpp/add_vectors_interface.cpp

@@ -61,9 +61,9 @@ class my_allocator
 		node = a.get_node();
 	}
 
-	explicit my_allocator(const unsigned node)
+	explicit my_allocator(const unsigned thenode)
 	{
-		this->node = node;
+		this->node = thenode;
 	}
 
 	pointer allocate(size_type n, const void * = 0)

+ 2 - 1
examples/tag_example/tag_example.c

@@ -223,7 +223,8 @@ int main(int argc, char **argv)
 	int ret;
 
 #ifdef STARPU_HAVE_HELGRIND_H
-	if (RUNNING_ON_VALGRIND) {
+	if (RUNNING_ON_VALGRIND)
+	{
 		ni /= 2;
 		nj /= 2;
 		nk /= 2;

+ 3 - 3
include/fstarpu_mod.f90

@@ -1054,7 +1054,7 @@ module fstarpu_mod
                 end subroutine fstarpu_vector_data_register
 
                 ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
-                subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) &
                                 bind(C,name="starpu_vector_ptr_register")
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         type(c_ptr), intent(out) :: dh
@@ -1092,7 +1092,7 @@ module fstarpu_mod
                 end subroutine fstarpu_variable_data_register
 
                 ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
-                subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset, ld) &
+                subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) &
                                 bind(C,name="starpu_variable_ptr_register")
                         use iso_c_binding, only: c_ptr, c_int, c_size_t
                         type(c_ptr), intent(out) :: dh
@@ -1758,7 +1758,7 @@ module fstarpu_mod
                 end function fstarpu_data_descr_array_alloc
 
                 ! struct starpu_data_descr *fstarpu_data_descr_alloc(void);
-                function fstarpu_data_descr_alloc (nb) bind(C)
+                function fstarpu_data_descr_alloc () bind(C)
                         use iso_c_binding, only: c_ptr
                         type(c_ptr) :: fstarpu_data_descr_alloc
                 end function fstarpu_data_descr_alloc

+ 2 - 0
include/starpu_config.h.in

@@ -331,4 +331,6 @@ typedef ssize_t starpu_ssize_t;
 #undef STARPU_HAVE_STATEMENT_EXPRESSIONS
 #undef STARPU_PERF_MODEL_DIR
 
+#undef STARPU_PYTHON_HAVE_NUMPY
+
 #endif

+ 2 - 3
include/starpu_scheduler.h

@@ -294,9 +294,8 @@ int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_t
 /**
    The scheduling policy may put tasks directly into a worker’s local
    queue so that it is not always necessary to create its own queue
-   when the local queue is sufficient. If \p back is not 0, \p task is
-   put at the back of the queue where the worker will pop tasks first.
-   Setting \p back to 0 therefore ensures a FIFO ordering.
+   when the local queue is sufficient. \p back is ignored: the task priority is
+   used to order tasks in this queue.
 */
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
 

+ 1 - 1
libstarpu-mic.pc.in

@@ -22,6 +22,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
libstarpu.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 12 - 0
m4/libs.m4

@@ -202,3 +202,15 @@ AC_DEFUN([IS_SUPPORTED_FLAG],
 	IS_SUPPORTED_FFLAG($1)
 	IS_SUPPORTED_FCFLAG($1)
 ])
+
+# AC_PYTHON_MODULE(modulename, [action-if-found], [action-if-not-found])
+# Check if the given python module is available
+AC_DEFUN([AC_PYTHON_MODULE],
+[
+	echo "import $1" | $PYTHON - 2>/dev/null
+	if test $? -ne 0 ; then
+	   	$3
+	else
+		$2
+	fi
+])

+ 3 - 0
mpi/examples/Makefile.am

@@ -108,6 +108,9 @@ endif
 endif
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS)

+ 2 - 2
mpi/examples/native_fortran/nf_mm_task_build.f90

@@ -169,7 +169,7 @@ program nf_mm
         do b_col=1,NB
            do b_row=1,NB
               task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
-                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_A(b_row), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 C_NULL_PTR /))
@@ -177,7 +177,7 @@ program nf_mm
                  ret = fstarpu_task_submit(task)
               endif
               call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
-                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_A(b_row), &
                                                 FSTARPU_R,  dh_B(b_col), &
                                                 FSTARPU_RW, dh_C(b_row,b_col), &
                                                 C_NULL_PTR /))

+ 0 - 1
mpi/src/mpi/starpu_mpi_mpi.c

@@ -41,7 +41,6 @@
 #include <core/simgrid.h>
 #include <core/task.h>
 #include <core/topology.h>
-#include <core/workers.h>
 
 #ifdef STARPU_USE_MPI_MPI
 

+ 0 - 1
mpi/src/starpu_mpi.c

@@ -33,7 +33,6 @@
 #include <core/simgrid.h>
 #include <core/task.h>
 #include <core/topology.h>
-#include <core/workers.h>
 
 static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
 {

+ 3 - 0
mpi/tests/Makefile.am

@@ -84,6 +84,9 @@ endif
 endif
 
 AM_CFLAGS += -Wno-unused
+AM_CXXFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(STARPU_CUDA_LDFLAGS)

+ 1 - 1
socl/src/init.c

@@ -16,7 +16,7 @@
  */
 
 #include <stdlib.h>
-#include "../src/core/workers.h"
+#include "../src/common/utils.h"
 #include "socl.h"
 #include "gc.h"
 #include "mem_objects.h"

+ 5 - 3
src/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2009-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 # Copyright (C) 2013       Simon Archipoff
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -21,6 +21,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/include/ -DBUILDING_STARPU -DSTARPU_DATADIR='"$(da
 AM_CPPFLAGS += $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS += $(FXT_CFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(STARPU_RCCE_CPPFLAGS)
 LIBS += -lm $(LIBSTARPU_LDFLAGS)
+if STARPU_USE_MPI_MASTER_SLAVE
+LIBS += $(MPICC_LDFLAGS)
+endif
 
 SUBDIRS =
 
@@ -60,8 +63,7 @@ endif STARPU_HAVE_WINDOWS
 
 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 
-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined									\
-  -version-info $(libstarpu_so_version)
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined -version-info $(libstarpu_so_version)
 
 if STARPU_HAVE_DARWIN
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS += \

+ 4 - 2
src/common/rbtree_i.h

@@ -44,7 +44,8 @@
  * architectures, as long as the nodes aren't embedded in structures with
  * special alignment constraints such as member packing.
  */
-struct starpu_rbtree_node {
+struct starpu_rbtree_node
+{
     uintptr_t parent;
     struct starpu_rbtree_node *children[2];
 };
@@ -52,7 +53,8 @@ struct starpu_rbtree_node {
 /**
  * Red-black tree structure.
  */
-struct starpu_rbtree {
+struct starpu_rbtree
+{
     struct starpu_rbtree_node *root;
 };
 

+ 14 - 2
src/common/thread.c

@@ -96,14 +96,22 @@ int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const s
 	if (attr && attr->stacksize)
 		sg_actor_set_stacksize(*thread, attr->stacksize);
 #endif
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(*thread, tsd);
+#else
 	sg_actor_data_set(*thread, tsd);
+#endif
 	sg_actor_start(*thread, _starpu_simgrid_thread_start, 2, _args);
 #else
 	*thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args);
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(*thread, tsd);
+#else
 	sg_actor_data_set(*thread, tsd);
 #endif
 #endif
+#endif
 #ifndef HAVE_SG_ACTOR_SET_STACKSIZE
 	if (attr && attr->stacksize)
 		_starpu_simgrid_set_stack_size(_starpu_default_stack_size);
@@ -328,7 +336,9 @@ extern void *smpi_process_get_user_data();
 int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 {
 	void **array;
-#ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_GET_DATA
+	array = sg_actor_get_data(sg_actor_self());
+#elif defined(HAVE_SG_ACTOR_DATA)
 	array = sg_actor_data(sg_actor_self());
 #else
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)
@@ -355,7 +365,9 @@ int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 {
 	void **array;
-#ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_GET_DATA
+	array = sg_actor_get_data(sg_actor_self());
+#elif defined(HAVE_SG_ACTOR_DATA)
 	array = sg_actor_data(sg_actor_self());
 #else
 #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)

+ 2 - 0
src/common/utils.h

@@ -183,4 +183,6 @@ int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex);
 
 void _starpu_util_init(void);
 
+enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
+
 #endif // __COMMON_UTILS_H__

+ 2 - 1
src/core/dependencies/cg.c

@@ -221,7 +221,8 @@ void _starpu_notify_cg(void *pred STARPU_ATTRIBUTE_UNUSED, struct _starpu_cg *cg
 					tag_successors->ndeps_completed = 0;
 					/* This releases the lock */
 					_starpu_tag_set_ready(tag);
-				} else
+				}
+				else
 					_starpu_spin_unlock(&tag->lock);
 				break;
 			}

+ 15 - 20
src/core/jobs.c

@@ -347,19 +347,10 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 				_starpu_spin_unlock(&handle->header_lock);
 		}
 	}
+
 	/* Check nowhere before releasing the sequential consistency (which may
 	 * unregister the handle and free its switch_cl, and thus task->cl here.  */
 	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE;
-	/* If this is a continuation, we do not release task dependencies now.
-	 * Task dependencies will be released only when the continued task
-	 * fully completes */
-	if (!continuation)
-	{
-		/* Tell other tasks that we don't exist any more, thus no need for
-		 * implicit dependencies any more.  */
-		_starpu_release_task_enforce_sequential_consistency(j);
-	}
-
 	/* If the job was executed on a combined worker there is no need for the
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * data as it's not linked to an actual worker */
@@ -395,6 +386,16 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	if (!callback && task->cl)
 		callback = task->cl->callback_func;
 
+	/* If this is a continuation, we do not release task dependencies now.
+	 * Task dependencies will be released only when the continued task
+	 * fully completes */
+	if (!continuation)
+	{
+		/* Tell other tasks that we don't exist any more, thus no need for
+		 * implicit dependencies any more.  */
+		_starpu_release_task_enforce_sequential_consistency(j);
+	}
+
 	/* Task does not have a cl, but has explicit data dependencies, we need
 	 * to tell them that we will not exist any more before notifying the
 	 * tasks waiting for us
@@ -764,14 +765,14 @@ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker)
 		}
 	}
 
-	if (!starpu_task_list_empty(&worker->local_tasks))
-		task = starpu_task_list_pop_front(&worker->local_tasks);
+	if (!starpu_task_prio_list_empty(&worker->local_tasks))
+		task = starpu_task_prio_list_pop_front_highest(&worker->local_tasks);
 
 	_starpu_pop_task_end(task);
 	return task;
 }
 
-int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int prio)
+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task)
 {
 	/* Check that the worker is able to execute the task ! */
 	STARPU_ASSERT(task && task->cl);
@@ -814,13 +815,7 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 	}
 	else
 	{
-#ifdef STARPU_DEVEL
-#warning FIXME use a prio_list
-#endif
-		if (prio)
-			starpu_task_list_push_front(&worker->local_tasks, task);
-		else
-			starpu_task_list_push_back(&worker->local_tasks, task);
+		starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	}
 
 	starpu_wake_worker_locked(worker->workerid);

+ 2 - 4
src/core/jobs.h

@@ -269,10 +269,8 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_p
 struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
 
 /** Put a task into the pool of tasks that are explicitly attributed to the
- * specified worker. If "back" is set, the task is put at the back of the list.
- * Considering the tasks are popped from the back, this value should be 0 to
- * enforce a FIFO ordering. */
-int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int prio);
+ * specified worker. */
+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task);
 
 #define _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].index : job->ordered_buffers[i].index)
 #define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].handle : job->ordered_buffers[i].handle)

+ 15 - 12
src/core/perfmodel/energy_model.c

@@ -56,8 +56,11 @@ static const int N_EVTS = 2;
 
 static int nsockets;
 
-static const char* event_names[] = { "rapl::RAPL_ENERGY_PKG:cpu=%d",
-				     "rapl::RAPL_ENERGY_DRAM:cpu=%d"};
+static const char* event_names[] =
+{
+	"rapl::RAPL_ENERGY_PKG:cpu=%d",
+	"rapl::RAPL_ENERGY_DRAM:cpu=%d"
+};
 
 static int add_event(int EventSet, int socket);
 
@@ -66,9 +69,6 @@ static int add_event(int EventSet, int socket);
 /*must be initialized to PAPI_NULL before calling PAPI_create_event*/
 static int EventSet = PAPI_NULL;
 
-/*This is where we store the values we read from the eventset */
-static long long *values;
-
 #endif
 
 static double t1;
@@ -99,9 +99,6 @@ int starpu_energy_start(int workerid, enum starpu_worker_archtype archi)
 
 		nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
 
-		values=calloc(nsockets * N_EVTS,sizeof(long long));
-		STARPU_ASSERT(values);
-
 		if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
 			ERROR_RETURN(retval);
 
@@ -178,6 +175,9 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 	{
 		STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n");
 
+		/*This is where we store the values we read from the eventset */
+		long long values[nsockets*N_EVTS];
+
 		/* Stop counting and store the values into the array */
 		if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK)
 			ERROR_RETURN(retval);
@@ -196,9 +196,6 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 				      delta, t, delta/(t*1.0E-6));
 			}
 		}
-		free(values);
-
-		energy = energy * 0.23 / 1.0e9 / ntasks;
 
 		/*removes all events from a PAPI event set */
 		if ( (retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK)
@@ -242,7 +239,7 @@ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task,
 
 	arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
 
-	starpu_perfmodel_update_history(model, task, arch, cpuid, nimpl, energy);
+	starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks);
 
 	return retval;
 }
@@ -266,6 +263,12 @@ static int add_event(int eventSet, int socket)
 		retval = PAPI_add_named_event(eventSet, buf);
 		if (retval != PAPI_OK)
 		{
+			if (!strcmp(event_names[i], "rapl::RAPL_ENERGY_DRAM:cpu=%d"))
+			{
+				/* Ok, too bad */
+				_STARPU_DISP("Note: DRAM energy measurement not available\n");
+				return PAPI_OK;
+			}
 			_STARPU_DISP("cannot add event '%s': %d\n", buf, retval);
 			return retval;
 		}

+ 4 - 3
src/core/perfmodel/perfmodel_history.c

@@ -1243,7 +1243,8 @@ void _starpu_initialize_registered_performance_models(void)
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
 
-	for (archtype = 0; archtype < STARPU_NARCH; archtype++) {
+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
+	{
 		char name[128];
 		const char *arch = starpu_worker_get_type_as_env_var(archtype);
 		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
@@ -1518,8 +1519,8 @@ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
 	return 0;
 }
 
-int starpu_perfmodel_deinit(struct starpu_perfmodel *model){
-
+int starpu_perfmodel_deinit(struct starpu_perfmodel *model)
+{
 	_starpu_deinitialize_performance_model(model);
 	free(model->state);
 	model->state = NULL;

+ 1 - 1
src/core/perfmodel/perfmodel_print.c

@@ -30,7 +30,7 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 	ptr = per_arch_model->list;
 
 	if (!parameter && ptr)
-		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us)\tstddev (us)\t\tn\n");
+		fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tstddev (us or J)\t\tn\n");
 
 	while (ptr)
 	{

+ 7 - 14
src/core/sched_policy.c

@@ -372,10 +372,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 		}
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 
-		if(task->priority > 0)
-			return _starpu_push_local_task(worker, task, 1);
-		else
-			return _starpu_push_local_task(worker, task, 0);
+		return _starpu_push_local_task(worker, task);
 	}
 	else
 	{
@@ -406,7 +403,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
 			_STARPU_TRACE_JOB_PUSH(alias, alias->priority);
 			worker = _starpu_get_worker_struct(combined_workerid[j]);
-			ret |= _starpu_push_local_task(worker, alias, 0);
+			ret |= _starpu_push_local_task(worker, alias);
 		}
 
 		return ret;
@@ -632,7 +629,8 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 				enum starpu_worker_archtype type;
 				for (type = 0; type < STARPU_NARCH; type++)
 				{
-					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) {
+					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type))
+					{
 						if (config->arch_nodeid[type] >= 0)
 							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
 						break;
@@ -1032,7 +1030,7 @@ pick:
 	}
 
 	task->mf_skip = 1;
-	starpu_task_list_push_back(&worker->local_tasks, task);
+	starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	goto pick;
 
 profiling:
@@ -1174,16 +1172,11 @@ void _starpu_wait_on_sched_event(void)
 	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 }
 
-/* The scheduling policy may put tasks directly into a worker's local queue so
- * that it is not always necessary to create its own queue when the local queue
- * is sufficient. If "back" not null, the task is put at the back of the queue
- * where the worker will pop tasks first. Setting "back" to 0 therefore ensures
- * a FIFO ordering. */
-int starpu_push_local_task(int workerid, struct starpu_task *task, int prio)
+int starpu_push_local_task(int workerid, struct starpu_task *task, int back STARPU_ATTRIBUTE_UNUSED)
 {
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 
-	return  _starpu_push_local_task(worker, task, prio);
+	return  _starpu_push_local_task(worker, task);
 }
 
 void _starpu_print_idle_time()

+ 63 - 10
src/core/simgrid.c

@@ -357,11 +357,16 @@ void _starpu_start_simgrid(int *argc, char **argv)
 	int limit_bandwidth = starpu_get_env_number("STARPU_LIMIT_BANDWIDTH");
 	if (limit_bandwidth >= 0)
 	{
-#ifdef HAVE_SG_LINK_BANDWIDTH_SET
+#if defined(HAVE_SG_LINK_BANDWIDTH_SET) || defined(HAVE_SG_LINK_SET_BANDWIDTH)
 		sg_link_t *links = sg_link_list();
 		int count = sg_link_count(), i;
-		for (i = 0; i < count; i++) {
+		for (i = 0; i < count; i++)
+		{
+#ifdef HAVE_SG_LINK_SET_BANDWIDTH
+			sg_link_set_bandwidth(links[i], limit_bandwidth * 1000000.);
+#else
 			sg_link_bandwidth_set(links[i], limit_bandwidth * 1000000.);
+#endif
 		}
 #else
 		_STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26, thus ignored\n", limit_bandwidth);
@@ -492,7 +497,11 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 
 #if defined(HAVE_SG_ACTOR_ATTACH) && defined (HAVE_SG_ACTOR_DATA)
 		sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
+#ifdef HAVE_SG_ACTOR_SET_DATA
+		sg_actor_set_data(actor, tsd);
+#else
 		sg_actor_data_set(actor, tsd);
+#endif
 #else
 		MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
 #endif
@@ -519,7 +528,11 @@ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv
 		void **tsd;
 		_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+		sg_actor_set_data(sg_actor_self(), tsd);
+#else
 		sg_actor_data_set(sg_actor_self(), tsd);
+#endif
 #else
 		smpi_process_set_user_data(tsd);
 #endif
@@ -735,6 +748,9 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 		 * to be able to easily check scheduling robustness */
 	}
 
+#ifdef HAVE_SG_HOST_GET_SPEED
+	flops = length/1000000.0*sg_host_get_speed(sg_host_self());
+#else
 #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed)
 #  if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self)
 	flops = length/1000000.0*sg_host_speed(sg_host_self());
@@ -746,6 +762,7 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 #else
 	flops = length/1000000.0*MSG_get_host_speed(MSG_host_self());
 #endif
+#endif
 
 #ifndef HAVE_SG_ACTOR_SELF_EXECUTE
 	simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL);
@@ -1210,14 +1227,22 @@ starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t
 	_STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_INIT
 	actor = sg_actor_init(name, host);
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(actor, tsd);
+#else
 	sg_actor_data_set(actor, tsd);
+#endif
 	sg_actor_start(actor, code, argc, argv);
 #else
 	actor = MSG_process_create_with_arguments(name, code, tsd, host, argc, argv);
 #ifdef HAVE_SG_ACTOR_DATA
+#ifdef HAVE_SG_ACTOR_SET_DATA
+	sg_actor_set_data(actor, tsd);
+#else
 	sg_actor_data_set(actor, tsd);
 #endif
 #endif
+#endif
 	return actor;
 }
 
@@ -1251,7 +1276,7 @@ starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node)
 
 void _starpu_simgrid_count_ngpus(void)
 {
-#if (defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
+#if (defined(HAVE_SG_LINK_GET_NAME) || defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
 	unsigned src, dst;
 	starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM");
 
@@ -1261,7 +1286,7 @@ void _starpu_simgrid_count_ngpus(void)
 		{
 			int busid;
 			starpu_sg_host_t srchost, dsthost;
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(SD_link_t), NULL);
 			SD_link_t *route;
 #else
@@ -1281,8 +1306,12 @@ void _starpu_simgrid_count_ngpus(void)
 
 			srchost = _starpu_simgrid_get_memnode_host(src);
 			dsthost = _starpu_simgrid_get_memnode_host(dst);
-#if defined(HAVE_SG_HOST_ROUTE)  || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE)  || defined(sg_host_route)
+#ifdef HAVE_SG_HOST_GET_ROUTE
+			sg_host_get_route(srchost, dsthost, route_dynar);
+#else
 			sg_host_route(srchost, dsthost, route_dynar);
+#endif
 			routesize = xbt_dynar_length(route_dynar);
 			route = xbt_dynar_to_array(route_dynar);
 #else
@@ -1293,7 +1322,13 @@ void _starpu_simgrid_count_ngpus(void)
 			/* If it goes through "Host", do not care, there is no
 			 * direct transfer support */
 			for (i = 0; i < routesize; i++)
-				if (!strcmp(sg_link_name(route[i]), "Host"))
+				if (
+#ifdef HAVE_SG_LINK_GET_NAME
+					!strcmp(sg_link_get_name(route[i]), "Host")
+#else
+					!strcmp(sg_link_name(route[i]), "Host")
+#endif
+					)
 					break;
 			if (i < routesize)
 				continue;
@@ -1302,7 +1337,11 @@ void _starpu_simgrid_count_ngpus(void)
 			through = -1;
 			for (i = 0; i < routesize; i++)
 			{
+#ifdef HAVE_SG_LINK_GET_NAME
+				name = sg_link_get_name(route[i]);
+#else
 				name = sg_link_name(route[i]);
+#endif
 				size_t len = strlen(name);
 				if (!strcmp(" through", name+len-8))
 					through = i;
@@ -1315,7 +1354,11 @@ void _starpu_simgrid_count_ngpus(void)
 				_STARPU_DEBUG("Didn't find through-link for %d->%d\n", src, dst);
 				continue;
 			}
+#ifdef HAVE_SG_LINK_GET_NAME
+			name = sg_link_get_name(route[through]);
+#else
 			name = sg_link_name(route[through]);
+#endif
 
 			/*
 			 * count how many direct routes go through it between
@@ -1339,10 +1382,14 @@ void _starpu_simgrid_count_ngpus(void)
 
 				starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2);
 				int routesize2;
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(SD_link_t), NULL);
 				SD_link_t *route2;
+#ifdef HAVE_SG_HOST_GET_ROUTE
+				sg_host_get_route(srchost2, ramhost, route_dynar2);
+#else
 				sg_host_route(srchost2, ramhost, route_dynar2);
+#endif
 				routesize2 = xbt_dynar_length(route_dynar2);
 				route2 = xbt_dynar_to_array(route_dynar2);
 #else
@@ -1351,19 +1398,25 @@ void _starpu_simgrid_count_ngpus(void)
 #endif
 
 				for (i = 0; i < routesize2; i++)
-					if (!strcmp(name, sg_link_name(route2[i])))
+					if (
+#ifdef HAVE_SG_LINK_GET_NAME
+						!strcmp(name, sg_link_get_name(route2[i]))
+#else
+						!strcmp(name, sg_link_name(route2[i]))
+#endif
+						)
 					{
 						/* This GPU goes through this PCI bridge to access RAM */
 						ngpus++;
 						break;
 					}
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				free(route2);
 #endif
 			}
 			_STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus);
 			starpu_bus_set_ngpus(busid, ngpus);
-#if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			free(route);
 #endif
 		}

+ 3 - 0
src/core/simgrid.h

@@ -24,6 +24,9 @@
 extern "C"
 {
 #endif
+
+/* Note: when changing something here, update the include list in configure.ac
+ * in the part that tries to enable stdc++11 */
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #include <simgrid/msg.h>

+ 1 - 1
src/core/task.c

@@ -1084,7 +1084,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
 	struct _starpu_worker *worker;
 	worker = _starpu_get_worker_struct(workerid);
-	starpu_task_list_push_back(&worker->local_tasks, task);
+	starpu_task_prio_list_push_back(&worker->local_tasks, task);
 	starpu_wake_worker_locked(worker->workerid);
 
 	_starpu_profiling_set_task_push_end_time(task);

+ 3 - 3
src/core/topology.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2009-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  * Copyright (C) 2013       Thibaut Lambert
  * Copyright (C) 2016       Uppsala University
  *
@@ -464,7 +464,7 @@ struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  * Discover the topology of the machine
  */
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID)
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 						int *current, int *workers_gpuid,
 						const char *varname, unsigned nhwgpus,
@@ -1817,7 +1817,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
 	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
 	for (i = 0; i < nopencl; i++)
-		topology->nworker[STARPU_CUDA_WORKER][i] = 1;
+		topology->nworker[STARPU_OPENCL_WORKER][i] = 1;
 	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
 
 	_starpu_initialize_workers_opencl_gpuid(config);

+ 5 - 4
src/core/workers.c

@@ -688,7 +688,7 @@ void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machin
 	/* memory_node initialized by topology.c */
 	STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
-	starpu_task_list_init(&workerarg->local_tasks);
+	starpu_task_prio_list_init(&workerarg->local_tasks);
 	_starpu_ctx_change_list_init(&workerarg->ctx_change_list);
 	workerarg->local_ordered_tasks = NULL;
 	workerarg->local_ordered_tasks_size = 0;
@@ -1039,7 +1039,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
         if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
         {
                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
-                struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
+                struct _starpu_worker * worker_zero STARPU_ATTRIBUTE_UNUSED = &worker_set_zero->workers[0];
                 STARPU_PTHREAD_CREATE_ON(
                                 "zero",
                                 &worker_set_zero->worker_thread,
@@ -1445,7 +1445,8 @@ int _starpu_get_catch_signals(void)
 	return _starpu_config.conf.catch_signals;
 }
 
-void starpu_drivers_preinit(void) {
+void starpu_drivers_preinit(void)
+{
 	_starpu_cpu_preinit();
 	_starpu_cuda_preinit();
 	_starpu_opencl_preinit();
@@ -1828,7 +1829,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
 		}
 
 out:
-		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
+		STARPU_ASSERT(starpu_task_prio_list_empty(&worker->local_tasks));
 		for (n = 0; n < worker->local_ordered_tasks_size; n++)
 			STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL);
 		_starpu_sched_ctx_list_delete(&worker->sched_ctx_list);

+ 5 - 5
src/core/workers.h

@@ -61,8 +61,6 @@
 
 #define STARPU_MAX_PIPELINE 4
 
-enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
-
 struct _starpu_ctx_change_list;
 
 /** This is initialized by _starpu_worker_init() */
@@ -125,7 +123,7 @@ LIST_TYPE(_starpu_worker,
 	     * subsequent processing once worker completes the ongoing scheduling
 	     * operation */
 	struct _starpu_ctx_change_list ctx_change_list;
-	struct starpu_task_list local_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue */
+	struct starpu_task_prio_list local_tasks; /**< this queue contains tasks that have been explicitly submitted to that queue */
 	struct starpu_task **local_ordered_tasks; /**< this queue contains tasks that have been explicitely submitted to that queue with an explicit order */
 	unsigned local_ordered_tasks_size; /**< this records the size of local_ordered_tasks */
 	unsigned current_ordered_task; /**< this records the index (within local_ordered_tasks) of the next ordered task to be executed */
@@ -427,7 +425,8 @@ struct _starpu_machine_config
 };
 
 /** Provides information for a device driver */
-struct starpu_driver_info {
+struct starpu_driver_info
+{
 	const char *name_upper;	/**< Name of worker type in upper case */
 	const char *name_var;	/**< Name of worker type for environment variables */
 	const char *name_lower;	/**< Name of worker type in lower case */
@@ -441,7 +440,8 @@ extern struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
 void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
 
 /** Provides information for a memory node driver */
-struct starpu_memory_driver_info {
+struct starpu_memory_driver_info
+{
 	const char *name_upper;	/**< Name of memory in upper case */
 	enum starpu_worker_archtype worker_archtype;	/**< Kind of device */
 };

+ 4 - 2
src/datawizard/memalloc.c

@@ -1513,7 +1513,8 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			/* First try to flush data explicitly marked for freeing */
 			size_t freed = flush_memchunk_cache(dst_node, reclaim);
 
-			if (freed >= reclaim) {
+			if (freed >= reclaim)
+			{
 				/* That freed enough data, retry allocating */
 				prefetch_out_of_memory[dst_node] = 0;
 				continue;
@@ -1550,7 +1551,8 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
 			prefetch_out_of_memory[dst_node] = 0;
-		} else
+		}
+		else
 			prefetch_out_of_memory[dst_node] = 0;
 	}
 	while((allocated_memory == -ENOMEM) && attempts++ < 2);

+ 2 - 1
src/datawizard/memory_nodes.c

@@ -180,7 +180,8 @@ int starpu_memory_node_get_devid(unsigned node)
 	return _starpu_descr.devid[node];
 }
 
-enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) {
+enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind)
+{
 	enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype;
 	STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind);
 	return archtype;

+ 5 - 3
src/debug/traces/starpu_fxt.c

@@ -193,7 +193,8 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		fprintf(tasks_file, "Name: %s\n", task->name);
 	if (task->model_name)
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
-	if (task->file) {
+	if (task->file)
+	{
 		fprintf(tasks_file, "File: %s\n", task->file);
 		fprintf(tasks_file, "Line: %d\n", task->line);
 	}
@@ -4129,7 +4130,8 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 
 	if (out_paje_file && !options->no_bus)
 	{
-		while (!_starpu_communication_list_empty(&communication_list)) {
+		while (!_starpu_communication_list_empty(&communication_list))
+		{
 			struct _starpu_communication*itor;
 			itor = _starpu_communication_list_pop_front(&communication_list);
 
@@ -4423,7 +4425,7 @@ void _starpu_fxt_number_events_file_init(struct starpu_fxt_options *options)
 			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno));
 
 		/* FUT_SETUP_CODE is the event with the maximal value */
-		number_events = calloc(FUT_SETUP_CODE+1, sizeof(uint64_t));
+		_STARPU_CALLOC(number_events, FUT_SETUP_CODE+1, sizeof(uint64_t));
 	}
 	else
 		number_events_file = NULL;

+ 4 - 2
src/drivers/cpu/driver_cpu.c

@@ -60,7 +60,8 @@
 #include <windows.h>
 #endif
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "CPU",
 	.name_var = "CPU",
 	.name_lower = "cpu",
@@ -68,7 +69,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 0.5f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "NUMA",
 	.worker_archtype = STARPU_CPU_WORKER,
 };

+ 4 - 2
src/drivers/cuda/driver_cuda_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <drivers/cuda/driver_cuda.h>
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "CUDA",
 	.name_var = "CUDA",
 	.name_lower = "cuda",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 13.33f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "CUDA",
 	.worker_archtype = STARPU_CUDA_WORKER,
 };

+ 2 - 1
src/drivers/disk/driver_disk.c

@@ -23,7 +23,8 @@
 #include <datawizard/coherency.h>
 #include <datawizard/memory_nodes.h>
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "Disk",
 	.worker_archtype = (enum starpu_worker_archtype) -1,
 };

+ 4 - 2
src/drivers/mic/driver_mic_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <drivers/mic/driver_mic_source.h>
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "MIC",
 	.name_var = "MIC",
 	.name_lower = "mic",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 0.5f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "MIC",
 	.worker_archtype = STARPU_MIC_WORKER,
 };

+ 4 - 2
src/drivers/mpi/driver_mpi_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <drivers/mpi/driver_mpi_source.h>
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "MPI_MS",
 	.name_var = "MPI_MS",
 	.name_lower = "mpi_ms",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 1.0f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "MPI_MS",
 	.worker_archtype = STARPU_MPI_MS_WORKER,
 };

+ 4 - 2
src/drivers/opencl/driver_opencl_init.c

@@ -17,7 +17,8 @@
 #include <core/workers.h>
 #include <drivers/opencl/driver_opencl.h>
 
-static struct starpu_driver_info driver_info = {
+static struct starpu_driver_info driver_info =
+{
 	.name_upper = "OpenCL",
 	.name_var = "OPENCL",
 	.name_lower = "opencl",
@@ -25,7 +26,8 @@ static struct starpu_driver_info driver_info = {
 	.alpha = 12.22f,
 };
 
-static struct starpu_memory_driver_info memory_driver_info = {
+static struct starpu_memory_driver_info memory_driver_info =
+{
 	.name_upper = "OpenCL",
 	.worker_archtype = STARPU_OPENCL_WORKER,
 };

+ 2 - 0
src/profiling/profiling.c

@@ -201,11 +201,13 @@ void _starpu_profiling_papi_task_start_counters(struct starpu_task *task)
 		for(i=0; i<papi_nevents; i++)
 		{
 			int ret = PAPI_add_event(profiling_info->papi_event_set, papi_events[i]);
+#ifdef PAPI_ECMP_DISABLED
 			if (ret == PAPI_ECMP_DISABLED && !warned_component_unavailable)
 			{
 				_STARPU_MSG("Error while registering Papi event: Component containing event is disabled. Try running `papi_component_avail` to get more information.\n");
 				warned_component_unavailable = 1;
 			}
+#endif
 			profiling_info->papi_values[i]=0;
 		}
 		PAPI_reset(profiling_info->papi_event_set);

+ 6 - 3
src/sched_policies/component_heteroprio.c

@@ -434,9 +434,12 @@ static int heteroprio_push_task(struct starpu_sched_component * component, struc
 			/* Didn't find it, add one */
 			data->naccel++;
 
-			float *newaccel = malloc(data->naccel * sizeof(*newaccel));
-			struct _starpu_prio_deque **newbuckets = malloc(data->naccel * sizeof(*newbuckets));
-			struct _starpu_prio_deque *newbucket = malloc(sizeof(*newbucket));
+			float *newaccel;
+			_STARPU_MALLOC(newaccel, data->naccel * sizeof(*newaccel));
+			struct _starpu_prio_deque **newbuckets;
+			_STARPU_MALLOC(newbuckets, data->naccel * sizeof(*newbuckets));
+			struct _starpu_prio_deque *newbucket;
+			_STARPU_MALLOC(newbucket, sizeof(*newbucket));
 			_starpu_prio_deque_init(newbucket);
 			int inserted = 0;
 

+ 4 - 4
src/sched_policies/component_worker.c

@@ -510,11 +510,11 @@ static double simple_worker_estimated_load(struct starpu_sched_component * compo
 	struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
 	int nb_task = 0;
 	STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex);
-	struct starpu_task_list list = worker->local_tasks;
+	struct starpu_task_prio_list *list = &worker->local_tasks;
 	struct starpu_task * task;
-	for(task = starpu_task_list_front(&list);
-	    task != starpu_task_list_end(&list);
-	    task = starpu_task_list_next(task))
+	for(task = starpu_task_prio_list_begin(list);
+	    task != starpu_task_prio_list_end(list);
+	    task = starpu_task_prio_list_next(list, task))
 		nb_task++;
 	STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex);
 	struct _starpu_worker_component_data * d = component->data;

+ 5 - 0
src/sched_policies/helper_mct.c

@@ -88,6 +88,11 @@ static double compute_expected_time(double now, double predicted_end, double pre
 
 double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy)
 {
+	if(isnan(local_energy))
+		/* Energy not calibrated yet, but we cannot do this
+		 * automatically anyway, so ignoring this for now */
+		local_energy = 0.;
+
 	/* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */
 	
 	/* min_exp_end_of_task is the minimum end time of the task over all workers */

+ 2 - 1
src/sched_policies/work_stealing_policy.c

@@ -145,7 +145,8 @@ static int select_victim_round_robin(struct _starpu_work_stealing_data *ws, unsi
 		if (!ws->per_worker[workerids[worker]].notask)
 		{
 			if (ws->per_worker[workerids[worker]].busy
-						   || starpu_worker_is_blocked_in_parallel(workerids[worker])) {
+			    || starpu_worker_is_blocked_in_parallel(workerids[worker]))
+			{
 				ntasks = 1;
 				break;
 			}

+ 6 - 4
src/util/starpu_data_cpy.c

@@ -86,7 +86,7 @@ void mp_cpy_kernel(void *descr[], void *cl_arg)
 
 	const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
 	const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
-	
+
 	void *dst_interface = descr[0];
 	void *src_interface = descr[1];
 
@@ -151,7 +151,7 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 
 	unsigned *interface_id;
 	_STARPU_MALLOC(interface_id, sizeof(*interface_id));
-	*interface_id = dst_handle->ops->interfaceid; 
+	*interface_id = dst_handle->ops->interfaceid;
 	task->cl_arg = interface_id;
 	task->cl_arg_size = sizeof(*interface_id);
 	task->cl_arg_free = 1;
@@ -181,7 +181,8 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous)
 {
 	_starpu_spin_lock(&src_handle->header_lock);
-	if (src_handle->readonly_dup) {
+	if (src_handle->readonly_dup)
+	{
 		/* Already a ro duplicate, just return it with one more ref */
 		*dst_handle = src_handle->readonly_dup;
 		_starpu_spin_unlock(&src_handle->header_lock);
@@ -190,7 +191,8 @@ int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t sr
 		_starpu_spin_unlock(&(*dst_handle)->header_lock);
 		return 0;
 	}
-	if (src_handle->readonly) {
+	if (src_handle->readonly)
+	{
 		src_handle->aliases++;
 		_starpu_spin_unlock(&src_handle->header_lock);
 		*dst_handle = src_handle;

+ 1 - 1
starpu-1.0-mic.pc.in

@@ -23,7 +23,7 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@
 Requires.private: @GORDON_REQUIRES@

+ 1 - 1
starpu-1.0.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.1.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.2.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.3.pc.in

@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_H_CPPFLAGS@
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 2 - 2
starpufft/src/starpufft-double.h

@@ -25,8 +25,8 @@
 #include <cufft.h>
 #endif
 
-#undef  FLOAT
-#define DOUBLE
+#undef  STARPUFFT_FLOAT
+#define STARPUFFT_DOUBLE
 
 typedef double real;
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 

+ 2 - 2
starpufft/src/starpufft-float.h

@@ -25,8 +25,8 @@
 #include <cufft.h>
 #endif
 
-#undef  DOUBLE
-#define FLOAT
+#undef  STARPUFFT_DOUBLE
+#define STARPUFFT_FLOAT
 
 typedef float real;
 #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) 

+ 2 - 2
starpufft/src/starpufftx.c

@@ -28,7 +28,7 @@
 #define _externC extern
 #include "cudax_kernels.h"
 
-#if defined(FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
+#if defined(STARPUFFT_FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
 #  define __STARPU_USE_CUDA
 #else
 #  undef __STARPU_USE_CUDA
@@ -172,7 +172,7 @@ compute_roots(STARPUFFT(plan) plan)
 }
 
 /* Only CUDA capability >= 1.3 supports doubles, rule old card out.  */
-#ifdef DOUBLE
+#ifdef STARPUFFT_DOUBLE
 static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED) {
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 		return 1;

+ 21 - 0
starpupy/Makefile.am

@@ -0,0 +1,21 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu-subdirtests.mk
+
+SUBDIRS  = src
+SUBDIRS += examples
+

+ 43 - 0
starpupy/examples/Makefile.am

@@ -0,0 +1,43 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu.mk
+
+SUBDIRS =
+
+CLEANFILES = *.gcno *.gcda *.linkinfo
+
+TESTS	=
+TESTS	+=	starpu_py.sh
+TESTS	+=	starpu_py_parallel.sh
+
+if STARPU_STARPUPY_NUMPY
+TESTS	+=	starpu_py_np.sh
+endif
+
+EXTRA_DIST	=		\
+	starpu_py_parallel.py	\
+	starpu_py_parallel.sh	\
+	starpu_py.py		\
+	starpu_py.sh		\
+	starpu_py_np.py		\
+	starpu_py_np.sh
+
+python_sourcesdir = $(libdir)/starpu/python
+dist_python_sources_DATA	=	\
+	starpu_py_parallel.py	\
+	starpu_py.py
+

+ 59 - 0
starpupy/examples/execute.sh.in

@@ -0,0 +1,59 @@
+#!@REALBASH@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+exampledir=@STARPU_SRC_DIR@/starpupy/examples
+
+modpath=@STARPU_BUILD_DIR@/src/.libs:
+pypath=@STARPU_BUILD_DIR@/starpupy/src/build:$PYTHONPATH
+
+valgrind=""
+gdb=""
+if test "$1" == "--valgrind"
+then
+    valgrind=1
+    shift
+fi
+if test "$1" == "--gdb"
+then
+    gdb=1
+    shift
+fi
+
+examplefile=$1
+if test -f $examplefile
+then
+    pythonscript=$examplefile
+elif test -f $exampledir/$examplefile
+then
+    pythonscript=$exampledir/$examplefile
+else
+    echo "Error. Python script $examplefile not found in current directory or in $exampledir"
+    exit 1
+fi
+shift
+
+set -x
+if test "$valgrind" == "1"
+then
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath PYTHONMALLOC=malloc valgrind --track-origins=yes @PYTHON@ $pythonscript $*
+elif test "$gdb" == "1"
+then
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath gdb --args @PYTHON@ $pythonscript $*
+else
+    PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath @PYTHON@ $pythonscript $*
+fi
+

+ 9 - 9
starpupy/tests/starpu_py.py

@@ -73,7 +73,7 @@ def sub(a,b,c):
 ###############################################################################
 
 #using decorator wrap the function with input
-@starpu.delayed
+@starpu.delayed(name="test")
 def add_deco(a,b,c):
 	#time.sleep(1)
 	print ("Example 8:")
@@ -83,7 +83,7 @@ def add_deco(a,b,c):
 ###############################################################################
 
 #using decorator wrap the function with input
-@starpu.delayed
+@starpu.delayed(color=1)
 def sub_deco(x,a):
 	print ("Example 9:")
 	print ("This is a function with input and output wrapped by the decorator function:")
@@ -93,34 +93,34 @@ def sub_deco(x,a):
 
 async def main():
 	#submit function "hello"
-    fut = starpu.task_submit(hello)
+    fut = starpu.task_submit()(hello)
     await fut
 
     #submit function "func1"
-    fut1 = starpu.task_submit(func1)
+    fut1 = starpu.task_submit()(func1)
     await fut1
 
     #apply starpu.delayed(func1_deco())
     await func1_deco()
 
 	#submit function "func2"
-    fut2 = starpu.task_submit(func2)
+    fut2 = starpu.task_submit()(func2)
     res2 = await fut2
 	#print the result of function
     print("This is a function no input and the return value is", res2)
 
     #submit function "multi"
-    fut3 = starpu.task_submit(multi, 2, 3)
+    fut3 = starpu.task_submit()(multi, 2, 3)
     res3 = await fut3
     print("The result of function multi is :", res3)
 
 	#submit function "add"
-    fut4 = starpu.task_submit(add, 1.2, 2.5, 3.6, 4.9)
+    fut4 = starpu.task_submit()(add, 1.2, 2.5, 3.6, 4.9)
     res4 = await fut4
     print("The result of function add is :", res4)
 
 	#submit function "sub"
-    fut5 = starpu.task_submit(sub, 6, 2, 5.9)
+    fut5 = starpu.task_submit()(sub, 6, 2, 5.9)
     res5 = await fut5
     print("The result of function sub is:", res5)
 
@@ -138,4 +138,4 @@ async def main():
 asyncio.run(main())
 
 
-#starpu.task_wait_for_all()
+#starpu.task_wait_for_all()

+ 19 - 0
starpupy/examples/starpu_py.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+$(dirname $0)/execute.sh starpu_py.py "$@"
+

+ 40 - 0
starpupy/examples/starpu_py_np.py

@@ -0,0 +1,40 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+import asyncio
+import numpy as np
+
+
+###############################################################################
+
+def scal(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a
+	return t
+
+t=np.array([1,2,3,4,5,6,7,8,9,10])
+
+async def main():
+    fut8 = starpu.task_submit()(scal, 2, t)
+    res8 = await fut8
+    print("The result of Example 10 is", res8)
+    print("The return array is", t)
+    #print("The result type is", type(res8))
+
+asyncio.run(main())
+
+
+#starpu.task_wait_for_all()

+ 3 - 7
starpupy/src/starpu/delay.py

@@ -1,3 +1,4 @@
+#!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
@@ -13,11 +14,6 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
-from starpu import starpupy
-import asyncio
 
-def delayed(f):
-	def submit(*args,**kwargs):
-		fut = starpupy.task_submit(f, *args,**kwargs)
-		return fut
-	return submit
+$(dirname $0)/execute.sh starpu_py_np.py "$@"
+

+ 350 - 0
starpupy/examples/starpu_py_parallel.py

@@ -0,0 +1,350 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+import starpu
+import starpu.joblib
+import time
+import asyncio
+from math import sqrt
+from math import log10
+import numpy as np
+import sys
+
+#generate a list to store functions
+g_func=[]
+
+#function no input no output print hello world
+def hello():
+	print ("Example 1: Hello, world!")
+g_func.append(starpu.joblib.delayed(hello)())
+
+#function no input no output
+def func1():
+	print ("Example 2: This is a function no input no output")
+g_func.append(starpu.joblib.delayed(func1)())
+
+#function no input return a value
+def func2():
+	print ("Example 3:")
+	return 12
+g_func.append(starpu.joblib.delayed(func2)())
+
+#function has 2 int inputs and 1 int output
+def exp(a,b):
+	res_exp=a**b
+	print("Example 4: The result of ",a,"^",b,"is",res_exp)
+	return res_exp
+g_func.append(starpu.joblib.delayed(exp)(2, 3))
+
+#function has 4 float inputs and 1 float output
+def add(a,b,c,d):
+	res_add=a+b+c+d
+	print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add)
+	return res_add
+g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9))
+
+#function has 2 int inputs 1 float input and 1 float output 1 int output
+def sub(a,b,c):
+	res_sub1=a-b-c
+	res_sub2=a-b
+	print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2)
+	return res_sub1, res_sub2
+g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9))
+
+##########functions of array calculation###############
+
+def scal(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a
+	return t
+
+def add_scal(a, t1, t2):
+	for i in range(len(t1)):
+		t1[i]=t1[i]*a+t2[i]
+	return t1
+
+def scal_arr(a, t):
+	for i in range(len(t)):
+		t[i]=t[i]*a[i]
+	return t
+
+def multi(a,b):
+	res_multi=a*b
+	return res_multi
+
+def multi_2arr(a, b):
+        for i in range(len(a)):
+                a[i]=a[i]*b[i]
+        return a
+
+def multi_list(l):
+	res = []
+	for (a,b) in l:
+		res.append(a*b)
+	return res
+
+def log10_arr(t):
+	for i in range(len(t)):
+		t[i]=log10(t[i])
+	return t
+########################################################
+
+#################scikit test###################
+# DEFAULT_JOBLIB_BACKEND = starpu.joblib.get_active_backend()[0].__class__
+# class MyBackend(DEFAULT_JOBLIB_BACKEND):  # type: ignore
+#         def __init__(self, *args, **kwargs):
+#                 self.count = 0
+#                 super().__init__(*args, **kwargs)
+
+#         def start_call(self):
+#                 self.count += 1
+#                 return super().start_call()
+
+# starpu.joblib.register_parallel_backend('testing', MyBackend)
+
+# with starpu.joblib.parallel_backend("testing") as (ba, n_jobs):
+# 	print("backend and n_jobs is", ba, n_jobs)
+###############################################
+
+N=100
+# A=np.arange(N)
+# B=np.arange(N)
+# a=np.arange(N)
+# b=np.arange(N, 2*N, 1)
+
+displayPlot=False
+listX=[10, 100, 1000, 10000]
+for arg in sys.argv[1:]:
+        if arg == "-long":
+                listX = [10, 100, 1000, 10000, 100000, 1000000, 10000000]
+        if arg == "-plot":
+                displayPlot=True
+
+for x in listX:
+	for X in range(x, x*10, x):
+		print("X=",X)
+		starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_list")(starpu.joblib.delayed(log10)(i+1)for i in range(X))
+		A=np.arange(1,X+1,1)
+		starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_arr")(starpu.joblib.delayed(log10_arr)(A))
+
+print("************************")
+print("parallel Normal version:")
+print("************************")
+print("--(sqrt)(i**2)for i in range(N)")
+start_exec1=time.time()
+start_cpu1=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
+end_exec1=time.time()
+end_cpu1=time.process_time()
+print("the program execution time is", end_exec1-start_exec1)
+print("the cpu execution time is", end_cpu1-start_cpu1)
+
+print("--(multi)(i,j) for i,j in zip(a,b)")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec2=time.time()
+start_cpu2=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b))
+end_exec2=time.time()
+end_cpu2=time.process_time()
+print("the program execution time is", end_exec2-start_exec2)
+print("the cpu execution time is", end_cpu2-start_cpu2)
+
+print("--(scal_arr)((i for i in b), A)")
+A=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec3=time.time()
+start_cpu3=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A))
+end_exec3=time.time()
+end_cpu3=time.process_time()
+print("the program execution time is", end_exec3-start_exec3)
+print("the cpu execution time is", end_cpu3-start_cpu3)
+
+print("--(multi_list)((i,j) for i,j in zip(a,b))")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec4=time.time()
+start_cpu4=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b)))
+end_exec4=time.time()
+end_cpu4=time.process_time()
+print("the program execution time is", end_exec4-start_exec4)
+print("the cpu execution time is", end_cpu4-start_cpu4)
+
+print("--(multi_2arr)((i for i in a), (j for j in b))")
+a=np.arange(N)
+b=np.arange(N, 2*N, 1)
+start_exec5=time.time()
+start_cpu5=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b)))
+end_exec5=time.time()
+end_cpu5=time.process_time()
+print("the program execution time is", end_exec5-start_exec5)
+print("the cpu execution time is", end_cpu5-start_cpu5)
+
+print("--(multi_2arr)(A, B)")
+# A=np.arange(N)
+# B=np.arange(N, 2*N, 1)
+n, m = 4, 5
+A = np.arange(n*m).reshape(n, m)
+B = np.arange(n*m, 2*n*m, 1).reshape(n, m)
+print("The input arrays are A", A, "B", B)
+start_exec6=time.time()
+start_cpu6=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)(A, B))
+end_exec6=time.time()
+end_cpu6=time.process_time()
+print("the program execution time is", end_exec6-start_exec6)
+print("the cpu execution time is", end_cpu6-start_cpu6)
+print("The return arrays are A", A, "B", B)
+
+print("--(scal)(2, t=(j for j in a))")
+a=np.arange(N)
+start_exec7=time.time()
+start_cpu7=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, t=(j for j in a)))
+end_exec7=time.time()
+end_cpu7=time.process_time()
+print("the program execution time is", end_exec7-start_exec7)
+print("the cpu execution time is", end_cpu7-start_cpu7)
+
+print("--(scal)(2,A)")
+A=np.arange(N)
+print("The input array is", A)
+start_exec8=time.time()
+start_cpu8=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2,A))
+end_exec8=time.time()
+end_cpu8=time.process_time()
+print("the program execution time is", end_exec8-start_exec8)
+print("the cpu execution time is", end_cpu8-start_cpu8)
+print("The return array is", A)
+
+print("--(add_scal)(t1=A,t2=B,a=2)")
+A=np.arange(N)
+B=np.arange(N)
+print("The input arrays are A", A, "B", B)
+start_exec9=time.time()
+start_cpu9=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(t1=A,t2=B,a=2))
+end_exec9=time.time()
+end_cpu9=time.process_time()
+print("the program execution time is", end_exec9-start_exec9)
+print("the cpu execution time is", end_cpu9-start_cpu9)
+print("The return arrays are A", A, "B", B)
+
+
+print("--input is iterable function list")
+start_exec10=time.time()
+start_cpu10=time.process_time()
+starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="func")(g_func)
+end_exec10=time.time()
+end_cpu10=time.process_time()
+print("the program execution time is", end_exec10-start_exec10)
+print("the cpu execution time is", end_cpu10-start_cpu10)
+
+# def producer():
+# 	for i in range(6):
+# 		print('Produced %s' % i)
+# 		yield i
+#starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(sqrt)(i) for i in producer())
+
+print("************************")
+print("parallel Future version:")
+print("************************")
+async def main():
+
+	print("--(sqrt)(i**2)for i in range(N)")
+	fut1=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
+	res1=await fut1
+	#print(res1)
+
+	print("--(multi)(i,j) for i,j in zip(a,b)")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut2=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b))
+	res2=await fut2
+	#print(res2)
+
+	print("--(scal_arr)((i for i in b), A)")
+	A=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut3=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A))
+	res3=await fut3
+	#print(res3)
+
+	print("--(multi_list)((i,j) for i,j in zip(a,b))")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut4=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b)))
+	res4=await fut4
+	#print(res4)
+
+	print("--(multi_2arr)((i for i in a), (j for j in b))")
+	a=np.arange(N)
+	b=np.arange(N, 2*N, 1)
+	fut5=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b)))
+	res5=await fut5
+	#print(res5)
+
+	print("--(multi_2arr)(b=B, a=A)")
+	A=np.arange(N)
+	B=np.arange(N, 2*N, 1)
+	print("The input arrays are A", A, "B", B)
+	fut6=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)(b=B, a=A))
+	res6=await fut6
+	print("The return arrays are A", A, "B", B)
+
+
+	print("--(scal)(2, (j for j in a))")
+	a=np.arange(N)
+	fut7=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, (j for j in a)))
+	res7=await fut7
+	#print(res6)
+
+	print("--(scal)(2,t=A)")
+	A=np.arange(N)
+	print("The input array is", A)
+	fut8=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2,t=A))
+	res8=await fut8
+	print("The return array is", A)
+
+	print("--(scal)(2,A,B)")
+	A=np.arange(N)
+	B=np.arange(N)
+	print("The input arrays are A", A, "B", B)
+	fut9=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(2,A,B))
+	res9=await fut9
+	print("The return arrays are A", A, "B", B)
+
+	print("--input is iterable function list")
+	fut10=starpu.joblib.Parallel(mode="future", n_jobs=-1)(g_func)
+	res10=await fut10
+	#print(res9)
+
+asyncio.run(main())
+
+starpu.perfmodel_plot(perfmodel="sqrt",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="scal_arr",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi_list",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="multi_2arr",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="scal",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="add_scal",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="func",view=displayPlot)
+
+starpu.perfmodel_plot(perfmodel="log_list",view=displayPlot)
+starpu.perfmodel_plot(perfmodel="log_arr",view=displayPlot)

+ 19 - 0
starpupy/examples/starpu_py_parallel.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+STARPU_CALIBRATE=1 $(dirname $0)/execute.sh starpu_py_parallel.py "$@"
+

+ 63 - 0
starpupy/src/Makefile.am

@@ -0,0 +1,63 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+include $(top_srcdir)/starpu-notests.mk
+
+SUBDIRS =
+
+PYTHON_PY_SRC	=	$(wildcard $(top_srcdir)/starpupy/src/*py)
+PYTHON_PY_BUILD	=	$(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_PY_SRC)))
+
+PYTHON_C_SRC	=	$(wildcard $(top_srcdir)/starpupy/src/*c)
+PYTHON_C_BUILD	=	$(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_C_SRC)))
+
+$(top_builddir)/starpupy/src/starpu/%.py: $(abs_top_srcdir)/starpupy/src/%.py
+	$(MKDIR_P) starpu
+	$(V_ln) $(LN_S) $< $@
+$(top_builddir)/starpupy/src/starpu/%.c: $(abs_top_srcdir)/starpupy/src/%.c
+	@$(MKDIR_P) starpu
+	$(V_ln) $(LN_S) $< $@
+
+all: $(PYTHON_PY_BUILD) $(PYTHON_C_BUILD)
+	$(PYTHON) setup.py build $(PYTHON_SETUP_OPTIONS)
+
+install-exec-local:
+	@if test -d $(prefix)/lib/python* ; \
+	then	\
+		chmod u+w $(prefix)/lib/python* ; \
+		chmod u+w $(prefix)/lib/python*/site-packages ; \
+	fi
+	$(PYTHON) setup.py install
+
+if STARPU_BUILD_STARPUPY
+clean-local:
+	$(PYTHON) setup.py clean
+	rm -f starpu/*py starpu/*c
+endif
+
+distclean-local:
+	rm -rf build
+
+uninstall-local:
+	rm -rf $(prefix)/lib/python*/site-packages/starpu*
+	rm -rf $(prefix)/lib/python*/site-packages/tmp/starpu*
+
+EXTRA_DIST	=		\
+	delay.py		\
+	__init__.py	\
+	intermedia.py	\
+	joblib.py	\
+	starpu_task_wrapper.c

+ 2 - 1
starpupy/src/starpu/__init__.py

@@ -17,4 +17,5 @@
 
 from.starpupy import *
 from .delay import *
-from . import joblib
+#from . import joblib
+from .intermedia import *

+ 29 - 0
starpupy/src/delay.py

@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+from starpu import starpupy
+import starpu
+import asyncio
+from functools import partial
+
+def delayed(f=None,*, name=None, synchronous=0, priority=0, color=None, flops=None, perfmodel=None):
+	# add options of task_submit
+	if f is None:
+		return partial(delayed, name=name, synchronous=synchronous, priority=priority, color=color, flops=flops, perfmodel=perfmodel)
+	def submit(*args):
+		fut = starpu.task_submit(name=name, synchronous=synchronous, priority=priority,\
+								 color=color, flops=flops, perfmodel=perfmodel)(f, *args)
+		return fut
+	return submit

+ 63 - 0
starpupy/src/intermedia.py

@@ -0,0 +1,63 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+from starpu import starpupy
+import os
+
+#class perfmodel
+class Perfmodel(object):
+	def __init__(self, symbol):
+		self.symbol=symbol
+		self.pstruct=starpupy.init_perfmodel(self.symbol)
+
+	def get_struct(self):
+		return self.pstruct
+
+	def __del__(self):
+	#def free_struct(self):
+		starpupy.free_perfmodel(self.pstruct)
+
+# generate the dictionary which contains the perfmodel symbol and its struct pointer
+dict_perf={}
+def dict_perf_generator(perfsymbol):
+	if dict_perf.get(perfsymbol) is None:
+		p=Perfmodel(perfsymbol)
+		dict_perf[perfsymbol]=p
+	else:
+		p=dict_perf[perfsymbol]
+	return p
+
+#add options in function task_submit
+def task_submit(*, name=None, synchronous=0, priority=0, color=None, flops=None, perfmodel=None):
+	if perfmodel is None:
+		dict_option={'name': name, 'synchronous': synchronous, 'priority': priority, 'color': color, 'flops': flops, 'perfmodel': None}
+	else:
+		p=dict_perf_generator(perfmodel)
+		dict_option={'name': name, 'synchronous': synchronous, 'priority': priority, 'color': color, 'flops': flops, 'perfmodel': p.get_struct()}
+
+	def call_task_submit(f, *args):
+		fut=starpupy._task_submit(f, *args, dict_option)
+		return fut
+	return call_task_submit
+
+# dump performance model and show the plot
+def perfmodel_plot(perfmodel, view=True):
+	p=dict_perf[perfmodel]
+	starpupy.save_history_based_model(p.get_struct())
+	if view == True:
+		os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
+		os.system('gnuplot starpu_'+perfmodel+'.gp')
+		os.system('gv starpu_'+perfmodel+'.eps')

+ 324 - 0
starpupy/src/joblib.py

@@ -0,0 +1,324 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+import sys
+import types
+import joblib as jl
+from joblib import logger
+from joblib._parallel_backends import ParallelBackendBase
+from starpu import starpupy
+import starpu
+import asyncio
+import math
+import functools
+import numpy as np
+import inspect
+import threading
+
+BACKENDS={
+	#'loky': LokyBackend,
+}
+_backend = threading.local()
+
+# get the number of CPUs controlled by StarPU
+def cpu_count():
+	n_cpus=starpupy.cpu_worker_get_count()
+	return n_cpus
+
+# split a list ls into n_block numbers of sub-lists 
+def partition(ls, n_block):
+	if len(ls)>=n_block:
+		# there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
+		q1=math.ceil(len(ls)/n_block)
+		q2=math.floor(len(ls)/n_block)
+		n1=len(ls)%n_block
+		#n2=n_block-n1
+		# generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
+		L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
+		L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
+
+		L=L1+L2
+	else:
+		# if the block number is larger than the length of list, each element in the list is a sub-list
+		L=[ls[i:i+1] for i in range (len(ls))]
+	return L
+
+# split a two-dimension numpy matrix into n_block numbers of sub-matrices
+def array2d_split(a, n_block):
+	# decompose number of n_jobs to two integers multiply
+	c_tmp=math.floor(math.sqrt(n_block))
+	for i in range (c_tmp,0,-1):
+		if n_block%i==0:
+			c=i
+			r=int(n_block/c)
+			break
+	# split column
+	arr_split_c=np.array_split(a,c,0)
+	arr_split=[]
+	# split row
+	for i in range(c):
+		arr_split_r=np.array_split(arr_split_c[i],r,1)
+		for j in range(r):
+			arr_split.append(arr_split_r[j])
+	return arr_split
+
+
+def future_generator(iterable, n_jobs, dict_task):
+	# iterable is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
+	#print("iterable type is ", type(iterable))
+	#print("iterable is", iterable)
+	# get the number of block
+	if n_jobs<-cpu_count()-1 or n_jobs>cpu_count():
+		raise SystemExit('Error: n_jobs is out of range')
+		#print("Error: n_jobs is out of range, number of CPUs is", cpu_count())
+	elif n_jobs<0:
+		n_block=cpu_count()+1+n_jobs
+	else:
+		n_block=n_jobs
+
+	# if arguments is tuple format
+	if type(iterable) is tuple:
+		# the function is always the first element
+		f=iterable[0]
+		# get the name of formal arguments of f
+		formal_args=inspect.getfullargspec(f).args
+		# get the arguments list
+		args=[]
+		# argument is arbitrary in iterable[1]
+		args=list(iterable[1])
+		# argument is keyword argument in iterable[2]
+		for i in range(len(formal_args)):
+			for j in iterable[2].keys():
+				if j==formal_args[i]:
+					args.append(iterable[2][j])
+		# check whether all arrays have the same size
+		l_arr=[]
+		# list of Future result
+		L_fut=[]
+		# split the vector
+		args_split=[]
+		for i in range(len(args)):
+			args_split.append([])
+			# if the array is an numpy array
+			if type(args[i]) is np.ndarray:
+				# one-dimension matrix
+				if args[i].ndim==1:
+					# split numpy array
+					args_split[i]=np.array_split(args[i],n_block)
+					# get the length of numpy array
+					l_arr.append(args[i].size)
+				# two-dimension matrix
+				elif args[i].ndim==2:
+					# split numpy 2D array
+					args_split[i]=array2d_split(args[i],n_block)
+			# if the array is a generator
+			elif isinstance(args[i],types.GeneratorType):
+				# split generator
+				args_split[i]=partition(list(args[i]),n_block)
+				# get the length of generator
+				l_arr.append(sum(len(args_split[i][j]) for j in range(len(args_split[i]))))
+		if len(set(l_arr))>1:
+			raise SystemExit('Error: all arrays should have the same size')
+		#print("args list is", args_split)
+		for i in range(n_block):
+			# generate the argument list
+			L_args=[]
+			for j in range(len(args)):
+				if type(args[j]) is np.ndarray or isinstance(args[j],types.GeneratorType):
+					L_args.append(args_split[j][i])
+				else:
+					L_args.append(args[j])
+			#print("L_args is", L_args)
+			fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\
+								   color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'])\
+				                  (f, *L_args)
+			L_fut.append(fut)
+		return L_fut
+
+	# if iterable is a generator or a list of function
+	else:
+		L=list(iterable)
+		#print(L)
+		# generate a list of function according to iterable
+		def lf(ls):
+			L_func=[]
+			for i in range(len(ls)):
+				# the first element is the function
+				f=ls[i][0]
+				# the second element is the args list of a type tuple
+				L_args=list(ls[i][1])
+				# generate a list of function
+				L_func.append(f(*L_args))
+			return L_func
+
+		# generate the split function list
+		L_split=partition(L,n_block)
+		# operation in each split list
+		L_fut=[]
+		for i in range(len(L_split)):
+			fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\
+								   color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'])\
+				                  (lf, L_split[i])
+			L_fut.append(fut)
+		return L_fut
+
+class Parallel(object):
+	def __init__(self, mode="normal", perfmodel=None, end_msg=None,\
+			 name=None, synchronous=0, priority=0, color=None, flops=None,\
+	         n_jobs=None, backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
+	         batch_size='auto', temp_folder=None, max_nbytes='1M',\
+	         mmap_mode='r', prefer=None, require=None):
+		#active_backend= get_active_backend()
+		# nesting_level = active_backend.nesting_level
+
+		# if backend is None:
+		# 	backend = active_backend
+
+		# else:
+		# 	try:
+		# 		backend_factory = BACKENDS[backend]
+		# 	except KeyError as e:
+		# 		raise ValueError("Invalid backend: %s, expected one of %r"
+  #                                % (backend, sorted(BACKENDS.keys()))) from e
+		# 	backend = backend_factory(nesting_level=nesting_level)
+
+		if n_jobs is None:
+			n_jobs = 1
+
+		self.mode=mode
+		self.perfmodel=perfmodel
+		self.end_msg=end_msg
+		self.name=name
+		self.synchronous=synchronous
+		self.priority=priority
+		self.color=color
+		self.flops=flops
+		self.n_jobs=n_jobs
+		self._backend=backend
+
+	def print_progress(self):
+		#pass
+		print("", starpupy.task_nsubmitted())
+
+	def __call__(self,iterable):
+		#generate the dictionary of task_submit
+		dict_task={'name': self.name, 'synchronous': self.synchronous, 'priority': self.priority, 'color': self.color, 'flops': self.flops, 'perfmodel': self.perfmodel}
+		if hasattr(self._backend, 'start_call'):
+			self._backend.start_call()
+		# the mode normal, user can call the function directly without using async
+		if self.mode=="normal":
+			async def asy_main():
+				L_fut=future_generator(iterable, self.n_jobs, dict_task)
+				res=[]
+				for i in range(len(L_fut)):
+					L_res=await L_fut[i]
+					res.extend(L_res)
+				#print(res)
+				#print("type of result is", type(res))
+				return res
+			#asyncio.run(asy_main())
+			#retVal=asy_main
+			loop = asyncio.get_event_loop()
+			results = loop.run_until_complete(asy_main())
+			retVal = results
+		# the mode future, user needs to use asyncio module and await the Future result in main function
+		elif self.mode=="future":
+			L_fut=future_generator(iterable, self.n_jobs, dict_task)
+			fut=asyncio.gather(*L_fut)
+			if self.end_msg!=None:
+				fut.add_done_callback(functools.partial(print, self.end_msg))
+			retVal=fut
+		if hasattr(self._backend, 'stop_call'):
+			self._backend.stop_call()
+		return retVal
+
+def delayed(function):
+	def delayed_function(*args, **kwargs):
+		return function, args, kwargs
+	return delayed_function
+
+
+######################################################################
+__version__ = jl.__version__
+
+class Memory(jl.Memory):
+	def __init__(self,location=None, backend='local', cachedir=None,
+                 mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
+                 backend_options=None):
+		super(Memory, self).__init__(location=None, backend='local', cachedir=None,
+                 mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
+                 backend_options=None)
+
+
+def dump(value, filename, compress=0, protocol=None, cache_size=None):
+	return jl.dump(value, filename, compress, protocol, cache_size)
+
+def load(filename, mmap_mode=None):
+	return jl.load(filename, mmap_mode)
+
+def hash(obj, hash_name='md5', coerce_mmap=False):
+	return jl.hash(obj, hash_name, coerce_mmap)
+
+def register_compressor(compressor_name, compressor, force=False):
+	return jl.register_compressor(compressor_name, compressor, force)
+
+def effective_n_jobs(n_jobs=-1):
+	return cpu_count()
+
+def get_active_backend():
+	backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
+	if backend_and_jobs is not None:
+		backend,n_jobs=backend_and_jobs
+		return backend
+	backend = BACKENDS['loky'](nesting_level=0)
+	return backend
+
+class parallel_backend(object):
+	def __init__(self, backend, n_jobs=-1, inner_max_num_threads=None,
+                 **backend_params):
+		if isinstance(backend, str):
+			backend = BACKENDS[backend](**backend_params)
+
+		current_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
+		if backend.nesting_level is None:
+			if current_backend_and_jobs is None:
+				nesting_level = 0
+			else:
+				nesting_level = current_backend_and_jobs[0].nesting_level
+
+			backend.nesting_level = nesting_level
+
+		# Save the backends info and set the active backend
+		self.old_backend_and_jobs = current_backend_and_jobs
+		self.new_backend_and_jobs = (backend, n_jobs)
+
+		_backend.backend_and_jobs = (backend, n_jobs)
+
+	def __enter__(self):
+		return self.new_backend_and_jobs
+
+	def __exit__(self, type, value, traceback):
+		self.unregister()
+
+	def unregister(self):
+		if self.old_backend_and_jobs is None:
+			if getattr(_backend, 'backend_and_jobs', None) is not None:
+				del _backend.backend_and_jobs
+		else:
+			_backend.backend_and_jobs = self.old_backend_and_jobs
+
+def register_parallel_backend(name, factory):
+	BACKENDS[name] = factory

+ 23 - 0
starpupy/src/setup.cfg.in

@@ -0,0 +1,23 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+[build]
+build-platlib=build
+build-temp=build/tmp
+
+[install]
+prefix=@prefix@
+
+

+ 40 - 0
starpupy/src/setup.py.in

@@ -0,0 +1,40 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+from distutils.core import setup, Extension
+
+numpy_dir = '@PYTHON_NUMPY_DIR@'
+if numpy_dir != '':
+    numpy_include_dir = [numpy_dir]
+else:
+    numpy_include_dir = []
+starpupy = Extension('starpu.starpupy',
+                     include_dirs = ['@STARPU_SRC_DIR@/include', '@STARPU_BUILD_DIR@/include'] + numpy_include_dir,
+                     libraries = ['starpu-@STARPU_EFFECTIVE_VERSION@'],
+                     library_dirs = ['@STARPU_BUILD_DIR@/src/.libs'],
+	             sources = ['starpu/starpu_task_wrapper.c'])
+
+setup(
+    name = 'starpupy',
+    version = '0.5',
+    description = 'Python bindings for StarPU',
+    author = 'StarPU team',
+    author_email = 'starpu-devel@lists.gforge.inria.fr',
+    url = 'https://starpu.gitlabpages.inria.fr/',
+    license = 'GPL',
+    platforms = 'posix',
+    ext_modules = [starpupy],
+    packages = ['starpu'],
+    )

+ 0 - 13
starpupy/src/starpu/Makefile

@@ -1,13 +0,0 @@
-PYTHON ?= python3
-
-CPPFLAGS = $(shell $(PYTHON)-config --includes) -Wall -O2 -g
-CFLAGS += $(shell pkg-config --cflags starpu-1.3)
-LDLIBS += $(shell pkg-config --libs starpu-1.3)
-
-all: starpupy.so
-
-starpupy.so: starpu_task_wrapper.c Makefile
-	$(CC) -fPIC $(CFLAGS) $< -o $@ -shared  $(CPPFLAGS) $(LDLIBS)
-
-clean:
-	rm -f starpupy.so

+ 0 - 147
starpupy/src/starpu/joblib.py

@@ -1,147 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-from starpu import starpupy
-import asyncio
-import math
-import os
-import pickle
-import json
-import functools
-
-# get the number of CPUs controlled by StarPU
-n_cpus=starpupy.cpu_worker_get_count()
-
-#class perfmodel
-class Perfmodel(object):
-	def __init__(self, symbol):
-		self.symbol=symbol
-		self.pstruct=starpupy.init_perfmodel(self.symbol)
-
-	def get_struct(self):
-		return self.pstruct
-
-	def __del__(self):
-	#def free_struct(self):
-		starpupy.free_perfmodel(self.pstruct)
-
-# split a list ls into n_block numbers of sub-lists 
-def partition(ls, n_block):
-	if len(ls)>=n_block:
-		# there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
-		q1=math.ceil(len(ls)/n_block)
-		q2=math.floor(len(ls)/n_block)
-		n1=len(ls)%n_block
-		#n2=n_block-n1
-		# generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
-		L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
-		L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
-
-		L=L1+L2
-	else:
-		# if the block number is larger than the length of list, each element in the list is a sub-list
-		L=[ls[i:i+1] for i in range (len(ls))]
-	return L
-
-# generate the dictionary which contains the perfmodel symbol and its struct pointer
-dict_perf={}
-def dict_perf_generator(perfsymbol):
-	if dict_perf.get(perfsymbol)==None:
-		p=Perfmodel(perfsymbol)
-		dict_perf[perfsymbol]=p
-	else:
-		p=dict_perf[perfsymbol]
-	return p
-
-def future_generator(g, n_jobs, perfsymbol):
-	# g is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
-	L=list(g)
-	# generate a list of function according to g
-	def lf(ls):
-		L_func=[]
-		for i in range(len(ls)):
-			# the first element is the function
-			f=ls[i][0]
-			# the second element is the args list of a type tuple
-			L_args=list(ls[i][1])
-			# generate a list of function
-			L_func.append(f(*L_args))
-		return L_func
-	# get the number of block
-	if n_jobs<-n_cpus-1 or n_jobs>n_cpus:
-		print("Error: n_jobs is out of range, number of CPUs is", n_cpus)
-	elif n_jobs<0:
-		n_block=n_cpus+1+n_jobs
-	else:
-		n_block=n_jobs
-	# generate the split function list
-	L_split=partition(L,n_block)
-	# operation in each split list
-	L_fut=[]
-	for i in range(len(L_split)):
-		if perfsymbol==None:
-			fut=starpupy.task_submit(lf, L_split[i])
-			L_fut.append(fut)
-		else:
-			p=dict_perf_generator(perfsymbol)
-			fut=starpupy.task_submit(lf, L_split[i], p.get_struct())
-			L_fut.append(fut)
-	return L_fut
-
-def parallel(*, mode="normal", n_jobs=1, perfmodel=None, end_msg=None,\
-	         backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
-	         batch_size='auto', temp_folder=None, max_nbytes='1M',\
-	         mmap_mode='r', prefer=None, require=None):
-	# the mode normal, user can call the function directly without using async
-	if mode=="normal":
-		def parallel_normal(g):
-			async def asy_main():
-				L_fut=future_generator(g, n_jobs, perfmodel)
-				res=[]
-				for i in range(len(L_fut)):
-					L_res=await L_fut[i]
-					res.extend(L_res)
-				#print(res)
-				return res
-			asyncio.run(asy_main())
-			return asy_main
-		return parallel_normal
-	# the mode future, user needs to use asyncio module and await the Future result in main function
-	elif mode=="future":
-		def parallel_future(g):
-			L_fut=future_generator(g, n_jobs, perfmodel)
-			fut=asyncio.gather(*L_fut)
-			if end_msg==None:
-				return fut
-			else:
-				fut.add_done_callback(functools.partial(print, end_msg))
-				return fut
-			#return fut
-		return parallel_future
-
-def delayed(f):
-	def delayed_func(*args):
-		return f, args
-	return delayed_func
-
-
-######################################################################
-# dump performance model
-def perfmodel_plot(perfmodel):
-	p=dict_perf[perfmodel]
-	starpupy.save_history_based_model(p.get_struct())
-	os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
-	os.system('gnuplot starpu_'+perfmodel+'.gp')
-	os.system('gv starpu_'+perfmodel+'.eps')

+ 0 - 416
starpupy/src/starpu/starpu_task_wrapper.c

@@ -1,416 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <starpu.h>
-
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-
-/*macro*/
-#if defined(Py_DEBUG) || defined(DEBUG)
-extern void _Py_CountReferences(FILE*);
-#define CURIOUS(x) { fprintf(stderr, __FILE__ ":%d ", __LINE__); x; }
-#else
-#define CURIOUS(x)
-#endif
-#define MARKER()        CURIOUS(fprintf(stderr, "\n"))
-#define DESCRIBE(x)     CURIOUS(fprintf(stderr, "  " #x "=%d\n", x))
-#define DESCRIBE_HEX(x) CURIOUS(fprintf(stderr, "  " #x "=%08x\n", x))
-#define COUNTREFS()     CURIOUS(_Py_CountReferences(stderr))
-/*******/
-
-/*********************Functions passed in task_submit wrapper***********************/
-
-static PyObject* asyncio_module; /*python asyncio library*/
-
-/*structure contains parameters which are passed to starpu_task.cl_arg*/
-struct codelet_struct { 
-    PyObject* f; /*the python function passed in*/
-    PyObject* argList; /*argument list of python function passed in*/
-    PyObject* rv; /*return value when using PyObject_CallObject call the function f*/
-    PyObject* fut; /*asyncio.Future*/
-    PyObject* lp; /*asyncio.Eventloop*/
-};
-typedef struct codelet_struct codelet_st;
-
-/*function passed to starpu_codelet.cpu_func*/
-void codelet_func(void *buffers[], void *cl_arg){
-
-    codelet_st* cst = (codelet_st*) cl_arg;
-
-    /*make sure we own the GIL*/
-    PyGILState_STATE state = PyGILState_Ensure();
-
-    /*verify that the function is a proper callable*/
-    if (!PyCallable_Check(cst->f)) {
-
-        printf("py_callback: expected a callable function\n"); 
-        exit(1);
-    }
-    
-    /*check the arguments of python function passed in*/
-    for (int i=0; i < PyTuple_Size(cst->argList); i++){
-      PyObject* obj=PyTuple_GetItem(cst->argList, i);
-      const char* tp = Py_TYPE(obj)->tp_name;
-      if(strcmp(tp, "_asyncio.Future") == 0){
-        /*if one of arguments is Future, get its result*/
-        PyObject * fut_result = PyObject_CallMethod(obj, "result", NULL);
-        /*replace the Future argument to its result*/
-        PyTuple_SetItem(cst->argList, i, fut_result);
-      }
-    }
-
-    /*call the python function*/
-    PyObject *pRetVal = PyObject_CallObject(cst->f, cst->argList);
-    cst->rv=pRetVal;
-
-    //Py_DECREF(cst->f);
-
-    /*restore previous GIL state*/
-    PyGILState_Release(state);
-
-}
-
-/*function passed to starpu_task.callback_func*/
-void cb_func(void *v){
-
-	struct starpu_task *task=starpu_task_get_current();
-    codelet_st* cst = (codelet_st*) task->cl_arg;
-
-    /*make sure we own the GIL*/
-    PyGILState_STATE state = PyGILState_Ensure();
-
-    /*set the Future result and mark the Future as done*/
-    PyObject * set_result = PyObject_GetAttrString(cst->fut, "set_result");
-    PyObject * loop_callback = PyObject_CallMethod(cst->lp, "call_soon_threadsafe", "(O,O)", set_result, cst->rv);
-
-    Py_DECREF(loop_callback);
-    Py_DECREF(set_result);
-    Py_DECREF(cst->rv);
-    Py_DECREF(cst->fut);
-    Py_DECREF(cst->lp);
-
-    //Py_DECREF(perfmodel);
-    struct starpu_codelet * func_cl=(struct starpu_codelet *) task->cl;
-    if (func_cl->model != NULL){
-      struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
-      PyObject* perfmodel=PyCapsule_New(perf, "Perf", 0);
-      Py_DECREF(perfmodel);
-    }
-
-    for(int i = 0; i < PyTuple_Size(cst->argList); i++){
-        Py_DECREF(PyTuple_GetItem(cst->argList, i));
-    }
-    Py_DECREF(cst->argList);
-
-    /*restore previous GIL state*/
-    PyGILState_Release(state);
-
-    /*deallocate task*/
-    free(task->cl);
-	  free(task->cl_arg);
-
-}
-
-/***********************************************************************************/
-/*PyObject*->struct starpu_task**/
-static struct starpu_task *PyTask_AsTask(PyObject* obj){
-  return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task");
-}
-
-/* destructor function for task */
-static void del_Task(PyObject *obj) {
-  struct starpu_task* obj_task=PyTask_AsTask(obj);
-  obj_task->destroy=1; /*XXX we should call starpu task destroy*/
-}
-
-/*struct starpu_task*->PyObject**/
-static PyObject *PyTask_FromTask(struct starpu_task *task) {
-  return PyCapsule_New(task, "Task", del_Task);
-}
-
-/***********************************************************************************/
-static size_t sizebase (struct starpu_task * task, unsigned nimpl){
-
-  codelet_st* cst = (codelet_st*) task->cl_arg;
-
-  PyObject* obj=PyTuple_GetItem(cst->argList, 0);
-  /*get the length of arguments*/
-  int n = PyList_Size(obj);
-
-  return n;
-}
-
-static void del_Perf(PyObject *obj){
-  struct starpu_perfmodel *perf=(struct starpu_perfmodel*)PyCapsule_GetPointer(obj, "Perf");
-  free(perf);
-}
-/*initialization of perfmodel*/
-static PyObject* init_perfmodel(PyObject *self, PyObject *args){
-
-  char* sym;
-
-  if (!PyArg_ParseTuple(args, "s", &sym))
-    return NULL;
-
-  /*allocate a perfmodel structure*/
-  struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel));
-
-  /*get the perfmodel symbol*/
-  char* p =strdup(sym);
-  perf->symbol=p;
-  perf->type=STARPU_HISTORY_BASED;
-
-  /*struct perfmodel*->PyObject**/
-  PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL);
-
-  return perfmodel;
-}
-
-
-/*free perfmodel*/
-static PyObject* free_perfmodel(PyObject *self, PyObject *args){
-
-  PyObject* perfmodel;
-  if (!PyArg_ParseTuple(args, "O", &perfmodel))
-    return NULL;
-
-  /*PyObject*->struct perfmodel**/
-  struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-
-  starpu_save_history_based_model(perf);
-  //starpu_perfmodel_unload_model(perf);
-  free(perf->symbol);
-  starpu_perfmodel_deinit(perf);
-  free(perf);
-
-  /*return type is void*/
-  Py_INCREF(Py_None);
-  return Py_None;
-}
-
-static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args){
-
-  PyObject* perfmodel;
-  if (!PyArg_ParseTuple(args, "O", &perfmodel))
-    return NULL;
-
-  /*PyObject*->struct perfmodel**/
-  struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-
-  starpu_save_history_based_model(perf);
-
-  /*return type is void*/
-  Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*****************************Wrappers of StarPU methods****************************/
-/*wrapper submit method*/
-static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args){
-
-    /*get the running Event loop*/
-    PyObject* loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL);
-    /*create a asyncio.Future object*/
-    PyObject* fut = PyObject_CallMethod(loop, "create_future", NULL);
-
-    /*first argument in args is always the python function passed in*/
-    PyObject* func_py = PyTuple_GetItem(args, 0);
-    Py_INCREF(func_py);
-
-	  /*allocate a task structure and initialize it with default values*/
-    struct starpu_task *task=starpu_task_create();
-    task->destroy=0;
-
-    PyObject* PyTask=PyTask_FromTask(task);
-
-    /*set one of fut attribute to the task pointer*/
-    PyObject_SetAttrString(fut, "starpu_task", PyTask);
-    /*check the arguments of python function passed in*/
-    for (int i=1; i < PyTuple_Size(args); i++){
-      PyObject* obj=PyTuple_GetItem(args, i);
-      const char* tp = Py_TYPE(obj)->tp_name;
-      if(strcmp(tp, "_asyncio.Future") == 0){
-        /*if one of arguments is Future, get its corresponding task*/
-        PyObject* fut_task=PyObject_GetAttrString(obj, "starpu_task");
-        /*declare task dependencies between the current task and the corresponding task of Future argument*/
-        starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task));
-
-        Py_DECREF(fut_task);
-      }
-    }
-    
-    /*allocate a codelet structure*/
-    struct starpu_codelet *func_cl=(struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
-    /*initialize func_cl with default values*/
-    starpu_codelet_init(func_cl);
-    func_cl->cpu_func=&codelet_func;
-    
-    /*check whether the last argument in args is the perfmodel*/
-    PyObject* perfmodel=PyTuple_GetItem(args, PyTuple_Size(args)-1);
-    const char* tp_perf = Py_TYPE(perfmodel)->tp_name;
-    if (strcmp(tp_perf, "PyCapsule")==0){
-      /*PyObject*->struct perfmodel**/
-      struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
-      func_cl->model=perf;
-      Py_INCREF(perfmodel);
-    }
-    
-
-    /*allocate a new codelet structure to pass the python function, asyncio.Future and Event loop*/
-    codelet_st *cst = (codelet_st*)malloc(sizeof(codelet_st));
-    cst->f = func_py;
-    cst->fut = fut;
-    cst->lp = loop;
-    
-    Py_INCREF(fut);
-    Py_INCREF(loop);
-
-    /*pass args in argList*/
-    if (PyTuple_Size(args)==1 || (PyTuple_Size(args)==2 && strcmp(tp_perf, "PyCapsule")==0))/*function no arguments*/
-      cst->argList = PyTuple_New(0);
-    else if(PyTuple_Size(args)>2 && strcmp(tp_perf, "PyCapsule")==0){/*function has arguments and the last argument in args is the perfmodel*/
-      cst->argList = PyTuple_New(PyTuple_Size(args)-2);
-      for (int i=0; i < PyTuple_Size(args)-2; i++){
-        PyObject* tmp=PyTuple_GetItem(args, i+1);
-        PyTuple_SetItem(cst->argList, i, tmp);
-        Py_INCREF(PyTuple_GetItem(cst->argList, i));
-      }
-    }
-    else{/*function has arguments and no perfmodel*/
-      cst->argList = PyTuple_New(PyTuple_Size(args)-1);
-      for (int i=0; i < PyTuple_Size(args)-1; i++){
-        PyObject* tmp=PyTuple_GetItem(args, i+1);
-        PyTuple_SetItem(cst->argList, i, tmp);
-        Py_INCREF(PyTuple_GetItem(cst->argList, i));
-      }
-    }
-
-    task->cl=func_cl;
-    task->cl_arg=cst;
-    /*call starpu_task_submit method*/
-    starpu_task_submit(task);
-    task->callback_func=&cb_func;
-    if (strcmp(tp_perf, "PyCapsule")==0){
-      struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
-      perf->size_base=&sizebase;
-    }
-
-    //printf("the number of reference is %ld\n", Py_REFCNT(func_py));
-    //_Py_PrintReferences(stderr);
-    //COUNTREFS();
-    return fut;
-
-}
-
-/*wrapper wait for all method*/
-static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_task_wait_for_all method*/
-	Py_BEGIN_ALLOW_THREADS
-	starpu_task_wait_for_all();
-	Py_END_ALLOW_THREADS
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper pause method*/
-static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_pause method*/
-	starpu_pause();
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper resume method*/
-static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args){
-
-	/*call starpu_resume method*/
-	starpu_resume();
-
-	/*return type is void*/
-	Py_INCREF(Py_None);
-  return Py_None;
-}
-
-/*wrapper get count cpu method*/
-static PyObject* starpu_cpu_worker_get_count_wrapper(PyObject *self, PyObject *args){
-
-  /*call starpu_cpu_worker_get_count method*/
-  int num_cpu=starpu_cpu_worker_get_count();
-
-  /*return type is unsigned*/
-  return Py_BuildValue("I", num_cpu);
-}
-
-/***********************************************************************************/
-
-/***************The module’s method table and initialization function**************/
-/*method table*/
-static PyMethodDef starpupyMethods[] = 
-{ 
-  {"task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/
-  {"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/
-  {"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/
-  {"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/
-  {"cpu_worker_get_count", starpu_cpu_worker_get_count_wrapper, METH_VARARGS, "return the number of CPUs controlled by StarPU"}, /*get count cpu method*/
-  {"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/
-  {"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/
-  {"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/
-  {NULL, NULL}
-};
-
-/*deallocation function*/
-static void starpupyFree(void* self){
-	starpu_shutdown();
-  Py_DECREF(asyncio_module);
-  //COUNTREFS();
-}
-
-/*module definition structure*/
-static struct PyModuleDef starpupymodule={
-  PyModuleDef_HEAD_INIT,
-  "starpupy", /*name of module*/
-  NULL,
-  -1,
-  starpupyMethods, /*method table*/
-  NULL,
-  NULL,
-  NULL,
-  starpupyFree /*deallocation function*/
-};
-
-/*initialization function*/
-PyMODINIT_FUNC
-PyInit_starpupy(void)
-{
-    PyEval_InitThreads();
-    /*starpu initialization*/
-	  starpu_init(NULL);
-    /*python asysncio import*/
-    asyncio_module = PyImport_ImportModule("asyncio");
-    /*module import initialization*/
-    return PyModule_Create(&starpupymodule);
-}
-/***********************************************************************************/

+ 536 - 0
starpupy/src/starpu_task_wrapper.c

@@ -0,0 +1,536 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#undef NDEBUG
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <starpu.h>
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+#include <numpy/arrayobject.h>
+#endif
+
+/*macro*/
+#if defined(Py_DEBUG) || defined(DEBUG)
+extern void _Py_CountReferences(FILE*);
+#define CURIOUS(x) { fprintf(stderr, __FILE__ ":%d ", __LINE__); x; }
+#else
+#define CURIOUS(x)
+#endif
+#define MARKER()        CURIOUS(fprintf(stderr, "\n"))
+#define DESCRIBE(x)     CURIOUS(fprintf(stderr, "  " #x "=%d\n", x))
+#define DESCRIBE_HEX(x) CURIOUS(fprintf(stderr, "  " #x "=%08x\n", x))
+#define COUNTREFS()     CURIOUS(_Py_CountReferences(stderr))
+/*******/
+
+/*********************Functions passed in task_submit wrapper***********************/
+
+static PyObject *asyncio_module; /*python asyncio library*/
+
+/*structure contains parameters which are passed to starpu_task.cl_arg*/
+struct codelet_args
+{
+	PyObject *f; /*the python function passed in*/
+	PyObject *argList; /*argument list of python function passed in*/
+	PyObject *rv; /*return value when using PyObject_CallObject call the function f*/
+	PyObject *fut; /*asyncio.Future*/
+	PyObject *lp; /*asyncio.Eventloop*/
+};
+
+/*function passed to starpu_codelet.cpu_func*/
+void codelet_func(void *buffers[], void *cl_arg)
+{
+	struct codelet_args *cst = (struct codelet_args*) cl_arg;
+
+	/*make sure we own the GIL*/
+	PyGILState_STATE state = PyGILState_Ensure();
+
+	/*verify that the function is a proper callable*/
+	if (!PyCallable_Check(cst->f))
+	{
+		printf("py_callback: expected a callable function\n");
+		exit(1);
+	}
+
+	/*check the arguments of python function passed in*/
+	int i;
+	for(i=0; i < PyTuple_Size(cst->argList); i++)
+	{
+		PyObject *obj = PyTuple_GetItem(cst->argList, i);
+		const char *tp = Py_TYPE(obj)->tp_name;
+		if(strcmp(tp, "_asyncio.Future") == 0)
+		{
+			/*if one of arguments is Future, get its result*/
+			PyObject *fut_result = PyObject_CallMethod(obj, "result", NULL);
+			/*replace the Future argument to its result*/
+			PyTuple_SetItem(cst->argList, i, fut_result);
+		}
+		/*else if (strcmp(tp, "numpy.ndarray")==0)
+		  {
+		  printf("array is %p\n", obj);
+		  }*/
+	}
+
+	/*call the python function*/
+	PyObject *pRetVal = PyObject_CallObject(cst->f, cst->argList);
+	//const char *tp = Py_TYPE(pRetVal)->tp_name;
+	//printf("return value type is %s\n", tp);
+	cst->rv = pRetVal;
+
+	//Py_DECREF(cst->f);
+
+	/*restore previous GIL state*/
+	PyGILState_Release(state);
+}
+
+/*function passed to starpu_task.callback_func*/
+void cb_func(void *v)
+{
+	struct starpu_task *task = starpu_task_get_current();
+	struct codelet_args *cst = (struct codelet_args*) task->cl_arg;
+
+	/*make sure we own the GIL*/
+	PyGILState_STATE state = PyGILState_Ensure();
+
+	/*set the Future result and mark the Future as done*/
+	PyObject *set_result = PyObject_GetAttrString(cst->fut, "set_result");
+	PyObject *loop_callback = PyObject_CallMethod(cst->lp, "call_soon_threadsafe", "(O,O)", set_result, cst->rv);
+
+	Py_DECREF(loop_callback);
+	Py_DECREF(set_result);
+	Py_DECREF(cst->rv);
+	Py_DECREF(cst->fut);
+	Py_DECREF(cst->lp);
+	Py_DECREF(cst->argList);
+
+	//Py_DECREF(perfmodel);
+	struct starpu_codelet *func_cl=(struct starpu_codelet *) task->cl;
+	if (func_cl->model != NULL)
+	{
+		struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
+		PyObject *perfmodel=PyCapsule_New(perf, "Perf", 0);
+		Py_DECREF(perfmodel);
+	}
+
+	/*restore previous GIL state*/
+	PyGILState_Release(state);
+
+	/*deallocate task*/
+	free(task->cl);
+	free(task->cl_arg);
+}
+
+/***********************************************************************************/
+/*PyObject*->struct starpu_task**/
+static struct starpu_task *PyTask_AsTask(PyObject *obj)
+{
+	return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task");
+}
+
+/* destructor function for task */
+static void del_Task(PyObject *obj)
+{
+	struct starpu_task *obj_task=PyTask_AsTask(obj);
+	obj_task->destroy=1; /*XXX we should call starpu task destroy*/
+}
+
+/*struct starpu_task*->PyObject**/
+static PyObject *PyTask_FromTask(struct starpu_task *task)
+{
+	return PyCapsule_New(task, "Task", del_Task);
+}
+
+/***********************************************************************************/
+static size_t sizebase (struct starpu_task *task, unsigned nimpl)
+{
+	int n=0;
+	struct codelet_args *cst = (struct codelet_args*) task->cl_arg;
+
+	/*get the result of function*/
+	PyObject *obj=cst->rv;
+	/*get the length of result*/
+	const char *tp = Py_TYPE(obj)->tp_name;
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+	/*if the result is a numpy array*/
+	if (strcmp(tp, "numpy.ndarray")==0)
+		n = PyArray_SIZE(obj);
+	else
+#endif
+	/*if the result is a list*/
+	if (strcmp(tp, "list")==0)
+		n = PyList_Size(obj);
+	/*else error*/
+	else
+	{
+		printf("starpu_perfmodel::size_base: the type of function result is unrecognized\n");
+		exit(1);
+	}
+	return n;
+}
+
+static void del_Perf(PyObject *obj)
+{
+	struct starpu_perfmodel *perf=(struct starpu_perfmodel*)PyCapsule_GetPointer(obj, "Perf");
+	free(perf);
+}
+
+/*initialization of perfmodel*/
+static PyObject* init_perfmodel(PyObject *self, PyObject *args)
+{
+	char *sym;
+
+	if (!PyArg_ParseTuple(args, "s", &sym))
+		return NULL;
+
+	/*allocate a perfmodel structure*/
+	struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel));
+
+	/*get the perfmodel symbol*/
+	char *p =strdup(sym);
+	perf->symbol=p;
+	perf->type=STARPU_HISTORY_BASED;
+
+	/*struct perfmodel*->PyObject**/
+	PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL);
+
+	return perfmodel;
+}
+
+/*free perfmodel*/
+static PyObject* free_perfmodel(PyObject *self, PyObject *args)
+{
+	PyObject *perfmodel;
+	if (!PyArg_ParseTuple(args, "O", &perfmodel))
+		return NULL;
+
+	/*PyObject*->struct perfmodel**/
+	struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+
+	starpu_save_history_based_model(perf);
+	//starpu_perfmodel_unload_model(perf);
+	//free(perf->symbol);
+	starpu_perfmodel_deinit(perf);
+	free(perf);
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args)
+{
+	PyObject *perfmodel;
+	if (!PyArg_ParseTuple(args, "O", &perfmodel))
+		return NULL;
+
+	/*PyObject*->struct perfmodel**/
+	struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+
+	starpu_save_history_based_model(perf);
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*****************************Wrappers of StarPU methods****************************/
+/*wrapper submit method*/
+static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args)
+{
+	/*get the running Event loop*/
+	PyObject *loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL);
+	/*create a asyncio.Future object*/
+	PyObject *fut = PyObject_CallMethod(loop, "create_future", NULL);
+
+	/*first argument in args is always the python function passed in*/
+	PyObject *func_py = PyTuple_GetItem(args, 0);
+	Py_INCREF(func_py);
+
+	/*allocate a task structure and initialize it with default values*/
+	struct starpu_task *task=starpu_task_create();
+	task->destroy=0;
+
+	PyObject *PyTask=PyTask_FromTask(task);
+
+	/*set one of fut attribute to the task pointer*/
+	PyObject_SetAttrString(fut, "starpu_task", PyTask);
+	/*check the arguments of python function passed in*/
+	int i;
+	for(i=1; i < PyTuple_Size(args)-1; i++)
+	{
+		PyObject *obj=PyTuple_GetItem(args, i);
+		const char* tp = Py_TYPE(obj)->tp_name;
+		/* NOTE(review): 'tp'/'obj' presumably come from a loop over the
+		 * positional arguments declared above this chunk -- confirm. */
+		if(strcmp(tp, "_asyncio.Future") == 0)
+		{
+			/*if one of arguments is Future, get its corresponding task*/
+			PyObject *fut_task=PyObject_GetAttrString(obj, "starpu_task");
+			/*declare task dependencies between the current task and the corresponding task of Future argument*/
+			starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task));
+
+			Py_DECREF(fut_task);
+		}
+	}
+
+	/*allocate a codelet structure*/
+	/* NOTE(review): malloc results are not checked here (nor for codelet_args
+	 * below); on OOM the next line dereferences NULL. */
+	struct starpu_codelet *func_cl=(struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
+	/*initialize func_cl with default values*/
+	starpu_codelet_init(func_cl);
+	func_cl->cpu_funcs[0]=&codelet_func;
+	func_cl->cpu_funcs_name[0]="codelet_func";
+
+	/*check whether the option perfmodel is None*/
+	PyObject *dict_option = PyTuple_GetItem(args, PyTuple_Size(args)-1);/*the last argument is the option dictionary*/
+	PyObject *perfmodel = PyDict_GetItemString(dict_option, "perfmodel");
+	const char *tp_perf = Py_TYPE(perfmodel)->tp_name;
+	if (strcmp(tp_perf, "PyCapsule")==0)
+	{
+		/*PyObject*->struct perfmodel**/
+		struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf");
+		func_cl->model=perf;
+		/* keep the capsule alive while the codelet references the model.
+		 * NOTE(review): there is no matching Py_DECREF anywhere visible, so
+		 * the capsule reference is never released -- confirm intent. */
+		Py_INCREF(perfmodel);
+	}
+
+	/*allocate a new codelet structure to pass the python function, asyncio.Future and Event loop*/
+	struct codelet_args *cst = (struct codelet_args*)malloc(sizeof(struct codelet_args));
+	cst->f = func_py;
+	cst->fut = fut;
+	cst->lp = loop;
+
+	/* references owned on behalf of the completion callback; presumably
+	 * released by cb_func once the task finishes -- confirm against cb_func */
+	Py_INCREF(fut);
+	Py_INCREF(loop);
+
+	/*pass args in argList*/
+	if (PyTuple_Size(args)==2)/*function no arguments*/
+		cst->argList = PyTuple_New(0);
+	else
+	{/*function has arguments*/
+		cst->argList = PyTuple_New(PyTuple_Size(args)-2);
+		int i;
+		for(i=0; i < PyTuple_Size(args)-2; i++)
+		{
+			PyObject *tmp=PyTuple_GetItem(args, i+1);
+			/* NOTE(review): PyTuple_SetItem *steals* a reference, but 'tmp'
+			 * is only borrowed from 'args'; the Py_INCREF on the next line
+			 * compensates after the fact.  Net refcount is correct, but the
+			 * conventional order is INCREF first, then SetItem. */
+			PyTuple_SetItem(cst->argList, i, tmp);
+			Py_INCREF(PyTuple_GetItem(cst->argList, i));
+		}
+	}
+
+	task->cl=func_cl;
+	task->cl_arg=cst;
+
+	/*pass optional values name=None, synchronous=1, priority=0, color=None, flops=None, perfmodel=None*/
+	/*const char * name*/
+	PyObject *PyName = PyDict_GetItemString(dict_option, "name");
+	const char *name_type = Py_TYPE(PyName)->tp_name;
+	if (strcmp(name_type, "NoneType")!=0)
+	{
+		PyObject *pStrObj = PyUnicode_AsUTF8String(PyName);
+		char* name_str = PyBytes_AsString(pStrObj);
+		/* NOTE(review): the strdup'ed copy is never freed in this chunk --
+		 * leaks one allocation per named task unless StarPU frees
+		 * task->name on task destruction; confirm. */
+		char* name = strdup(name_str);
+		//printf("name is %s\n", name);
+		task->name=name;
+		Py_DECREF(pStrObj);
+	}
+
+	/*unsigned synchronous:1*/
+	PyObject *PySync = PyDict_GetItemString(dict_option, "synchronous");
+	unsigned sync=PyLong_AsUnsignedLong(PySync);
+	//printf("sync is %u\n", sync);
+	task->synchronous=sync;
+
+	/*int priority*/
+	PyObject *PyPrio = PyDict_GetItemString(dict_option, "priority");
+	int prio=PyLong_AsLong(PyPrio);
+	//printf("prio is %d\n", prio);
+	task->priority=prio;
+
+	/*unsigned color*/
+	PyObject *PyColor = PyDict_GetItemString(dict_option, "color");
+	const char *color_type = Py_TYPE(PyColor)->tp_name;
+	if (strcmp(color_type, "NoneType")!=0)
+	{
+		unsigned color=PyLong_AsUnsignedLong(PyColor);
+		//printf("color is %u\n", color);
+		task->color=color;
+	}
+
+	/*double flops*/
+	PyObject *PyFlops = PyDict_GetItemString(dict_option, "flops");
+	const char *flops_type = Py_TYPE(PyFlops)->tp_name;
+	if (strcmp(flops_type, "NoneType")!=0)
+	{
+		double flops=PyFloat_AsDouble(PyFlops);
+		//printf("flops is %f\n", flop);
+		task->flops=flops;
+	}
+
+	task->callback_func=&cb_func;
+
+	/*call starpu_task_submit method*/
+	/* release the GIL during submission (a synchronous task blocks here) */
+	Py_BEGIN_ALLOW_THREADS
+		/* NOTE(review): assert() is compiled out under NDEBUG, so a failed
+		 * submission would be silently ignored; returning NULL with a
+		 * Python exception set would be the conventional handling. */
+		int ret = starpu_task_submit(task);
+		assert(ret==0);
+	Py_END_ALLOW_THREADS
+
+	if (strcmp(tp_perf, "PyCapsule")==0)
+	{
+		/* NOTE(review): size_base is installed *after* submission, so a
+		 * fast-starting task could be timed before the hook is in place --
+		 * confirm whether this ordering is deliberate. */
+		struct starpu_perfmodel *perf =(struct starpu_perfmodel *) func_cl->model;
+		perf->size_base=&sizebase;
+	}
+
+	//printf("the number of reference is %ld\n", Py_REFCNT(func_py));
+	//_Py_PrintReferences(stderr);
+	//COUNTREFS();
+	return fut;
+}
+
+/*wrapper wait for all method*/
+/* Python: starpu.task_wait_for_all() -> None.  Blocks until every submitted
+ * task has completed; the GIL is released while waiting so callbacks and
+ * other Python threads can keep running. */
+static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_task_wait_for_all method*/
+	Py_BEGIN_ALLOW_THREADS
+		starpu_task_wait_for_all();
+	Py_END_ALLOW_THREADS
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper pause method*/
+/* Python: starpu.pause() -> None.  Suspends the processing of new tasks by
+ * workers.  NOTE(review): unlike the wait wrapper, the GIL is kept held --
+ * presumably starpu_pause() returns immediately; confirm. */
+static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_pause method*/
+	starpu_pause();
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper resume method*/
+/* Python: starpu.resume() -> None.  Counterpart of pause(): lets workers
+ * poll for new tasks again. */
+static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_resume method*/
+	starpu_resume();
+
+	/*return type is void*/
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*wrapper get count cpu method*/
+/* Python: starpu.cpu_worker_get_count() -> int.
+ * NOTE(review): the StarPU call presumably returns unsigned; it is stored
+ * in a signed int and then formatted with "I" (unsigned) -- harmless for
+ * realistic worker counts, but the types are mixed. */
+static PyObject* starpu_cpu_worker_get_count_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_cpu_worker_get_count method*/
+	int num_cpu=starpu_cpu_worker_get_count();
+
+	/*return type is unsigned*/
+	return Py_BuildValue("I", num_cpu);
+}
+
+/*wrapper get min priority method*/
+/* Python: starpu.sched_get_min_priority() -> int, the scheduler's minimum
+ * task priority value. */
+static PyObject* starpu_sched_get_min_priority_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_sched_get_min_priority*/
+	int min_prio=starpu_sched_get_min_priority();
+
+	/*return type is int*/
+	return Py_BuildValue("i", min_prio);
+}
+
+/*wrapper get max priority method*/
+/* Python: starpu.sched_get_max_priority() -> int, the scheduler's maximum
+ * task priority value. */
+static PyObject* starpu_sched_get_max_priority_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_sched_get_max_priority*/
+	int max_prio=starpu_sched_get_max_priority();
+
+	/*return type is int*/
+	return Py_BuildValue("i", max_prio);
+}
+
+/*wrapper get the number of no completed submitted tasks method*/
+/* Python: starpu.task_nsubmitted() -> int, the count of submitted tasks
+ * that have not yet completed. */
+static PyObject* starpu_task_nsubmitted_wrapper(PyObject *self, PyObject *args)
+{
+	/*call starpu_task_nsubmitted*/
+	int num_task=starpu_task_nsubmitted();
+
+	/*Return the number of submitted tasks which have not completed yet */
+	return Py_BuildValue("i", num_task);
+}
+/***********************************************************************************/
+
+/***************The module's method table and initialization function**************/
+/*method table: maps the Python-visible names to the C wrappers above.
+ * NOTE(review): _task_submit is underscore-prefixed -- presumably wrapped by
+ * a pure-Python task_submit() that builds the trailing option dict; confirm
+ * against the Python side of the package. */
+static PyMethodDef starpupyMethods[] =
+{
+	{"_task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/
+	{"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/
+	{"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/
+	{"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/
+	{"cpu_worker_get_count", starpu_cpu_worker_get_count_wrapper, METH_VARARGS, "return the number of CPUs controlled by StarPU"}, /*get count cpu method*/
+	{"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/
+	{"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/
+	{"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/
+	{"sched_get_min_priority", starpu_sched_get_min_priority_wrapper, METH_VARARGS, "get the number of min priority"}, /*get the number of min priority*/
+	{"sched_get_max_priority", starpu_sched_get_max_priority_wrapper, METH_VARARGS, "get the number of max priority"}, /*get the number of max priority*/
+	{"task_nsubmitted", starpu_task_nsubmitted_wrapper, METH_VARARGS, "get the number of submitted tasks which have not completed yet"}, /*get the number of submitted tasks which have not completed yet*/
+	{NULL, NULL} /*sentinel terminating the table*/
+};
+
+/*deallocation function*/
+/* m_free hook from the module definition below: runs when the module object
+ * is deallocated (normally at interpreter shutdown).  Shuts StarPU down and
+ * drops the cached asyncio module reference.
+ * NOTE(review): if PyImport_ImportModule("asyncio") failed during init,
+ * asyncio_module is NULL and this Py_DECREF would crash -- confirm. */
+static void starpupyFree(void *self)
+{
+	starpu_shutdown();
+	Py_DECREF(asyncio_module);
+	//COUNTREFS();
+}
+
+/*module definition structure*/
+static struct PyModuleDef starpupymodule =
+{
+	PyModuleDef_HEAD_INIT,
+	"starpupy", /*name of module*/
+	NULL, /*m_doc: no module docstring*/
+	-1, /*m_size: state kept in globals; no sub-interpreter support*/
+	starpupyMethods, /*method table*/
+	NULL, /*m_slots*/
+	NULL, /*m_traverse*/
+	NULL, /*m_clear*/
+	starpupyFree /*deallocation function*/
+};
+
+/*initialization function*/
+/* Entry point run by "import starpupy": starts the StarPU runtime, caches
+ * the asyncio module, then creates the extension module object. */
+PyMODINIT_FUNC
+PyInit_starpupy(void)
+{
+	/* no-op since Python 3.7 and deprecated since 3.9: the runtime now
+	 * initializes the GIL machinery itself */
+	PyEval_InitThreads();
+	/*starpu initialization*/
+	/* NOTE(review): assert() is compiled out under NDEBUG, so an init
+	 * failure (e.g. -ENODEV) would go unnoticed; returning NULL with a
+	 * Python exception set is the conventional failure path here. */
+	int ret = starpu_init(NULL);
+	assert(ret==0);
+	/*python asyncio import*/
+	/* NOTE(review): result unchecked -- NULL on failure would later crash
+	 * starpupyFree's Py_DECREF.  'asyncio_module' is a file-scope global
+	 * declared above this chunk. */
+	asyncio_module = PyImport_ImportModule("asyncio");
+#ifdef STARPU_PYTHON_HAVE_NUMPY
+	/*numpy import array*/
+	import_array();
+#endif
+	/*module import initialization*/
+	return PyModule_Create(&starpupymodule);
+}

+ 0 - 6
starpupy/tests/Makefile

@@ -1,6 +0,0 @@
-PYTHON ?= python3
-
-all:
-	PYTHONPATH=../src $(PYTHON) starpu_py.py
-	PYTHONPATH=../src STARPU_CALIBRATE=1 $(PYTHON) starpu_py_parallel.py
-

+ 0 - 101
starpupy/tests/starpu_py_parallel.py

@@ -1,101 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-import starpu
-import time
-import asyncio
-from math import sqrt
-from math import log10
-
-#generate a list to store functions
-g_func=[]
-
-#function no input no output print hello world
-def hello():
-	print ("Example 1: Hello, world!")
-g_func.append(starpu.joblib.delayed(hello)())
-
-#function no input no output
-def func1():
-	print ("Example 2: This is a function no input no output")
-g_func.append(starpu.joblib.delayed(func1)())
-
-#function no input return a value
-def func2():
-	print ("Example 3:")
-	return 12
-g_func.append(starpu.joblib.delayed(func2)())
- 
-#function has 2 int inputs and 1 int output
-def multi(a,b):
-	res_multi=a*b
-	print("Example 4: The result of ",a,"*",b,"is",res_multi)
-	return res_multi
-g_func.append(starpu.joblib.delayed(multi)(2, 3))
-
-#function has 4 float inputs and 1 float output
-def add(a,b,c,d):
-	res_add=a+b+c+d
-	print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add)
-	return res_add
-g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9))
-
-#function has 2 int inputs 1 float input and 1 float output 1 int output
-def sub(a,b,c):
-	res_sub1=a-b-c
-	res_sub2=a-b
-	print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2)
-	return res_sub1, res_sub2
-g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9))
-
-#the size of generator
-N=1000000
-
-print("************************")
-print("parallel Normal version:")
-print("************************")
-print("--input is iterable argument list, example 1")
-starpu.joblib.parallel(mode="normal", n_jobs=-2, perfmodel="first")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
-
-print("--input is iterable argument list, example 2")
-starpu.joblib.parallel(mode="normal", n_jobs=2, perfmodel="second")(starpu.joblib.delayed(log10)(i+1)for i in range(N))
-
-print("--input is iterable function list")
-starpu.joblib.parallel(mode="normal", n_jobs=3, perfmodel="third")(g_func)
-
-
-print("************************")
-print("parallel Future version:")
-print("************************")
-async def main():
-	print("--input is iterable argument list, example 1")
-	fut1=starpu.joblib.parallel(mode="future", n_jobs=-3, perfmodel="first")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N))
-	res1=await fut1
-	#print(res1)
-
-	print("--input is iterable argument list, example 2")
-	fut2=starpu.joblib.parallel(mode="future", n_jobs=-3, perfmodel="second")(starpu.joblib.delayed(log10)(i+1)for i in range(N))
-	res2=await fut2
-	#print(res2)
-
-	print("--input is iterable function list")
-	fut3=starpu.joblib.parallel(mode="future", n_jobs=2, perfmodel="third")(g_func)
-	res3=await fut3
-	#print(res3)
-asyncio.run(main())
-
-starpu.joblib.perfmodel_plot(perfmodel="first")
-starpu.joblib.perfmodel_plot(perfmodel="second")
-starpu.joblib.perfmodel_plot(perfmodel="third")

+ 2 - 2
tests/Makefile.am

@@ -17,8 +17,8 @@ include $(top_srcdir)/starpu.mk
 
 AM_CFLAGS += -Wno-unused
 AM_CXXFLAGS += -Wno-unused
-AM_FFLAGS += -Wno-unused
-AM_FCFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused -Wno-unused-dummy-argument
+AM_FCFLAGS += -Wno-unused -Wno-unused-dummy-argument
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 2 - 1
tests/datawizard/bcsr.c

@@ -123,7 +123,8 @@ int main(int argc, char **argv)
 	if (starpu_initialize(&conf, &argc, &argv) == -ENODEV)
 		return STARPU_TEST_SKIPPED;
 
-	if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1) {
+	if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1)
+	{
 		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}

+ 1 - 2
tests/datawizard/interfaces/test_interfaces.c

@@ -16,8 +16,7 @@
 
 #include <starpu.h>
 
-/* XXX Why cant we dereference a handle without this one ? */
-#include <core/sched_policy.h>
+#include <datawizard/coherency.h>
 
 #include <assert.h>
 

+ 1 - 0
tests/main/starpu_worker_exists.c

@@ -14,6 +14,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+#define BUILDING_STARPU
 #include <starpu.h>
 #include "core/workers.h"
 #include "../helper.h"

+ 2 - 1
tests/microbenchs/bandwidth.c

@@ -170,7 +170,8 @@ static unsigned interleave(unsigned i)
 		return 0;
 }
 
-enum sleep_type {
+enum sleep_type
+{
 	PAUSE,
 	NOP,
 	SYNC,

+ 2 - 0
tests/microbenchs/tasks_size_overhead.c

@@ -228,6 +228,8 @@ int main(int argc, char **argv)
 		goto error;
 	}
 
+	if (mincpus <= 0)
+		mincpus = 1;
 	/* For each number of cpus, benchmark */
 	for (ncpus= mincpus; ncpus <= maxcpus; ncpus += cpustep)
 	{

+ 8 - 1
tests/perfmodels/regression_based_memset.c

@@ -213,7 +213,7 @@ static int bench_energy(int workerid, int where, enum starpu_worker_archtype arc
 		if ( (retval = starpu_energy_start(workerid, archtype)) != 0)
 		{
 			starpu_data_unregister(handle);
-			_STARPU_DISP("Energy measurement not supported for archtype %d\n", archtype);
+			_STARPU_DISP("Energy measurement not supported for archtype %s\n", starpu_perfmodel_get_archtype_name(archtype));
 			return -1;
 		}
 
@@ -328,6 +328,9 @@ int main(int argc, char **argv)
 	starpu_conf_init(&conf);
 
 	/* Use a scheduler which doesn't choose the implementation */
+#ifdef STARPU_HAVE_UNSETENV
+	unsetenv("STARPU_SCHED");
+#endif
 	conf.sched_policy_name = "eager";
 	conf.calibrate = 1;
 
@@ -345,15 +348,19 @@ int main(int argc, char **argv)
 	{
 		memset_cl.cpu_funcs[1] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &memset_cl);
+#ifdef STARPU_HAVE_UNSETENV
 		memset_cl.cpu_funcs[1] = memset_cpu;
 		memset_cl.cpu_funcs[0] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &memset_cl);
+#endif
 
 		nl_memset_cl.cpu_funcs[1] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &nl_memset_cl);
+#ifdef STARPU_HAVE_UNSETENV
 		nl_memset_cl.cpu_funcs[1] = memset_cpu;
 		nl_memset_cl.cpu_funcs[0] = NULL;
 		bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &nl_memset_cl);
+#endif
 	}
 
 	for (i = 0; i < starpu_cuda_worker_get_count(); i++)

+ 0 - 0
tools/dev/checker/starpu_check_copyright.sh


Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio