лет назад: 5 · e962578553
--- a/Makefile.am
+++ b/Makefile.am
@@ -82,8 +82,8 @@ versinclude_HEADERS = 				\
 
				 	include/starpu_util.h			\
			
 
				 	include/starpu_fxt.h			\
			
 
				 	include/starpu_cuda.h			\
			
 
				-	include/starpu_fpga.h			\
			
 
				 	include/starpu_opencl.h			\
			
 
				+	include/starpu_fpga.h			\
			
 
				 	include/starpu_openmp.h			\
			
 
				 	include/starpu_sink.h			\
			
 
				 	include/starpu_mic.h			\
			
--- a/configure.ac
+++ b/configure.ac
@@ -1848,9 +1848,6 @@ AC_ARG_ENABLE(maxmicthreads, [AS_HELP_STRING([--enable-maxmicthreads=<number>],
 
				 			nmaxmicthreads=$enableval, nmaxmicthreads=120)
			
 
				 AC_MSG_RESULT($nmaxmicthread)
			
 
				 
			
 
				-AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmicthreads],
			
 
				-	[maximum number of MIC cores])
			
 
				-
			
 
				 AC_ARG_WITH(coi-dir,
			
 
				 	[AS_HELP_STRING([--with-coi-dir=<path>],
			
 
				 	[specify the MIC's COI installation directory])],
			
@@ -2487,6 +2484,23 @@ nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* $maxcpus \) + $nmaxcudadev +  $n
 
				 AC_MSG_CHECKING(Maximum number of workers)
			
 
				 AC_MSG_RESULT($nmaxworkers)
			
 
				 AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
			
 
				+nmaxdevs=0
			
 
				+if test $nmaxdevs -lt $nmaxcudadev; then
			
 
				+	nmaxdevs=$nmaxcudadev
			
 
				+fi
			
 
				+if test $nmaxdevs -lt $nmaxopencldev; then
			
 
				+	nmaxdevs=$nmaxopencldev
			
 
				+fi
			
 
				+if test $nmaxdevs -lt $nmaxfpgadev; then
			
 
				+	nmaxdevs=$nmaxfpgadev
			
 
				+fi
			
 
				+if test $nmaxdevs -lt $nmaxmicdev; then
			
 
				+	nmaxdevs=$nmaxmicdev
			
 
				+fi
			
 
				+if test $nmaxdevs -lt $nmaxmpidev; then
			
 
				+	nmaxdevs=$nmaxmpidev
			
 
				+fi
			
 
				+AC_DEFINE_UNQUOTED(STARPU_NMAXDEVS, [$nmaxdevs], [Maximum number of device per device arch])
			
 
				 
			
 
				 # Computes the maximun number of combined worker
			
 
				 nmaxcombinedworkers=`expr $maxcpus + $nmaxmicthreads`
			
--- a/doc/doxygen/doxygen.cfg
+++ b/doc/doxygen/doxygen.cfg
@@ -1613,10 +1613,10 @@ INCLUDE_FILE_PATTERNS  =
 
				 # undefined via #undef or recursively expanded use the := operator
			
 
				 # instead of the = operator.
			
 
				 
			
 
				-PREDEFINED             = STARPU_USE_OPENCL=1 \
			
 
				-                         STARPU_USE_CUDA=1 \
			
 
				-                         STARPU_USE_MIC=1 \
			
 
				+PREDEFINED             = STARPU_USE_CUDA=1 \
			
 
				+                         STARPU_USE_OPENCL=1 \
			
 
				                          STARPU_USE_FPGA=1 \
			
 
				+                         STARPU_USE_MIC=1 \
			
 
				 			 STARPU_USE_MPI=1 \
			
 
				 			 STARPU_HAVE_HWLOC=1 \
			
 
				 			 STARPU_USE_SC_HYPERVISOR=1 \
			
--- a/doc/doxygen_dev/Makefile.am
+++ b/doc/doxygen_dev/Makefile.am
@@ -172,7 +172,6 @@ dox_inputs = $(DOX_CONFIG) 				\
 
				 	$(top_srcdir)/src/common/list.h	\
			
 
				 	$(top_srcdir)/src/debug/starpu_debug_helpers.h	\
			
 
				 	$(top_srcdir)/src/debug/traces/starpu_fxt.h	\
			
 
				-	$(top_srcdir)/src/starpu_parameters.h	\
			
 
				 	$(top_srcdir)/src/sched_policies/fifo_queues.h	\
			
 
				 	$(top_srcdir)/src/sched_policies/helper_mct.h	\
			
 
				 	$(top_srcdir)/src/sched_policies/sched_component.h	\
			
--- a/doc/doxygen_dev/doxygen-config.cfg.in
+++ b/doc/doxygen_dev/doxygen-config.cfg.in
@@ -73,7 +73,6 @@ INPUT                  = @top_srcdir@/doc/doxygen_dev/chapters         \
 
				 			 @top_srcdir@/src/common/list.h \
			
 
				 			 @top_srcdir@/src/debug/starpu_debug_helpers.h \
			
 
				 			 @top_srcdir@/src/debug/traces/starpu_fxt.h \
			
 
				-			 @top_srcdir@/src/starpu_parameters.h \
			
 
				 			 @top_srcdir@/src/sched_policies/fifo_queues.h \
			
 
				 			 @top_srcdir@/src/sched_policies/helper_mct.h \
			
 
				 			 @top_srcdir@/src/sched_policies/sched_component.h \
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,6 +20,8 @@ include $(top_srcdir)/starpu.mk
 
				 
			
 
				 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
			
 
				 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
			
 
				+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused
			
 
				+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
			
 
				 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
			
 
				 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
			
--- a/examples/cpp/add_vectors_interface.cpp
+++ b/examples/cpp/add_vectors_interface.cpp
@@ -171,16 +171,17 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 
				 	.ram_to_ram = NULL,
			
 
				 	.ram_to_cuda = NULL,
			
 
				 	.ram_to_opencl = NULL,
			
 
				+	.ram_to_fpga = NULL,
			
 
				 	.ram_to_mic = NULL,
			
 
				 
			
 
				 	.cuda_to_ram = NULL,
			
 
				 	.cuda_to_cuda = NULL,
			
 
				-	.cuda_to_opencl = NULL,
			
 
				 
			
 
				 	.opencl_to_ram = NULL,
			
 
				-	.opencl_to_cuda = NULL,
			
 
				 	.opencl_to_opencl = NULL,
			
 
				 
			
 
				+	.fpga_to_ram = NULL,
			
 
				+
			
 
				 	.mic_to_ram = NULL,
			
 
				 
			
 
				 	.ram_to_mpi_ms = NULL,
			
@@ -195,13 +196,16 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 
				 	.opencl_to_ram_async = NULL,
			
 
				 	.opencl_to_opencl_async = NULL,
			
 
				 
			
 
				-	.ram_to_mpi_ms_async = NULL,
			
 
				-	.mpi_ms_to_ram_async = NULL,
			
 
				-	.mpi_ms_to_mpi_ms_async = NULL,
			
 
				+	.ram_to_fpga_async = NULL,
			
 
				+	.fpga_to_ram_async = NULL,
			
 
				 
			
 
				 	.ram_to_mic_async = NULL,
			
 
				 	.mic_to_ram_async = NULL,
			
 
				 
			
 
				+	.ram_to_mpi_ms_async = NULL,
			
 
				+	.mpi_ms_to_ram_async = NULL,
			
 
				+	.mpi_ms_to_mpi_ms_async = NULL,
			
 
				+
			
 
				 	.any_to_any = vector_interface_copy_any_to_any,
			
 
				 };
			
 
				 #else
			
@@ -213,13 +217,14 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 
				 	NULL,
			
 
				 	NULL,
			
 
				 	NULL,
			
 
				-
			
 
				 	NULL,
			
 
				+
			
 
				 	NULL,
			
 
				 	NULL,
			
 
				 
			
 
				 	NULL,
			
 
				 	NULL,
			
 
				+
			
 
				 	NULL,
			
 
				 
			
 
				 	NULL,
			
@@ -238,14 +243,13 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 
				 
			
 
				 	NULL,
			
 
				 	NULL,
			
 
				-	NULL,
			
 
				 
			
 
				 	NULL,
			
 
				 	NULL,
			
 
				-	NULL,
			
 
				 
			
 
				 	NULL,
			
 
				 	NULL,
			
 
				+	NULL,
			
 
				 
			
 
				 	vector_interface_copy_any_to_any,
			
 
				 };
			
--- a/examples/fortran90/f90_example.f90
+++ b/examples/fortran90/f90_example.f90
@@ -33,8 +33,7 @@ PROGRAM f90_example
 
				   TYPE(type_mesh_elt),POINTER    :: elt   => NULL()
			
 
				   INTEGER(KIND=C_INT)            :: i,Nelt,res,cpus
			
 
				   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
			
 
				-  INTEGER(KIND=C_INT)            :: neq,ng,nb,it,it_tot
			
 
				-  REAL(KIND=C_DOUBLE)            :: r, coeff2
			
 
				+  INTEGER(KIND=C_INT)            :: it,it_tot
			
 
				 
			
 
				   !Initialization with arbitrary data
			
 
				   Nelt           = 2
			
--- a/examples/native_fortran/nf_example.f90
+++ b/examples/native_fortran/nf_example.f90
@@ -32,8 +32,7 @@ PROGRAM f90_example
 
				   TYPE(type_mesh_elt),POINTER    :: elt   => NULL()
			
 
				   INTEGER(KIND=C_INT)            :: i,Nelt,res,cpus
			
 
				   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
			
 
				-  INTEGER(KIND=C_INT)            :: neq,ng,nb,it,it_tot
			
 
				-  REAL(KIND=C_DOUBLE)            :: r, coeff2
			
 
				+  INTEGER(KIND=C_INT)            :: it,it_tot
			
 
				   REAL(KIND=C_DOUBLE),TARGET     :: flops
			
 
				 
			
 
				   TYPE(C_PTR) :: cl_loop_element = C_NULL_PTR ! loop codelet
			
--- a/examples/scheduler/dummy_modular_sched.c
+++ b/examples/scheduler/dummy_modular_sched.c
@@ -215,7 +215,8 @@ static struct starpu_codelet dummy_codelet =
 
				 	.cpu_funcs = {dummy_func},
			
 
				 	.cpu_funcs_name = {"dummy_func"},
			
 
				 	.cuda_funcs = {dummy_func},
			
 
				-        .opencl_funcs = {dummy_func},
			
 
				+	.opencl_funcs = {dummy_func},
			
 
				+	.fpga_funcs = {dummy_func},
			
 
				 	.model = &starpu_perfmodel_nop,
			
 
				 	.nbuffers = 0,
			
 
				 	.name = "dummy",
			
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -254,6 +254,40 @@ struct starpu_conf
 
				 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
			
 
				 
			
 
				 	/**
			
 
				+	   If this flag is set, the FPGA workers will be attached to
			
 
				+	   the FPGA devices specified in the
			
 
				+	   starpu_conf::workers_fpga_deviceid array. Otherwise, StarPU
			
 
				+	   affects the FPGA devices in a round-robin fashion. This
			
 
				+	   can also be specified with the environment variable \ref
			
 
				+	   STARPU_WORKERS_FPGAID.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+        unsigned use_explicit_workers_fpga_deviceid;
			
 
				+
			
 
				+	/**
			
 
				+	   If the starpu_conf::use_explicit_workers_fpga_deviceid flag
			
 
				+	   is set, this array contains the logical identifiers of the
			
 
				+	   FPGA devices to be used.
			
 
				+	*/
			
 
				+	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+#ifdef STARPU_USE_FPGA
			
 
				+	/**
			
 
				+           This allows to specify the Maxeler file(s) to be loaded on FPGAs.
			
 
				+	   This is an array of starpu_max_load, the last of which shall have
			
 
				+	   file set to NULL. In order to use all available devices,
			
 
				+	   starpu_max_load::engine_id_pattern can be set to "*", but only the
			
 
				+           last non-NULL entry can be set so.
			
 
				+
			
 
				+	   If this is not set, it is assumed that the basic static SLiC
			
 
				+           interface is used.
			
 
				+        */
			
 
				+	struct starpu_max_load *fpga_load;
			
 
				+#else
			
 
				+	void *fpga_load;
			
 
				+#endif
			
 
				+
			
 
				+	/**
			
 
				 	   If this flag is set, the MIC workers will be attached to
			
 
				 	   the MIC devices specified in the array
			
 
				 	   starpu_conf::workers_mic_deviceid. Otherwise, StarPU
			
@@ -290,40 +324,6 @@ struct starpu_conf
 
				 	unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS];
			
 
				 
			
 
				 	/**
			
 
				-	   If this flag is set, the FPGA workers will be attached to
			
 
				-	   the FPGA devices specified in the
			
 
				-	   starpu_conf::workers_fpga_deviceid array. Otherwise, StarPU
			
 
				-	   affects the FPGA devices in a round-robin fashion. This
			
 
				-	   can also be specified with the environment variable \ref
			
 
				-	   STARPU_WORKERS_FPGAID.
			
 
				-	   (default = 0)
			
 
				-	*/
			
 
				-        unsigned use_explicit_workers_fpga_deviceid;
			
 
				-
			
 
				-	/**
			
 
				-	   If the starpu_conf::use_explicit_workers_fpga_deviceid flag
			
 
				-	   is set, this array contains the logical identifiers of the
			
 
				-	   FPGA devices to be used.
			
 
				-	*/
			
 
				-	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
			
 
				-
			
 
				-#ifdef STARPU_USE_FPGA
			
 
				-	/**
			
 
				-           This allows to specify the Maxeler file(s) to be loaded on FPGAs.
			
 
				-	   This is an array of starpu_max_load, the last of which shall have
			
 
				-	   file set to NULL. In order to use all available devices,
			
 
				-	   starpu_max_load::engine_id_pattern can be set to "*", but only the
			
 
				-           last non-NULL entry can be set so.
			
 
				-
			
 
				-	   If this is not set, it is assumed that the basic static SLiC
			
 
				-           interface is used.
			
 
				-        */
			
 
				-	struct starpu_max_load *fpga_load;
			
 
				-#else
			
 
				-	void *fpga_files;
			
 
				-#endif
			
 
				-
			
 
				-	/**
			
 
				 	   If this flag is set, StarPU will recalibrate the bus.  If
			
 
				 	   this value is equal to -1, the default value is used. This
			
 
				 	   can also be specified with the environment variable \ref
			
@@ -658,6 +658,12 @@ int starpu_asynchronous_cuda_copy_disabled(void);
 
				 int starpu_asynchronous_opencl_copy_disabled(void);
			
 
				 
			
 
				 /**
			
 
				+   Return 1 if asynchronous data transfers between CPU and FPGA
			
 
				+   devices are disabled.
			
 
				+*/
			
 
				+int starpu_asynchronous_fpga_copy_disabled(void);
			
 
				+
			
 
				+/**
			
 
				    Return 1 if asynchronous data transfers between CPU and MIC devices
			
 
				    are disabled.
			
 
				 */
			
@@ -669,12 +675,6 @@ int starpu_asynchronous_mic_copy_disabled(void);
 
				 */
			
 
				 int starpu_asynchronous_mpi_ms_copy_disabled(void);
			
 
				 
			
 
				-/**
			
 
				-   Return 1 if asynchronous data transfers between CPU and FPGA
			
 
				-   devices are disabled.
			
 
				-*/
			
 
				-int starpu_asynchronous_fpga_copy_disabled(void);
			
 
				-
			
 
				 void starpu_display_stats(void);
			
 
				 
			
 
				 void starpu_get_version(int *major, int *minor, int *release);
			
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -65,6 +65,13 @@
 
				 #undef STARPU_USE_OPENCL
			
 
				 
			
 
				 /**
			
 
				+   Defined when StarPU has been installed with FPGA support. It should
			
 
				+   be used in your code to detect the availability of FPGA.
			
 
				+   @ingroup API_FPGA_Extensions
			
 
				+*/
			
 
				+#undef STARPU_USE_FPGA
			
 
				+
			
 
				+/**
			
 
				    Defined when StarPU has been installed with MIC support. It should
			
 
				    be used in your code to detect the availability of MIC.
			
 
				    @ingroup API_MIC_Extensions
			
@@ -80,13 +87,6 @@
 
				 #undef STARPU_USE_MPI_MASTER_SLAVE
			
 
				 
			
 
				 /**
			
 
				-   Defined when StarPU has been installed with FPGA support. It should
			
 
				-   be used in your code to detect the availability of FPGA.
			
 
				-   @ingroup API_FPGA_Extensions
			
 
				-*/
			
 
				-#undef STARPU_USE_FPGA
			
 
				-
			
 
				-/**
			
 
				    Defined when StarPU has been installed with OpenMP Runtime support.
			
 
				    It should be used in your code to detect the availability of the
			
 
				    runtime support for OpenMP.
			
@@ -224,13 +224,6 @@
 
				 #undef STARPU_MAXCUDADEVS
			
 
				 
			
 
				 /**
			
 
				-   Define the maximum number of FPGA devices that are supported by
			
 
				-   StarPU.
			
 
				-   @ingroup API_FPGA_Extensions
			
 
				- */
			
 
				-#undef STARPU_MAXFPGADEVS
			
 
				-
			
 
				-/**
			
 
				    Define the maximum number of OpenCL devices that are supported by
			
 
				    StarPU.
			
 
				    @ingroup API_OpenCL_Extensions
			
@@ -238,6 +231,13 @@
 
				 #undef STARPU_MAXOPENCLDEVS
			
 
				 
			
 
				 /**
			
 
				+   Define the maximum number of FPGA devices that are supported by
			
 
				+   StarPU.
			
 
				+   @ingroup API_FPGA_Extensions
			
 
				+ */
			
 
				+#undef STARPU_MAXFPGADEVS
			
 
				+
			
 
				+/**
			
 
				    Define the maximum number of MIC devices that are supported by
			
 
				    StarPU.
			
 
				    @ingroup API_MIC_Extensions
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -124,16 +124,16 @@ struct starpu_data_copy_methods
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				 	   \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				-	   dst_node MIC node. Return 0 on success.
			
 
				+	   dst_node FPGA node. Return 0 on success.
			
 
				 	*/
			
 
				-	int (*ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*ram_to_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				 	   \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				-	   dst_node FPGA node. Return 0 on success.
			
 
				+	   dst_node MIC node. Return 0 on success.
			
 
				 	*/
			
 
				-	int (*ram_to_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
@@ -151,13 +151,6 @@ struct starpu_data_copy_methods
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				-	   \p src_node CUDA node to the \p dst_interface interface on the \p
			
 
				-	   dst_node OpenCL node. Return 0 on success.
			
 
				-	*/
			
 
				-	int (*cuda_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				-
			
 
				-	/**
			
 
				-	   Define how to copy data from the \p src_interface interface on the
			
 
				 	   \p src_node OpenCL node to the \p dst_interface interface on the
			
 
				 	   \p dst_node CPU node. Return 0 on success.
			
 
				 	*/
			
@@ -166,30 +159,23 @@ struct starpu_data_copy_methods
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				 	   \p src_node OpenCL node to the \p dst_interface interface on the
			
 
				-	   \p dst_node CUDA node. Return 0 on success.
			
 
				-	*/
			
 
				-	int (*opencl_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				-
			
 
				-	/**
			
 
				-	   Define how to copy data from the \p src_interface interface on the
			
 
				-	   \p src_node OpenCL node to the \p dst_interface interface on the
			
 
				 	   \p dst_node OpenCL node. Return 0 on success.
			
 
				 	*/
			
 
				 	int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				-	   \p src_node MIC node to the \p dst_interface interface on the \p
			
 
				+	   \p src_node FPGA node to the \p dst_interface interface on the \p
			
 
				 	   dst_node CPU node. Return 0 on success.
			
 
				 	*/
			
 
				-	int (*mic_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*fpga_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				-	   \p src_node FPGA node to the \p dst_interface interface on the \p
			
 
				+	   \p src_node MIC node to the \p dst_interface interface on the \p
			
 
				 	   dst_node CPU node. Return 0 on success.
			
 
				 	*/
			
 
				-	int (*fpga_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*mic_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
@@ -288,32 +274,22 @@ struct starpu_data_copy_methods
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				 	   \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				-	   dst_node MPI Slave node, with the given even. Must return 0 if the
			
 
				-	   transfer was actually completed completely synchronously, or
			
 
				-	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				-	   should be awaited for by the core.
			
 
				-	*/
			
 
				-	int (*ram_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
			
 
				-
			
 
				-	/**
			
 
				-	   Define how to copy data from the \p src_interface interface on the
			
 
				-	   \p src_node MPI Slave node to the \p dst_interface interface on
			
 
				-	   the \p dst_node CPU node, with the given event. Must return 0 if
			
 
				-	   the transfer was actually completed completely synchronously, or
			
 
				-	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				-	   should be awaited for by the core.
			
 
				+	   dst_node FPGA node. Must return 0 if the transfer was actually
			
 
				+	   completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				+	   some transfers are still ongoing and should be awaited for by the
			
 
				+	   core.
			
 
				 	*/
			
 
				-	int (*mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
			
 
				+	int (*ram_to_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				-	   \p src_node MPI Slave node to the \p dst_interface interface on
			
 
				-	   the \p dst_node MPI Slave node, using the given stream. Must
			
 
				-	   return 0 if the transfer was actually completed completely
			
 
				-	   synchronously, or <c>-EAGAIN</c> if at least some transfers are
			
 
				-	   still ongoing and should be awaited for by the core.
			
 
				+	   \p src_node FPGA node to the \p dst_interface interface on the \p
			
 
				+	   dst_node CPU node. Must return 0 if the transfer was actually
			
 
				+	   completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				+	   some transfers are still ongoing and should be awaited for by the
			
 
				+	   core.
			
 
				 	*/
			
 
				-	int (*mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
			
 
				+	int (*fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
@@ -338,22 +314,32 @@ struct starpu_data_copy_methods
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				 	   \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				-	   dst_node FPGA node. Must return 0 if the transfer was actually
			
 
				-	   completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-	   some transfers are still ongoing and should be awaited for by the
			
 
				-	   core.
			
 
				+	   dst_node MPI Slave node, with the given even. Must return 0 if the
			
 
				+	   transfer was actually completed completely synchronously, or
			
 
				+	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				+	   should be awaited for by the core.
			
 
				 	*/
			
 
				-	int (*ram_to_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*ram_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
 
				-	   \p src_node FPGA node to the \p dst_interface interface on the \p
			
 
				-	   dst_node CPU node. Must return 0 if the transfer was actually
			
 
				-	   completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-	   some transfers are still ongoing and should be awaited for by the
			
 
				-	   core.
			
 
				+	   \p src_node MPI Slave node to the \p dst_interface interface on
			
 
				+	   the \p dst_node CPU node, with the given event. Must return 0 if
			
 
				+	   the transfer was actually completed completely synchronously, or
			
 
				+	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				+	   should be awaited for by the core.
			
 
				 	*/
			
 
				-	int (*fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
			
 
				+
			
 
				+	/**
			
 
				+	   Define how to copy data from the \p src_interface interface on the
			
 
				+	   \p src_node MPI Slave node to the \p dst_interface interface on
			
 
				+	   the \p dst_node MPI Slave node, using the given stream. Must
			
 
				+	   return 0 if the transfer was actually completed completely
			
 
				+	   synchronously, or <c>-EAGAIN</c> if at least some transfers are
			
 
				+	   still ongoing and should be awaited for by the core.
			
 
				+	*/
			
 
				+	int (*mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
			
 
				 
			
 
				 	/**
			
 
				 	   Define how to copy data from the \p src_interface interface on the
			
--- a/include/starpu_driver.h
+++ b/include/starpu_driver.h
@@ -37,7 +37,13 @@ extern "C"
 
				 */
			
 
				 
			
 
				 /**
			
 
				-   structure for a driver
			
 
				+   Pre-initialize drivers
			
 
				+   So as to register information on device types, memory types, etc.
			
 
				+*/
			
 
				+void starpu_drivers_preinit(void);
			
 
				+
			
 
				+/**
			
 
				+   structure for designating a given driver
			
 
				 */
			
 
				 struct starpu_driver
			
 
				 {
			
@@ -53,11 +59,8 @@ struct starpu_driver
 
				 	{
			
 
				 		unsigned cpu_id;
			
 
				 		unsigned cuda_id;
			
 
				-		unsigned fpga_id;
			
 
				 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
			
 
				 		cl_device_id opencl_id;
			
 
				-#else
			
 
				-		unsigned opencl_id;
			
 
				 #endif
			
 
				 	} id;
			
 
				 };
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -35,8 +35,6 @@ extern "C"
 
				 struct starpu_task;
			
 
				 struct starpu_data_descr;
			
 
				 
			
 
				-#define STARPU_NARCH STARPU_ANY_WORKER
			
 
				-
			
 
				 /**
			
 
				    todo
			
 
				 */
			
@@ -408,7 +406,7 @@ int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, i
 
				 */
			
 
				 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
			
 
				 
			
 
				-char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
			
 
				+const char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
			
 
				 
			
 
				 /**
			
 
				    Return the architecture name for \p arch
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -68,16 +68,16 @@ extern "C"
 
				 /**
			
 
				    To be used when setting the field starpu_codelet::where (or
			
 
				    starpu_task::where) to specify the codelet (or the task) may be
			
 
				-   executed on a MAX FPGA.
			
 
				+   executed on a OpenCL processing unit.
			
 
				 */
			
 
				-#define STARPU_FPGA	STARPU_WORKER_TO_MASK(STARPU_FPGA_WORKER)
			
 
				+#define STARPU_OPENCL	STARPU_WORKER_TO_MASK(STARPU_OPENCL_WORKER)
			
 
				 
			
 
				 /**
			
 
				    To be used when setting the field starpu_codelet::where (or
			
 
				    starpu_task::where) to specify the codelet (or the task) may be
			
 
				-   executed on a OpenCL processing unit.
			
 
				+   executed on a MAX FPGA.
			
 
				 */
			
 
				-#define STARPU_OPENCL	STARPU_WORKER_TO_MASK(STARPU_OPENCL_WORKER)
			
 
				+#define STARPU_FPGA	STARPU_WORKER_TO_MASK(STARPU_FPGA_WORKER)
			
 
				 
			
 
				 /**
			
 
				    To be used when setting the field starpu_codelet::where (or
			
@@ -180,14 +180,14 @@ typedef void (*starpu_cpu_func_t)(void **, void*);
 
				 typedef void (*starpu_cuda_func_t)(void **, void*);
			
 
				 
			
 
				 /**
			
 
				-   FPGA implementation of a codelet.
			
 
				+   OpenCL implementation of a codelet.
			
 
				 */
			
 
				-typedef void (*starpu_fpga_func_t)(void **, void*);
			
 
				+typedef void (*starpu_opencl_func_t)(void **, void*);
			
 
				 
			
 
				 /**
			
 
				-   OpenCL implementation of a codelet.
			
 
				+   FPGA implementation of a codelet.
			
 
				 */
			
 
				-typedef void (*starpu_opencl_func_t)(void **, void*);
			
 
				+typedef void (*starpu_fpga_func_t)(void **, void*);
			
 
				 
			
 
				 /**
			
 
				    MIC implementation of a codelet.
			
@@ -229,21 +229,21 @@ typedef starpu_mpi_ms_kernel_t (*starpu_mpi_ms_func_t)(void);
 
				 
			
 
				 /**
			
 
				    @deprecated
			
 
				-   Setting the field starpu_codelet::fpga_func with this macro
			
 
				+   Setting the field starpu_codelet::opencl_func with this macro
			
 
				    indicates the codelet will have several implementations. The use of
			
 
				    this macro is deprecated. One should always only define the field
			
 
				-   starpu_codelet::fpga_funcs.
			
 
				+   starpu_codelet::opencl_funcs.
			
 
				 */
			
 
				-#define STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS   ((starpu_fpga_func_t) -1)
			
 
				+#define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t) -1)
			
 
				 
			
 
				 /**
			
 
				    @deprecated
			
 
				-   Setting the field starpu_codelet::opencl_func with this macro
			
 
				+   Setting the field starpu_codelet::fpga_func with this macro
			
 
				    indicates the codelet will have several implementations. The use of
			
 
				    this macro is deprecated. One should always only define the field
			
 
				-   starpu_codelet::opencl_funcs.
			
 
				+   starpu_codelet::fpga_funcs.
			
 
				 */
			
 
				-#define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t) -1)
			
 
				+#define STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS   ((starpu_fpga_func_t) -1)
			
 
				 
			
 
				 /**
			
 
				    Value to set in starpu_codelet::nbuffers to specify that the
			
@@ -349,19 +349,19 @@ struct starpu_codelet
 
				 	*/
			
 
				 	starpu_cuda_func_t cuda_func STARPU_DEPRECATED;
			
 
				 
			
 
				-        /**
			
 
				+	/**
			
 
				 	   @deprecated
			
 
				 	   Optional field which has been made deprecated. One should
			
 
				-	   use instead the starpu_codelet::fpga_funcs field.
			
 
				+	   use instead the starpu_codelet::opencl_funcs field.
			
 
				 	*/
			
 
				-	starpu_fpga_func_t fpga_func STARPU_DEPRECATED;
			
 
				+	starpu_opencl_func_t opencl_func STARPU_DEPRECATED;
			
 
				 
			
 
				         /**
			
 
				 	   @deprecated
			
 
				 	   Optional field which has been made deprecated. One should
			
 
				-	   use instead the starpu_codelet::opencl_funcs field.
			
 
				+	   use instead the starpu_codelet::fpga_funcs field.
			
 
				 	*/
			
 
				-	starpu_opencl_func_t opencl_func STARPU_DEPRECATED;
			
 
				+	starpu_fpga_func_t fpga_func STARPU_DEPRECATED;
			
 
				 
			
 
				 	/**
			
 
				 	   Optional array of function pointers to the CPU
			
@@ -396,23 +396,6 @@ struct starpu_codelet
 
				 	starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS];
			
 
				 
			
 
				 	/**
			
 
				-           Optional array of function pointers to the FPGA
			
 
				-           implementations of the codelet. The functions prototype
			
 
				-           must be:
			
 
				-           \code{.c}
			
 
				-           void fpga_func(void *buffers[], void *cl_arg)
			
 
				-           \endcode
			
 
				-           The first argument being the array of data managed by the
			
 
				-           data management library, and the second argument is a
			
 
				-           pointer to the argument passed from the field
			
 
				-           starpu_task::cl_arg. If the field starpu_codelet::where is
			
 
				-           set, then the field starpu_codelet::fpga_funcs is ignored if
			
 
				-           ::STARPU_FPGA does not appear in the field
			
 
				-           starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
			
 
				-        */
			
 
				-	starpu_fpga_func_t fpga_funcs[STARPU_MAXIMPLEMENTATIONS];
			
 
				-
			
 
				-	/**
			
 
				 	   Optional array of flags for CUDA execution. They specify
			
 
				 	   some semantic details about CUDA kernel execution, such as
			
 
				 	   asynchronous execution.
			
@@ -441,6 +424,23 @@ struct starpu_codelet
 
				 	char opencl_flags[STARPU_MAXIMPLEMENTATIONS];
			
 
				 
			
 
				 	/**
			
 
				+           Optional array of function pointers to the FPGA
			
 
				+           implementations of the codelet. The functions prototype
			
 
				+           must be:
			
 
				+           \code{.c}
			
 
				+           void fpga_func(void *buffers[], void *cl_arg)
			
 
				+           \endcode
			
 
				+           The first argument being the array of data managed by the
			
 
				+           data management library, and the second argument is a
			
 
				+           pointer to the argument passed from the field
			
 
				+           starpu_task::cl_arg. If the field starpu_codelet::where is
			
 
				+           set, then the field starpu_codelet::fpga_funcs is ignored if
			
 
				+           ::STARPU_FPGA does not appear in the field
			
 
				+           starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
			
 
				+        */
			
 
				+	starpu_fpga_func_t fpga_funcs[STARPU_MAXIMPLEMENTATIONS];
			
 
				+
			
 
				+	/**
			
 
				 	   Optional array of function pointers to a function which
			
 
				 	   returns the MIC implementation of the codelet. The
			
 
				 	   functions prototype must be:
			
@@ -482,11 +482,6 @@ struct starpu_codelet
 
				 	const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
			
 
				 
			
 
				 	/**
			
 
				-	   fpga kernel type
			
 
				-        */
			
 
				-	char *fpga_kernel_type[STARPU_MAXIMPLEMENTATIONS];
			
 
				-
			
 
				-	/**
			
 
				 	   Specify the number of arguments taken by the codelet. These
			
 
				 	   arguments are managed by the DSM and are accessed from the
			
 
				 	   <c>void *buffers[]</c> array. The constant argument passed
			
--- a/include/starpu_worker.h
+++ b/include/starpu_worker.h
@@ -47,10 +47,11 @@ enum starpu_node_kind
 
				 	STARPU_CPU_RAM=1,
			
 
				 	STARPU_CUDA_RAM=2,
			
 
				 	STARPU_OPENCL_RAM=3,
			
 
				-	STARPU_DISK_RAM=4,
			
 
				-	STARPU_MIC_RAM=5,
			
 
				-	STARPU_FPGA_RAM=6,
			
 
				-	STARPU_MPI_MS_RAM=7
			
 
				+	STARPU_FPGA_RAM=4,
			
 
				+	STARPU_DISK_RAM=5,
			
 
				+	STARPU_MIC_RAM=6,
			
 
				+	STARPU_MPI_MS_RAM=7,
			
 
				+	STARPU_MAX_RAM=7
			
 
				 };
			
 
				 
			
 
				 /**
			
@@ -65,10 +66,10 @@ enum starpu_worker_archtype
 
				 	STARPU_CPU_WORKER=0,        /**< CPU core */
			
 
				 	STARPU_CUDA_WORKER=1,       /**< NVIDIA CUDA device */
			
 
				 	STARPU_OPENCL_WORKER=2,     /**< OpenCL device */
			
 
				-	STARPU_MIC_WORKER=3,        /**< Intel MIC device */
			
 
				 	STARPU_FPGA_WORKER=4,       /**< FPGA device */
			
 
				+	STARPU_MIC_WORKER=3,        /**< Intel MIC device */
			
 
				 	STARPU_MPI_MS_WORKER=5,     /**< MPI Slave device */
			
 
				-	STARPU_MAX_WORKER=5,        /**< maximum value of STARPU_*_WORKER */
			
 
				+	STARPU_NARCH = 6,           /**< Number of arch types */
			
 
				 	STARPU_ANY_WORKER=255       /**< any worker, used in the hypervisor */
			
 
				 };
			
 
				 
			
@@ -318,9 +319,9 @@ unsigned starpu_worker_is_slave_somewhere(int workerid);
 
				 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
			
 
				 
			
 
				 /**
			
 
				-   Return worker \p type as a trivial string (CPU, CUDA, etc.)
			
 
				+   Return worker \p type as a string suitable for environment variable names (CPU, CUDA, etc.)
			
 
				 */
			
 
				-const char *starpu_worker_get_type_as_short_string(enum starpu_worker_archtype type);
			
 
				+const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type);
			
 
				 
			
 
				 int starpu_bindid_get_workerids(int bindid, int **workerids);
			
 
				 
			
@@ -388,6 +389,12 @@ enum starpu_node_kind starpu_node_get_kind(unsigned node);
 
				 enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind);
			
 
				 
			
 
				 /**
			
 
				+   Return the type of memory node that arch type \p type operates on
			
 
				+  */
			
 
				+enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type);
			
 
				+
			
 
				+
			
 
				+/**
			
 
				    @name Scheduling operations
			
 
				    @{
			
 
				 */
			
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -138,9 +138,9 @@ noinst_HEADERS = 						\
 
				 	drivers/mp_common/sink_common.h				\
			
 
				 	drivers/cpu/driver_cpu.h				\
			
 
				 	drivers/cuda/driver_cuda.h				\
			
 
				-	drivers/max/driver_fpga.h				\
			
 
				 	drivers/opencl/driver_opencl.h				\
			
 
				 	drivers/opencl/driver_opencl_utils.h			\
			
 
				+	drivers/max/driver_fpga.h				\
			
 
				 	debug/starpu_debug_helpers.h				\
			
 
				 	drivers/mic/driver_mic_common.h				\
			
 
				 	drivers/mic/driver_mic_source.h				\
			
@@ -156,7 +156,6 @@ noinst_HEADERS = 						\
 
				 	util/starpu_clusters_create.h				\
			
 
				 	util/starpu_task_insert_utils.h				\
			
 
				 	util/starpu_data_cpy.h					\
			
 
				-	starpu_parameters.h					\
			
 
				 	sched_policies/prio_deque.h				\
			
 
				 	sched_policies/sched_component.h
			
 
				 
			
@@ -322,16 +321,13 @@ if STARPU_HAVE_LEVELDB
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_leveldb.cpp
			
 
				 endif
			
 
				 
			
 
				-if STARPU_USE_FPGA
			
 
				-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/max/driver_fpga.c
			
 
				-endif
			
 
				-
			
 
				 if STARPU_HAVE_HDF5
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_hdf5.c
			
 
				 endif
			
 
				 
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cpu/driver_cpu.c
			
 
				 
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/driver_cuda_init.c
			
 
				 if STARPU_USE_CUDA
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/driver_cuda.c
			
 
				 else
			
@@ -343,6 +339,7 @@ endif
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cublas.c
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cusparse.c
			
 
				 
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl_init.c
			
 
				 if STARPU_USE_OPENCL
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.c
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl_utils.c
			
@@ -352,6 +349,11 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.
 
				 endif
			
 
				 endif
			
 
				 
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/max/driver_fpga_init.c
			
 
				+if STARPU_USE_FPGA
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/max/driver_fpga.c
			
 
				+endif
			
 
				+
			
 
				 if STARPU_LINUX_SYS
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_unistd_o_direct.c
			
 
				 endif
			
@@ -387,6 +389,7 @@ endif
 
				 #										#
			
 
				 #########################################
			
 
				 
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_init.c
			
 
				 if STARPU_USE_MIC
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_common.c
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_source.c
			
@@ -400,6 +403,7 @@ endif
 
				 #                                       #
			
 
				 #########################################
			
 
				 
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mpi/driver_mpi_init.c
			
 
				 if STARPU_USE_MPI_MASTER_SLAVE
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mpi/driver_mpi_common.c
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mpi/driver_mpi_source.c
			
--- a/src/common/fxt.h
+++ b/src/common/fxt.h
@@ -37,13 +37,8 @@
 
				 #include <starpu.h>
			
 
				 
			
 
				 /* some key to identify the worker kind */
			
 
				-#define _STARPU_FUT_APPS_KEY	0x100
			
 
				-#define _STARPU_FUT_CPU_KEY	0x101
			
 
				-#define _STARPU_FUT_CUDA_KEY	0x102
			
 
				-#define _STARPU_FUT_FPGA_KEY	0x109
			
 
				-#define _STARPU_FUT_OPENCL_KEY	0x103
			
 
				-#define _STARPU_FUT_MIC_KEY	0x104
			
 
				-#define _STARPU_FUT_MPI_KEY	0x106
			
 
				+#define _STARPU_FUT_WORKER_KEY(kind) (kind + 0x100)
			
 
				+#define _STARPU_FUT_KEY_WORKER(key) (key - 0x100)
			
 
				 
			
 
				 #define _STARPU_FUT_WORKER_INIT_START	0x5100
			
 
				 #define _STARPU_FUT_WORKER_INIT_END	0x5101
			
@@ -738,10 +733,9 @@ do {									\
 
				 		FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid()); \
			
 
				 } while (0)
			
 
				 
			
 
				-/* workerkind = _STARPU_FUT_CPU_KEY for instance */
			
 
				 #define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync)	do {\
			
 
				 	if (_starpu_fxt_started) \
			
 
				-		FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, workerkind, workerid, devid, memnode, bindid, sync, _starpu_gettid()); \
			
 
				+		FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, _STARPU_FUT_WORKER_KEY(workerkind), workerid, devid, memnode, bindid, sync, _starpu_gettid()); \
			
 
				 } while (0)
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_INIT_END(__workerid)		do {\
			
@@ -931,7 +925,7 @@ do {										\
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		do {\
			
 
				 	if (_starpu_fxt_started) \
			
 
				-		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, _starpu_gettid()); \
			
 
				+		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, _STARPU_FUT_WORKER_KEY(workerkind), _starpu_gettid()); \
			
 
				 } while(0)
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_SCHEDULING_START	\
			
--- a/src/core/detect_combined_workers.c
+++ b/src/core/detect_combined_workers.c
@@ -252,7 +252,7 @@ static void find_and_assign_combinations_without_hwloc(int *workerids, int nwork
 
				 	int cpu_workers[STARPU_NMAXWORKERS];
			
 
				 	unsigned ncpus = 0;
			
 
				 #ifdef STARPU_USE_MIC
			
 
				-	unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices;
			
 
				+	unsigned nb_mics = _starpu_get_machine_config()->topology.ndevices[STARPU_MIC_WORKER];
			
 
				 	unsigned * nmics_table;
			
 
				 	int * mic_id;
			
 
				 	int ** mic_workers;
			
--- a/src/core/disk_ops/disk_leveldb.cpp
+++ b/src/core/disk_ops/disk_leveldb.cpp
@@ -27,7 +27,6 @@
 
				 #include <core/perfmodel/perfmodel.h>
			
 
				 #include <datawizard/copy_driver.h>
			
 
				 #include <datawizard/memory_manager.h>
			
 
				-#include <starpu_parameters.h>
			
 
				 
			
 
				 #define NITER	_starpu_calibration_minimum
			
 
				 
			
--- a/src/core/disk_ops/disk_stdio.c
+++ b/src/core/disk_ops/disk_stdio.c
@@ -28,7 +28,6 @@
 
				 #include <datawizard/copy_driver.h>
			
 
				 #include <datawizard/memory_manager.h>
			
 
				 #include <datawizard/memory_nodes.h>
			
 
				-#include <starpu_parameters.h>
			
 
				 
			
 
				 #ifdef STARPU_HAVE_WINDOWS
			
 
				 #  include <io.h>
			
--- a/src/core/disk_ops/unistd/disk_unistd_global.c
+++ b/src/core/disk_ops/unistd/disk_unistd_global.c
@@ -37,7 +37,6 @@
 
				 #include <datawizard/copy_driver.h>
			
 
				 #include <datawizard/data_request.h>
			
 
				 #include <datawizard/memory_manager.h>
			
 
				-#include <starpu_parameters.h>
			
 
				 #include <common/uthash.h>
			
 
				 
			
 
				 #ifdef STARPU_HAVE_WINDOWS
			
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -52,11 +52,7 @@ struct _starpu_worker;
 
				 /** codelet function */
			
 
				 typedef void (*_starpu_cl_func_t)(void **, void *);
			
 
				 
			
 
				-#define _STARPU_CPU_MAY_PERFORM(j)	((j)->task->where & STARPU_CPU)
			
 
				-#define _STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->where & STARPU_CUDA)
			
 
				-#define _STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->where & STARPU_OPENCL)
			
 
				-#define _STARPU_FPGA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_FPGA)
			
 
				-#define _STARPU_MIC_MAY_PERFORM(j)	((j)->task->where & STARPU_MIC)
			
 
				+#define _STARPU_MAY_PERFORM(j, arch)	((j)->task->where & STARPU_##arch)
			
 
				 
			
 
				 struct _starpu_data_descr
			
 
				 {
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -126,18 +126,8 @@ double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch* perf_arc
 
				 	int dev;
			
 
				 	for(dev = 0; dev < perf_arch->ndevices; dev++)
			
 
				 	{
			
 
				-		double coef = 0.0;
			
 
				-		if (perf_arch->devices[dev].type == STARPU_CPU_WORKER)
			
 
				-			coef = _STARPU_CPU_ALPHA;
			
 
				-		else if (perf_arch->devices[dev].type == STARPU_CUDA_WORKER)
			
 
				-			coef = _STARPU_CUDA_ALPHA;
			
 
				-		else if (perf_arch->devices[dev].type == STARPU_OPENCL_WORKER)
			
 
				-			coef = _STARPU_OPENCL_ALPHA;
			
 
				-		else if (perf_arch->devices[dev].type == STARPU_MIC_WORKER)
			
 
				-			coef = _STARPU_MIC_ALPHA;
			
 
				-		else if (perf_arch->devices[dev].type == STARPU_MPI_MS_WORKER)
			
 
				-			coef = _STARPU_MPI_MS_ALPHA;
			
 
				-
			
 
				+		enum starpu_worker_archtype archtype = perf_arch->devices[dev].type;
			
 
				+		double coef = starpu_driver_info[archtype].alpha;
			
 
				 		speedup += coef * (perf_arch->devices[dev].ncores);
			
 
				 	}
			
 
				 	return speedup;
			
@@ -305,7 +295,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 
				 		if (!_starpu_data_is_multiformat_handle(handle))
			
 
				 			continue;
			
 
				 
			
 
				-		node_kind = _starpu_worker_get_node_kind(arch->devices[0].type);
			
 
				+		node_kind = starpu_worker_get_memory_node_kind(arch->devices[0].type);
			
 
				 		if (!_starpu_handle_needs_conversion_task_for_arch(handle, node_kind))
			
 
				 			continue;
			
 
				 
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -87,10 +87,6 @@ void _starpu_load_bus_performance_files(void);
 
				 void _starpu_set_calibrate_flag(unsigned val);
			
 
				 unsigned _starpu_get_calibrate_flag(void);
			
 
				 
			
 
				-#if defined(STARPU_USE_FPGA)
			
 
				-unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid);
			
 
				-#endif
			
 
				-
			
 
				 #if defined(STARPU_USE_CUDA)
			
 
				 unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid);
			
 
				 #endif
			
@@ -98,6 +94,10 @@ unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid);
 
				 unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid);
			
 
				 #endif
			
 
				 
			
 
				+#if defined(STARPU_USE_FPGA)
			
 
				+unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid);
			
 
				+#endif
			
 
				+
			
 
				 void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name);
			
 
				 
			
 
				 void _starpu_write_double(FILE *f, const char *format, double val);
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -119,22 +119,17 @@ static char cudadev_direct[STARPU_MAXNODES][STARPU_MAXNODES];
 
				 static uint64_t opencl_size[STARPU_MAXCUDADEVS];
			
 
				 #endif
			
 
				 
			
 
				-#ifdef STARPU_USE_FPGA
			
 
				-/* preference order of cores (logical indexes) */
			
 
				-static unsigned fpga_affinity_matrix[STARPU_MAXFPGADEVS][STARPU_MAXCPUS];
			
 
				-static double fpgadev_timing_htod[STARPU_MAXNODES] = {0.0};
			
 
				-static double fpgadev_latency_htod[STARPU_MAXNODES] = {0.0};
			
 
				-static double fpgadev_timing_dtoh[STARPU_MAXNODES] = {0.0};
			
 
				-static double fpgadev_latency_dtoh[STARPU_MAXNODES] = {0.0};
			
 
				-static struct dev_timing fpgadev_timing_per_cpu[STARPU_MAXNODES*STARPU_MAXCPUS];
			
 
				-#endif
			
 
				-
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 /* preference order of cores (logical indexes) */
			
 
				 static unsigned opencl_affinity_matrix[STARPU_MAXOPENCLDEVS][STARPU_MAXNUMANODES];
			
 
				 static struct dev_timing opencldev_timing_per_numa[STARPU_MAXOPENCLDEVS*STARPU_MAXNUMANODES];
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_FPGA
			
 
				+/* preference order of cores (logical indexes) */
			
 
				+static unsigned fpga_affinity_matrix[STARPU_MAXFPGADEVS][STARPU_MAXCPUS];
			
 
				+#endif
			
 
				+
			
 
				 #ifdef STARPU_USE_MIC
			
 
				 static double mic_time_host_to_device[STARPU_MAXNODES] = {0.0};
			
 
				 static double mic_time_device_to_host[STARPU_MAXNODES] = {0.0};
			
@@ -301,7 +296,11 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
				 
			
 
				 	cudaFree(d_buffer);
			
 
				 
			
 
				+#if CUDART_VERSION >= 4000
			
 
				+	cudaDeviceReset();
			
 
				+#else
			
 
				 	cudaThreadExit();
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
			
@@ -411,7 +410,11 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 
				 	cudaSetDevice(src);
			
 
				 	cudaFree(s_buffer);
			
 
				 
			
 
				+#if CUDART_VERSION >= 4000
			
 
				+	cudaDeviceReset();
			
 
				+#else
			
 
				 	cudaThreadExit();
			
 
				+#endif
			
 
				 }
			
 
				 #endif
			
 
				 #endif
			
@@ -1122,13 +1125,6 @@ unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid)
 
				 }
			
 
				 #endif /* STARPU_USE_CUDA */
			
 
				 
			
 
				-#ifdef STARPU_USE_FPGA
			
 
				-unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid)
			
 
				-{
			
 
				-        return fpga_affinity_matrix[fpgaid];
			
 
				-}
			
 
				-#endif /* STARPU_USE_FPGA */
			
 
				-
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid)
			
 
				 {
			
@@ -1136,6 +1132,13 @@ unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid)
 
				 }
			
 
				 #endif /* STARPU_USE_OPENCL */
			
 
				 
			
 
				+#ifdef STARPU_USE_FPGA
			
 
				+unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid)
			
 
				+{
			
 
				+        return fpga_affinity_matrix[fpgaid];
			
 
				+}
			
 
				+#endif /* STARPU_USE_FPGA */
			
 
				+
			
 
				 void starpu_bus_print_affinity(FILE *f)
			
 
				 {
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
			
@@ -3055,7 +3058,7 @@ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size
 
				 	int busid = starpu_bus_get_id(src_node, dst_node);
			
 
				 	int direct = starpu_bus_get_direct(busid);
			
 
				 #endif
			
 
				-	float ngpus = topology->ncudagpus+topology->nopenclgpus;
			
 
				+	float ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER];
			
 
				 #ifdef STARPU_DEVEL
			
 
				 #warning FIXME: ngpus should not be used e.g. for slow disk transfers...
			
 
				 #endif
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -33,7 +33,6 @@
 
				 #include <core/perfmodel/regression.h>
			
 
				 #include <core/perfmodel/multiple_regression.h>
			
 
				 #include <common/config.h>
			
 
				-#include <starpu_parameters.h>
			
 
				 #include <common/uthash.h>
			
 
				 #include <limits.h>
			
 
				 #include <core/task.h>
			
@@ -54,7 +53,7 @@ static int current_arch_comb;
 
				 static int nb_arch_combs;
			
 
				 static starpu_pthread_rwlock_t arch_combs_mutex;
			
 
				 static int historymaxerror;
			
 
				-static char ignore_devid[STARPU_ANY_WORKER];
			
 
				+static char ignore_devid[STARPU_NARCH];
			
 
				 
			
 
				 /* How many executions a codelet will have to be measured before we
			
 
				  * consider that calibration will provide a value good enough for scheduling */
			
@@ -1030,16 +1029,9 @@ void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model)
 
				 		fprintf(f, "  <combination>\n");
			
 
				 		for(dev = 0; dev < ndevices; dev++)
			
 
				 		{
			
 
				-			const char *type;
			
 
				-			switch (arch_combs[comb]->devices[dev].type)
			
 
				-			{
			
 
				-				case STARPU_CPU_WORKER: type = "CPU"; break;
			
 
				-				case STARPU_CUDA_WORKER: type = "CUDA"; break;
			
 
				-				case STARPU_OPENCL_WORKER: type = "OpenCL"; break;
			
 
				-				case STARPU_MIC_WORKER: type = "MIC"; break;
			
 
				-				case STARPU_MPI_MS_WORKER: type = "MPI_MS"; break;
			
 
				-				default: STARPU_ASSERT(0);
			
 
				-			}
			
 
				+			enum starpu_worker_archtype archtype = arch_combs[comb]->devices[dev].type;
			
 
				+			const char *type = starpu_driver_info[archtype].name_upper;
			
 
				+			STARPU_ASSERT(type);
			
 
				 			fprintf(f, "    <device type=\"%s\" id=\"%d\"",
			
 
				 					type,
			
 
				 					arch_combs[comb]->devices[dev].devid);
			
@@ -1225,21 +1217,22 @@ void _starpu_initialize_registered_performance_models(void)
 
				 	starpu_perfmodel_initialize();
			
 
				 
			
 
				 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				-	unsigned ncores = conf->topology.nhwcpus;
			
 
				-	unsigned ncuda =  conf->topology.nhwcudagpus;
			
 
				-	unsigned nopencl = conf->topology.nhwopenclgpus;
			
 
				+	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				+	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
			
 
				+	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
			
 
				 	unsigned nmic = 0;
			
 
				+	enum starpu_worker_archtype archtype;
			
 
				 #if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				 	unsigned i;
			
 
				 #endif
			
 
				 #if STARPU_MAXMICDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwmicdevices; i++)
			
 
				-		nmic += conf->topology.nhwmiccores[i];
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
			
 
				+		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
			
 
				 #endif
			
 
				 	unsigned nmpi = 0;
			
 
				 #if STARPU_MAXMPIDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwmpidevices; i++)
			
 
				-		nmpi += conf->topology.nhwmpicores[i];
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				+		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
			
 
				 #endif
			
 
				 
			
 
				 	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
			
@@ -1249,11 +1242,14 @@ void _starpu_initialize_registered_performance_models(void)
 
				 	current_arch_comb = 0;
			
 
				 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				-	ignore_devid[STARPU_CPU_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
			
 
				-	ignore_devid[STARPU_CUDA_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CUDA", 0);
			
 
				-	ignore_devid[STARPU_OPENCL_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL", 0);
			
 
				-	ignore_devid[STARPU_MIC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MIC", 0);
			
 
				-	ignore_devid[STARPU_MPI_MS_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS", 0);
			
 
				+
			
 
				+	for (archtype = 0; archtype < STARPU_NARCH; archtype++) {
			
 
				+		char name[128];
			
 
				+		const char *arch = starpu_worker_get_type_as_env_var(archtype);
			
 
				+		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
			
 
				+		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
			
 
				+		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
			
@@ -1546,32 +1542,11 @@ int starpu_perfmodel_deinit(struct starpu_perfmodel *model){
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
			
 
				+const char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
			
 
				 {
			
 
				-	switch(archtype)
			
 
				-	{
			
 
				-		case(STARPU_CPU_WORKER):
			
 
				-			return "cpu";
			
 
				-			break;
			
 
				-		case(STARPU_CUDA_WORKER):
			
 
				-			return "cuda";
			
 
				-			break;
			
 
				-		case(STARPU_OPENCL_WORKER):
			
 
				-			return "opencl";
			
 
				-			break;
			
 
				-		case(STARPU_FPGA_WORKER):
			
 
				-			return "fpga";
			
 
				-			break;
			
 
				-		case(STARPU_MIC_WORKER):
			
 
				-			return "mic";
			
 
				-			break;
			
 
				-		case(STARPU_MPI_MS_WORKER):
			
 
				-			return "mpi_ms";
			
 
				-			break;
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				-			break;
			
 
				-	}
			
 
				+	const char *name = starpu_driver_info[archtype].name_lower;
			
 
				+	STARPU_ASSERT(name);
			
 
				+	return name;
			
 
				 }
			
 
				 
			
 
				 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned impl)
			
@@ -2107,7 +2082,7 @@ int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model)
 
				 		fprintf(output, "\tComb %d: %d device%s\n", model->state->combs[comb], arch->ndevices, arch->ndevices>1?"s":"");
			
 
				 		for(device=0 ; device<arch->ndevices ; device++)
			
 
				 		{
			
 
				-			char *name = starpu_perfmodel_get_archtype_name(arch->devices[device].type);
			
 
				+			const char *name = starpu_perfmodel_get_archtype_name(arch->devices[device].type);
			
 
				 			fprintf(output, "\t\tDevice %d: type: %s - devid: %d - ncores: %d\n", device, name, arch->devices[device].devid, arch->devices[device].ncores);
			
 
				 		}
			
 
				 	}
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -629,16 +629,15 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 
				 				&& starpu_get_prefetch_flag()
			
 
				 				&& starpu_memory_nodes_get_count() > 1)
			
 
				 			{
			
 
				-				if (task->where == STARPU_CPU && config->cpus_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);
			
 
				-				else if (task->where == STARPU_CUDA && config->cuda_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->cuda_nodeid);
			
 
				-				else if (task->cl->where == STARPU_FPGA && config->fpga_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->fpga_nodeid);
			
 
				-				else if (task->where == STARPU_OPENCL && config->opencl_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->opencl_nodeid);
			
 
				-				else if (task->where == STARPU_MIC && config->mic_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->mic_nodeid);
			
 
				+				enum starpu_worker_archtype type;
			
 
				+				for (type = 0; type < STARPU_NARCH; type++)
			
 
				+				{
			
 
				+					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) {
			
 
				+						if (config->arch_nodeid[type] >= 0)
			
 
				+							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
			
 
				+						break;
			
 
				+					}
			
 
				+				}
			
 
				 			}
			
 
				 
			
 
				 			STARPU_ASSERT(sched_ctx->sched_policy->push_task);
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -631,35 +631,6 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 
				 		where |= STARPU_CPU;
			
 
				 	}
			
 
				 
			
 
				-       /* FPGA */
			
 
				-	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS && cl->fpga_funcs[0])
			
 
				-	{
			
 
				-		_STARPU_DISP("[warning] [struct starpu_codelet] both fpga_func and fpga_funcs are set. Ignoring fpga_func.\n");
			
 
				-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
			
 
				-	}
			
 
				-	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS)
			
 
				-	{
			
 
				-		cl->fpga_funcs[0] = cl->fpga_func;
			
 
				-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
			
 
				-	}
			
 
				-	some_impl = 0;
			
 
				-	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
			
 
				-		if (cl->fpga_funcs[i])
			
 
				-		{
			
 
				-			some_impl = 1;
			
 
				-			break;
			
 
				-		}
			
 
				-	if (some_impl && cl->fpga_func == 0)
			
 
				-	{
			
 
				-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
			
 
				-	}
			
 
				-	if (some_impl && is_where_unset)
			
 
				-	{
			
 
				-		where |= STARPU_FPGA;
			
 
				-	}
			
 
				-
			
 
				-
			
 
				-
			
 
				 	/* CUDA */
			
 
				 	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0])
			
 
				 	{
			
@@ -714,6 +685,35 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 
				 		where |= STARPU_OPENCL;
			
 
				 	}
			
 
				 
			
 
				+       /* FPGA */
			
 
				+	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS && cl->fpga_funcs[0])
			
 
				+	{
			
 
				+		_STARPU_DISP("[warning] [struct starpu_codelet] both fpga_func and fpga_funcs are set. Ignoring fpga_func.\n");
			
 
				+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
			
 
				+	}
			
 
				+	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS)
			
 
				+	{
			
 
				+		cl->fpga_funcs[0] = cl->fpga_func;
			
 
				+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
			
 
				+	}
			
 
				+	some_impl = 0;
			
 
				+	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
			
 
				+		if (cl->fpga_funcs[i])
			
 
				+		{
			
 
				+			some_impl = 1;
			
 
				+			break;
			
 
				+		}
			
 
				+	if (some_impl && cl->fpga_func == 0)
			
 
				+	{
			
 
				+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
			
 
				+	}
			
 
				+	if (some_impl && is_where_unset)
			
 
				+	{
			
 
				+		where |= STARPU_FPGA;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+
			
 
				 	some_impl = 0;
			
 
				 	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
			
 
				 		if (cl->mic_funcs[i])
			
@@ -1484,38 +1484,29 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 
				 	switch (node_kind)
			
 
				 	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				+		case STARPU_MIC_RAM:
			
 
				+		case STARPU_MPI_MS_RAM:
			
 
				 			switch(starpu_node_get_kind(handle->mf_node))
			
 
				 			{
			
 
				 				case STARPU_CPU_RAM:
			
 
				-					return 0;
			
 
				-				case STARPU_CUDA_RAM:      /* Fall through */
			
 
				-				case STARPU_OPENCL_RAM:
			
 
				 				case STARPU_MIC_RAM:
			
 
				                                 case STARPU_MPI_MS_RAM:
			
 
				-					return 1;
			
 
				+					return 0;
			
 
				 				default:
			
 
				-					STARPU_ABORT();
			
 
				+					return 1;
			
 
				 			}
			
 
				 			break;
			
 
				-		case STARPU_CUDA_RAM:    /* Fall through */
			
 
				-		case STARPU_OPENCL_RAM:
			
 
				-		case STARPU_MIC_RAM:
			
 
				-		case STARPU_MPI_MS_RAM:
			
 
				+		default:
			
 
				 			switch(starpu_node_get_kind(handle->mf_node))
			
 
				 			{
			
 
				 				case STARPU_CPU_RAM:
			
 
				-					return 1;
			
 
				-				case STARPU_CUDA_RAM:
			
 
				-				case STARPU_OPENCL_RAM:
			
 
				 				case STARPU_MIC_RAM:
			
 
				                                 case STARPU_MPI_MS_RAM:
			
 
				-					return 0;
			
 
				+					return 1;
			
 
				 				default:
			
 
				-					STARPU_ABORT();
			
 
				+					return 0;
			
 
				 			}
			
 
				 			break;
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				 	}
			
 
				 	/* that instruction should never be reached */
			
 
				 	return -EINVAL;
			
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -97,14 +97,14 @@ static inline starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct
 
				 	return cl->cuda_funcs[nimpl];
			
 
				 }
			
 
				 
			
 
				-static inline starpu_fpga_func_t _starpu_task_get_fpga_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
 
				+static inline starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
 
				 {
			
 
				-	return cl->fpga_funcs[nimpl];
			
 
				+	return cl->opencl_funcs[nimpl];
			
 
				 }
			
 
				 
			
 
				-static inline starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
 
				+static inline starpu_fpga_func_t _starpu_task_get_fpga_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
 
				 {
			
 
				-	return cl->opencl_funcs[nimpl];
			
 
				+	return cl->fpga_funcs[nimpl];
			
 
				 }
			
 
				 
			
 
				 static inline starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
@@ -122,11 +122,6 @@ static inline const char *_starpu_task_get_cpu_name_nth_implementation(struct st
 
				 	return cl->cpu_funcs_name[nimpl];
			
 
				 }
			
 
				 
			
 
				-static inline char *_starpu_task_get_fpga_kernel_type_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
 
				-{
			
 
				-	return cl->fpga_kernel_type[nimpl];
			
 
				-}
			
 
				-
			
 
				 #define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if (task->dyn_handles) task->dyn_interfaces[i] = interface; else task->interfaces[i] = interface;} while(0)
			
 
				 #define _STARPU_TASK_GET_INTERFACES(task) ((task->dyn_handles) ? task->dyn_interfaces : task->interfaces)
			
 
				 
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -84,7 +84,7 @@ static int _starpu_get_logical_numa_node_worker(unsigned workerid);
 
				 #define STARPU_NUMA_UNINITIALIZED (-2)
			
 
				 #define STARPU_NUMA_MAIN_RAM (-1)
			
 
				 
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE) || defined(STARPU_USE_FPGA)
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
			
 
				 
			
 
				 struct handle_entry
			
 
				 {
			
@@ -464,7 +464,7 @@ struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
 
				  * Discover the topology of the machine
			
 
				  */
			
 
				 
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE) || defined(STARPU_USE_FPGA)
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
			
 
				 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
			
 
				 						int *current, int *workers_gpuid,
			
 
				 						const char *varname, unsigned nhwgpus,
			
@@ -560,42 +560,18 @@ static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config
 
				 					    &(config->current_cuda_gpuid),
			
 
				 					    (int *)topology->workers_cuda_gpuid,
			
 
				 					    "STARPU_WORKERS_CUDAID",
			
 
				-					    topology->nhwcudagpus,
			
 
				+					    topology->nhwdevices[STARPU_CUDA_WORKER],
			
 
				 					    STARPU_CUDA_WORKER);
			
 
				 }
			
 
				 
			
 
				 static inline int _starpu_get_next_cuda_gpuid(struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
			
 
				+	unsigned i = ((config->current_cuda_gpuid++) % config->topology.ndevices[STARPU_CUDA_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_cuda_gpuid[i];
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-#if defined(STARPU_USE_FPGA)
			
 
				-static void _starpu_initialize_workers_fpga_deviceid(struct _starpu_machine_config *config)
			
 
				-{
			
 
				-	struct _starpu_machine_topology *topology = &config->topology;
			
 
				-	struct starpu_conf *uconf = &config->conf;
			
 
				-
			
 
				-        _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_fpga_deviceid == 0
			
 
				-					    ? NULL
			
 
				-					    : (int *)uconf->workers_fpga_deviceid,
			
 
				-					    &(config->current_fpga_deviceid),
			
 
				-					    (int *)topology->workers_fpga_deviceid,
			
 
				-					    "STARPU_WORKERS_FPGAID",
			
 
				-					    topology->nhwfpgafpgas,
			
 
				-					    STARPU_FPGA_WORKER);
			
 
				-}
			
 
				-
			
 
				-static inline int _starpu_get_next_fpga_deviceid (struct _starpu_machine_config *config)
			
 
				-{
			
 
				-	unsigned i = ((config->current_fpga_deviceid++) % config->topology.nfpgafpgas);
			
 
				-
			
 
				-	return (int)config->topology.workers_fpga_deviceid[i];
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				 static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config*config)
			
 
				 {
			
@@ -608,7 +584,7 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				 					    &(config->current_opencl_gpuid),
			
 
				 					    (int *)topology->workers_opencl_gpuid,
			
 
				 					    "STARPU_WORKERS_OPENCLID",
			
 
				-					    topology->nhwopenclgpus,
			
 
				+					    topology->nhwdevices[STARPU_OPENCL_WORKER],
			
 
				 					    STARPU_OPENCL_WORKER);
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
@@ -671,12 +647,36 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				 
			
 
				 static inline int _starpu_get_next_opencl_gpuid(struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
			
 
				+	unsigned i = ((config->current_opencl_gpuid++) % config->topology.ndevices[STARPU_OPENCL_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_opencl_gpuid[i];
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+#if defined(STARPU_USE_FPGA)
			
 
				+static void _starpu_initialize_workers_fpga_deviceid(struct _starpu_machine_config *config)
			
 
				+{
			
 
				+	struct _starpu_machine_topology *topology = &config->topology;
			
 
				+	struct starpu_conf *uconf = &config->conf;
			
 
				+
			
 
				+        _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_fpga_deviceid == 0
			
 
				+					    ? NULL
			
 
				+					    : (int *)uconf->workers_fpga_deviceid,
			
 
				+					    &(config->current_fpga_deviceid),
			
 
				+					    (int *)topology->workers_fpga_deviceid,
			
 
				+					    "STARPU_WORKERS_FPGAID",
			
 
				+					    topology->nhwdevices[STARPU_FPGA_WORKER],
			
 
				+					    STARPU_FPGA_WORKER);
			
 
				+}
			
 
				+
			
 
				+static inline int _starpu_get_next_fpga_deviceid (struct _starpu_machine_config *config)
			
 
				+{
			
 
				+	unsigned i = ((config->current_fpga_deviceid++) % config->topology.ndevices[STARPU_FPGA_WORKER]);
			
 
				+
			
 
				+	return (int)config->topology.workers_fpga_deviceid[i];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 #if 0
			
 
				 #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
			
 
				 static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
			
@@ -690,7 +690,7 @@ static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_confi
 
				 					    &(config->current_mic_deviceid),
			
 
				 					    (int *)topology->workers_mic_deviceid,
			
 
				 					    "STARPU_WORKERS_MICID",
			
 
				-					    topology->nhwmiccores,
			
 
				+					    topology->nhwdevices[STARPU_MIC_WORKER],
			
 
				 					    STARPU_MIC_WORKER);
			
 
				 }
			
 
				 #endif
			
@@ -700,7 +700,7 @@ static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_confi
 
				 #ifdef STARPU_USE_MIC
			
 
				 static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
			
 
				+	unsigned i = ((config->current_mic_deviceid++) % config->topology.ndevices[STARPU_MIC_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_mic_deviceid[i];
			
 
				 }
			
@@ -710,7 +710,7 @@ static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *c
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				 static inline int _starpu_get_next_mpi_deviceid(struct _starpu_machine_config *config)
			
 
				 {
			
 
				-	unsigned i = ((config->current_mpi_deviceid++) % config->topology.nmpidevices);
			
 
				+	unsigned i = ((config->current_mpi_deviceid++) % config->topology.ndevices[STARPU_MPI_MS_WORKER]);
			
 
				 
			
 
				 	return (int)config->topology.workers_mpi_ms_deviceid[i];
			
 
				 }
			
@@ -719,14 +719,14 @@ static void _starpu_init_mpi_topology(struct _starpu_machine_config *config, lon
 
				 {
			
 
				 	/* Discover the topology of the mpi node identifier by MPI_IDX. That
			
 
				 	 * means, make this StarPU instance aware of the number of cores available
			
 
				-	 * on this MPI device. Update the `nhwmpicores' topology field
			
 
				+	 * on this MPI device. Update the `nhwworker[STARPU_MPI_MS_WORKER]' topology field
			
 
				 	 * accordingly. */
			
 
				 
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				 	int nbcores;
			
 
				 	_starpu_src_common_sink_nbcores(_starpu_mpi_ms_nodes[mpi_idx], &nbcores);
			
 
				-	topology->nhwmpicores[mpi_idx] = nbcores;
			
 
				+	topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = nbcores;
			
 
				 }
			
 
				 
			
 
				 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
			
@@ -736,14 +736,14 @@ static void _starpu_init_mic_topology(struct _starpu_machine_config *config, lon
 
				 {
			
 
				 	/* Discover the topology of the mic node identifier by MIC_IDX. That
			
 
				 	 * means, make this StarPU instance aware of the number of cores available
			
 
				-	 * on this MIC device. Update the `nhwmiccores' topology field
			
 
				+	 * on this MIC device. Update the `nhwworker[STARPU_MIC_WORKER]' topology field
			
 
				 	 * accordingly. */
			
 
				 
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				 	int nbcores;
			
 
				 	_starpu_src_common_sink_nbcores(_starpu_mic_nodes[mic_idx], &nbcores);
			
 
				-	topology->nhwmiccores[mic_idx] = nbcores;
			
 
				+	topology->nhwworker[STARPU_MIC_WORKER][mic_idx] = nbcores;
			
 
				 }
			
 
				 
			
 
				 static int _starpu_init_mic_node(struct _starpu_machine_config *config, int mic_idx,
			
@@ -862,7 +862,8 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				 
			
 
				 	nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
			
 
				 
			
 
				-	topology->nhwcpus = 0;
			
 
				+	topology->nhwdevices[STARPU_CPU_WORKER] = 1;
			
 
				+	topology->nhwworker[STARPU_CPU_WORKER][0] = 0;
			
 
				 	topology->nhwpus = 0;
			
 
				 
			
 
				 #ifndef STARPU_SIMGRID
			
@@ -906,7 +907,7 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				 #endif
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-	config->topology.nhwcpus = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
			
 
				+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
			
 
				 #elif defined(STARPU_HAVE_HWLOC)
			
 
				 	/* Discover the CPUs relying on the hwloc interface and fills CONFIG
			
 
				 	 * accordingly. */
			
@@ -926,24 +927,24 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				 							 HWLOC_OBJ_PU);
			
 
				 	}
			
 
				 
			
 
				-	topology->nhwcpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
			
 
				+	topology->nhwworker[STARPU_CPU_WORKER][0] = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
			
 
				 	topology->nhwpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth);
			
 
				 
			
 
				 #elif defined(HAVE_SYSCONF)
			
 
				 	/* Discover the CPUs relying on the sysconf(3) function and fills
			
 
				 	 * CONFIG accordingly. */
			
 
				 
			
 
				-	config->topology.nhwcpus = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
			
 
				+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
			
 
				 
			
 
				 #elif defined(_WIN32)
			
 
				 	/* Discover the CPUs on Cygwin and MinGW systems. */
			
 
				 
			
 
				 	SYSTEM_INFO sysinfo;
			
 
				 	GetSystemInfo(&sysinfo);
			
 
				-	config->topology.nhwcpus = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
			
 
				+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
			
 
				 #else
			
 
				 #warning no way to know number of cores, assuming 1
			
 
				-	config->topology.nhwcpus = config->topology.nhwpus = 1;
			
 
				+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = 1;
			
 
				 #endif
			
 
				 
			
 
				 	if (config->conf.ncuda != 0)
			
@@ -953,7 +954,7 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				         if (config->conf.nfpga != 0)
			
 
				 		_starpu_fpga_discover_devices(config);
			
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				-        config->topology.nhwmpi = _starpu_mpi_src_get_device_count();
			
 
				+        config->topology.nhwdevices[STARPU_MPI_MS_WORKER] = _starpu_mpi_src_get_device_count();
			
 
				 #endif
			
 
				 
			
 
				 	topology_is_initialized = 1;
			
@@ -968,7 +969,7 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 	unsigned i;
			
 
				 
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				-	int nhyperthreads = topology->nhwpus / topology->nhwcpus;
			
 
				+	int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
			
 
				 	unsigned bind_on_core = 0;
			
 
				 	int scale = 1;
			
 
				 
			
@@ -1032,7 +1033,7 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 						}
			
 
				 						else
			
 
				 						{
			
 
				-							endval = (bind_on_core ? topology->nhwcpus : topology->nhwpus) - 1;
			
 
				+							endval = (bind_on_core ? topology->nhwworker[STARPU_CPU_WORKER][0] : topology->nhwpus) - 1;
			
 
				 							if (*strval)
			
 
				 								strval++;
			
 
				 						}
			
@@ -1136,7 +1137,7 @@ static inline unsigned _starpu_get_next_bindid(struct _starpu_machine_config *co
 
				 	STARPU_ASSERT_MSG(topology_is_initialized, "The StarPU core is not initialized yet, have you called starpu_init?");
			
 
				 
			
 
				 	unsigned current_preferred;
			
 
				-	unsigned nhyperthreads = topology->nhwpus / topology->nhwcpus;
			
 
				+	unsigned nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
			
 
				 	unsigned ncores = topology->nhwpus / nhyperthreads;
			
 
				 	unsigned i;
			
 
				 
			
@@ -1219,7 +1220,7 @@ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
 
				 #endif
			
 
				 	_starpu_init_topology(config);
			
 
				 
			
 
				-	return config->topology.nhwcpus;
			
 
				+	return config->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				 }
			
 
				 
			
 
				 unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config)
			
@@ -1304,7 +1305,7 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 
				 
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-	topology->nhwmiccores[mic_idx] = 0;
			
 
				+	topology->nhwworker[STARPU_MIC_WORKER][mic_idx] = 0;
			
 
				 
			
 
				 	_starpu_init_mic_topology(config, mic_idx);
			
 
				 
			
@@ -1316,29 +1317,29 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 
				 	{
			
 
				 		/* Nothing was specified, so let's use the number of
			
 
				 		 * detected mic cores. ! */
			
 
				-		nmiccores = topology->nhwmiccores[mic_idx];
			
 
				+		nmiccores = topology->nhwworker[STARPU_MIC_WORKER][mic_idx];
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
			
 
				+		if ((unsigned) nmiccores > topology->nhwworker[STARPU_MIC_WORKER][mic_idx])
			
 
				 		{
			
 
				 			/* The user requires more MIC cores than there is available */
			
 
				-			_STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwmiccores[mic_idx]);
			
 
				-			nmiccores = topology->nhwmiccores[mic_idx];
			
 
				+			_STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwworker[STARPU_MIC_WORKER][mic_idx]);
			
 
				+			nmiccores = topology->nhwworker[STARPU_MIC_WORKER][mic_idx];
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	topology->nmiccores[mic_idx] = nmiccores;
			
 
				-	STARPU_ASSERT_MSG(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
			
 
				-			  "topology->nmiccores[mic_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
			
 
				-			  mic_idx, topology->nmiccores[mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
			
 
				+	topology->nworker[STARPU_MIC_WORKER][mic_idx] = nmiccores;
			
 
				+	STARPU_ASSERT_MSG(topology->nworker[STARPU_MIC_WORKER][mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
			
 
				+			  "topology->nworker[STARPU_MIC_WORKER][mic_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
			
 
				+			  mic_idx, topology->nworker[STARPU_MIC_WORKER][mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
			
 
				 
			
 
				 	/* _starpu_initialize_workers_mic_deviceid (config); */
			
 
				 
			
 
				 	mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
			
 
				-	mic_worker_set[mic_idx].nworkers = topology->nmiccores[mic_idx];
			
 
				+	mic_worker_set[mic_idx].nworkers = topology->nworker[STARPU_MIC_WORKER][mic_idx];
			
 
				 	unsigned miccore_id;
			
 
				-	for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
			
 
				+	for (miccore_id = 0; miccore_id < topology->nworker[STARPU_MIC_WORKER][mic_idx]; miccore_id++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + miccore_id;
			
 
				 		config->workers[worker_idx].set = &mic_worker_set[mic_idx];
			
@@ -1355,7 +1356,7 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 
				 	}
			
 
				 	_starpu_mic_nodes[mic_idx]->baseworkerid = topology->nworkers;
			
 
				 
			
 
				-	topology->nworkers += topology->nmiccores[mic_idx];
			
 
				+	topology->nworkers += topology->nworker[STARPU_MIC_WORKER][mic_idx];
			
 
				 }
			
 
				 
			
 
				 static COIENGINE mic_handles[STARPU_MAXMICDEVS];
			
@@ -1369,7 +1370,7 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
 
				 {
			
 
				         struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-        topology->nhwmpicores[mpi_idx] = 0;
			
 
				+        topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = 0;
			
 
				 
			
 
				         _starpu_init_mpi_topology(config, mpi_idx);
			
 
				 
			
@@ -1380,28 +1381,28 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
 
				         {
			
 
				                 /* Nothing was specified, so let's use the number of
			
 
				                  * detected mpi cores. ! */
			
 
				-                nmpicores = topology->nhwmpicores[mpi_idx];
			
 
				+                nmpicores = topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx];
			
 
				         }
			
 
				         else
			
 
				         {
			
 
				-                if ((unsigned) nmpicores > topology->nhwmpicores[mpi_idx])
			
 
				+                if ((unsigned) nmpicores > topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx])
			
 
				                 {
			
 
				                         /* The user requires more MPI cores than there is available */
			
 
				                         _STARPU_MSG("# Warning: %d MPI cores requested. Only %u available.\n",
			
 
				-				    nmpicores, topology->nhwmpicores[mpi_idx]);
			
 
				-                        nmpicores = topology->nhwmpicores[mpi_idx];
			
 
				+				    nmpicores, topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx]);
			
 
				+                        nmpicores = topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx];
			
 
				                 }
			
 
				         }
			
 
				 
			
 
				-        topology->nmpicores[mpi_idx] = nmpicores;
			
 
				-        STARPU_ASSERT_MSG(topology->nmpicores[mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
			
 
				-                        "topology->nmpicores[mpi_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
			
 
				-                        mpi_idx, topology->nmpicores[mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
			
 
				+        topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx] = nmpicores;
			
 
				+        STARPU_ASSERT_MSG(topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
			
 
				+                        "topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
			
 
				+                        mpi_idx, topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
			
 
				 
			
 
				         mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
			
 
				-        mpi_worker_set[mpi_idx].nworkers = topology->nmpicores[mpi_idx];
			
 
				+        mpi_worker_set[mpi_idx].nworkers = topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx];
			
 
				         unsigned mpicore_id;
			
 
				-        for (mpicore_id = 0; mpicore_id < topology->nmpicores[mpi_idx]; mpicore_id++)
			
 
				+        for (mpicore_id = 0; mpicore_id < topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx]; mpicore_id++)
			
 
				         {
			
 
				                 int worker_idx = topology->nworkers + mpicore_id;
			
 
				                 config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
			
@@ -1418,7 +1419,7 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
 
				         }
			
 
				 	_starpu_mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
			
 
				 
			
 
				-        topology->nworkers += topology->nmpicores[mpi_idx];
			
 
				+        topology->nworkers += topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx];
			
 
				 }
			
 
				 #endif
			
 
				 
			
@@ -1461,13 +1462,13 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		topology->nmicdevices = 0;
			
 
				+		topology->ndevices[STARPU_MIC_WORKER] = 0;
			
 
				 		unsigned i;
			
 
				 		for (i = 0; i < (unsigned) reqmicdevices; i++)
			
 
				 			if (0 == _starpu_init_mic_node(config, i, &mic_handles[i], &_starpu_mic_process[i]))
			
 
				-				topology->nmicdevices++;
			
 
				+				topology->ndevices[STARPU_MIC_WORKER]++;
			
 
				 
			
 
				-		for (i = 0; i < topology->nmicdevices; i++)
			
 
				+		for (i = 0; i < topology->ndevices[STARPU_MIC_WORKER]; i++)
			
 
				 			_starpu_init_mic_config(config, user_conf, i);
			
 
				 	}
			
 
				 #endif
			
@@ -1498,10 +1499,10 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		topology->nmpidevices = reqmpidevices;
			
 
				+		topology->ndevices[STARPU_MPI_MS_WORKER] = reqmpidevices;
			
 
				 
			
 
				 		/* if user don't want to use MPI slaves, we close the slave processes */
			
 
				-		if (no_mp_config && topology->nmpidevices == 0)
			
 
				+		if (no_mp_config && topology->ndevices[STARPU_MPI_MS_WORKER] == 0)
			
 
				 		{
			
 
				 			_starpu_mpi_common_mp_deinit();
			
 
				 			exit(0);
			
@@ -1510,10 +1511,10 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 
				 		if (!no_mp_config)
			
 
				 		{
			
 
				 			unsigned i;
			
 
				-			for (i = 0; i < topology->nmpidevices; i++)
			
 
				+			for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				 				_starpu_mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
			
 
				 
			
 
				-			for (i = 0; i < topology->nmpidevices; i++)
			
 
				+			for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				 				_starpu_init_mpi_config(config, user_conf, i);
			
 
				 		}
			
 
				 	}
			
@@ -1549,12 +1550,12 @@ static void _starpu_deinit_mp_config(struct _starpu_machine_config *config)
 
				 	unsigned i;
			
 
				 
			
 
				 #ifdef STARPU_USE_MIC
			
 
				-	for (i = 0; i < topology->nmicdevices; i++)
			
 
				+	for (i = 0; i < topology->ndevices[STARPU_MIC_WORKER]; i++)
			
 
				 		_starpu_deinit_mic_node(i);
			
 
				 	_starpu_mic_clear_kernels();
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				-	for (i = 0; i < topology->nmpidevices; i++)
			
 
				+	for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				 		_starpu_deinit_mpi_node(i);
			
 
				 #endif
			
 
				 }
			
@@ -1664,9 +1665,10 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 	}
			
 
				 
			
 
				 	/* Now we know how many CUDA devices will be used */
			
 
				-	topology->ncudagpus = ncuda;
			
 
				-	topology->nworkerpercuda = nworker_per_cuda;
			
 
				-	STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
			
 
				+	topology->ndevices[STARPU_CUDA_WORKER] = ncuda;
			
 
				+	for (i = 0; i < ncuda; i++)
			
 
				+		topology->nworker[STARPU_CUDA_WORKER][i] = nworker_per_cuda;
			
 
				+	STARPU_ASSERT(topology->ndevices[STARPU_CUDA_WORKER] <= STARPU_MAXCUDADEVS);
			
 
				 
			
 
				 	_starpu_initialize_workers_cuda_gpuid(config);
			
 
				 
			
@@ -1693,11 +1695,11 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 	if (!topology->cuda_th_per_dev)
			
 
				 	{
			
 
				 		cuda_worker_set[0].workers = &config->workers[topology->nworkers];
			
 
				-		cuda_worker_set[0].nworkers = topology->ncudagpus * nworker_per_cuda;
			
 
				+		cuda_worker_set[0].nworkers = topology->ndevices[STARPU_CUDA_WORKER] * nworker_per_cuda;
			
 
				 	}
			
 
				 
			
 
				 	unsigned cudagpu;
			
 
				-	for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
			
 
				+	for (cudagpu = 0; cudagpu < topology->ndevices[STARPU_CUDA_WORKER]; cudagpu++)
			
 
				 	{
			
 
				 		int devid = _starpu_get_next_cuda_gpuid(config);
			
 
				 		int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
			
@@ -1769,7 +1771,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 #endif
			
 
				         }
			
 
				 
			
 
				-	topology->nworkers += topology->ncudagpus * nworker_per_cuda;
			
 
				+	topology->nworkers += topology->ndevices[STARPU_CUDA_WORKER] * nworker_per_cuda;
			
 
				 #endif
			
 
				 
			
 
				 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
@@ -1813,20 +1815,22 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	topology->nopenclgpus = nopencl;
			
 
				-	STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				+	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
			
 
				+	for (i = 0; i < nopencl; i++)
			
 
				+		topology->nworker[STARPU_CUDA_WORKER][i] = 1;
			
 
				+	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				 
			
 
				 	_starpu_initialize_workers_opencl_gpuid(config);
			
 
				 
			
 
				 	unsigned openclgpu;
			
 
				-	for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
			
 
				+	for (openclgpu = 0; openclgpu < topology->ndevices[STARPU_OPENCL_WORKER]; openclgpu++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + openclgpu;
			
 
				 		int devid = _starpu_get_next_opencl_gpuid(config);
			
 
				 		if (devid == -1)
			
 
				 		{
			
 
				 			// There is no more devices left
			
 
				-			topology->nopenclgpus = openclgpu;
			
 
				+			topology->ndevices[STARPU_OPENCL_WORKER] = openclgpu;
			
 
				 			break;
			
 
				 		}
			
 
				 		config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
			
@@ -1841,7 +1845,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		config->worker_mask |= STARPU_OPENCL;
			
 
				 	}
			
 
				 
			
 
				-	topology->nworkers += topology->nopenclgpus;
			
 
				+	topology->nworkers += topology->ndevices[STARPU_OPENCL_WORKER];
			
 
				 #endif
			
 
				 
			
 
				 
			
@@ -1884,19 +1888,21 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	topology->nfpgafpgas = nfpga;
			
 
				-	STARPU_ASSERT(topology->nfpgafpgas + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				+	topology->ndevices[STARPU_FPGA_WORKER] = nfpga;
			
 
				+	for (i = 0; i < nfpga; i++)
			
 
				+		topology->nworker[STARPU_FPGA_WORKER][i] = 1;
			
 
				+	STARPU_ASSERT(topology->ndevices[STARPU_FPGA_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				 
			
 
				 	_starpu_initialize_workers_fpga_deviceid(config);
			
 
				 
			
 
				 	unsigned fpgafpga;
			
 
				-	for (fpgafpga = 0; fpgafpga < topology->nfpgafpgas; fpgafpga++)
			
 
				+	for (fpgafpga = 0; fpgafpga < topology->ndevices[STARPU_FPGA_WORKER]; fpgafpga++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + fpgafpga;
			
 
				 		int devid = _starpu_get_next_fpga_deviceid(config);
			
 
				 		if (devid == -1)
			
 
				 		{ // There is no more devices left
			
 
				-			topology->nfpgafpgas = fpgafpga;
			
 
				+			topology->ndevices[STARPU_FPGA_WORKER] = fpgafpga;
			
 
				 			break;
			
 
				 		}
			
 
				 		config->workers[worker_idx].arch = STARPU_FPGA_WORKER;
			
@@ -1911,7 +1917,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		config->worker_mask |= STARPU_FPGA;
			
 
				 	}
			
 
				 
			
 
				-	topology->nworkers += topology->nfpgafpgas;
			
 
				+	topology->nworkers += topology->ndevices[STARPU_FPGA_WORKER];
			
 
				 #endif
			
 
				 
			
 
				 #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
			
@@ -1931,13 +1937,13 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 			unsigned mic_busy_cpus = 0;
			
 
				 			int j = 0;
			
 
				 			for (j = 0; j < STARPU_MAXMICDEVS; j++)
			
 
				-				mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);
			
 
				+				mic_busy_cpus += (topology->nworker[STARPU_MIC_WORKER][j] ? 1 : 0);
			
 
				 
			
 
				 			unsigned mpi_ms_busy_cpus = 0;
			
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				 #ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
			
 
				 			for (j = 0; j < STARPU_MAXMPIDEVS; j++)
			
 
				-				mpi_ms_busy_cpus += (topology->nmpicores[j] ? 1 : 0);
			
 
				+				mpi_ms_busy_cpus += (topology->nworker[STARPU_MPI_MS_WORKER][j] ? 1 : 0);
			
 
				 #else
			
 
				 			mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
			
 
				 #endif
			
@@ -1945,15 +1951,15 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 			unsigned cuda_busy_cpus = 0;
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				 			cuda_busy_cpus =
			
 
				-				topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ? (topology->ncudagpus ? 1 : 0) :
			
 
				-				topology->cuda_th_per_stream ? (nworker_per_cuda * topology->ncudagpus) : topology->ncudagpus;
			
 
				+				topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ? (topology->ndevices[STARPU_CUDA_WORKER] ? 1 : 0) :
			
 
				+				topology->cuda_th_per_stream ? (nworker_per_cuda * topology->ndevices[STARPU_CUDA_WORKER]) : topology->ndevices[STARPU_CUDA_WORKER];
			
 
				 #endif
			
 
				 			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
			
 
				 				+ cuda_busy_cpus
			
 
				-				+ topology->nopenclgpus
			
 
				-				+ topology->nfpgafpgas;
			
 
				+				+ topology->ndevices[STARPU_OPENCL_WORKER];
			
 
				+				+ topology->ndevices[STARPU_FPGA_WORKER];
			
 
				 
			
 
				-			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
			
 
				+			long avail_cpus = (long) topology->nhwworker[STARPU_CPU_WORKER][0] - (long) already_busy_cpus;
			
 
				 			if (avail_cpus < 0)
			
 
				 				avail_cpus = 0;
			
 
				 			int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
			
@@ -1983,12 +1989,13 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 
			
 
				 	}
			
 
				 
			
 
				-	topology->ncpus = ncpu;
			
 
				-	STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				+	topology->ndevices[STARPU_CPU_WORKER] = 1;
			
 
				+	topology->nworker[STARPU_CPU_WORKER][0] = ncpu;
			
 
				+	STARPU_ASSERT(topology->nworker[STARPU_CPU_WORKER][0] + topology->nworkers <= STARPU_NMAXWORKERS);
			
 
				 
			
 
				 	unsigned cpu;
			
 
				 	unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
			
 
				-	for (cpu = 0; cpu < topology->ncpus; cpu++)
			
 
				+	for (cpu = 0; cpu < topology->nworker[STARPU_CPU_WORKER][0]; cpu++)
			
 
				 	{
			
 
				 		int worker_idx = topology->nworkers + cpu;
			
 
				 		config->workers[worker_idx].arch = STARPU_CPU_WORKER;
			
@@ -2003,7 +2010,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
				 		config->worker_mask |= STARPU_CPU;
			
 
				 	}
			
 
				 
			
 
				-	topology->nworkers += topology->ncpus;
			
 
				+	topology->nworkers += topology->nworker[STARPU_CPU_WORKER][0];
			
 
				 #endif
			
 
				 
			
 
				 	if (topology->nworkers == 0)
			
@@ -2136,7 +2143,7 @@ int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid S
 
				 
			
 
				 			if (workerid >= 0)
			
 
				 				/* This shouldn't happen for workers */
			
 
				-				_STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwcpus, config->topology.nhwpus);
			
 
				+				_STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwworker[STARPU_CPU_WORKER][0], config->topology.nhwpus);
			
 
				 			ret = -1;
			
 
				 		}
			
 
				 		else
			
@@ -2426,7 +2433,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 #endif
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
			
 
				-		for (i = 0; i < config->topology.ncudagpus; i++)
			
 
				+		for (i = 0; i < config->topology.ndevices[STARPU_CUDA_WORKER]; i++)
			
 
				 		{
			
 
				 			hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
			
 
				 			if (obj)
			
@@ -2461,7 +2468,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 		}
			
 
				 #endif
			
 
				 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
			
 
				-		if (config->topology.nopenclgpus > 0)
			
 
				+		if (config->topology.ndevices[STARPU_OPENCL_WORKER] > 0)
			
 
				 		{
			
 
				 			cl_int err;
			
 
				 			cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
			
@@ -2657,17 +2664,18 @@ static void _starpu_init_workers_binding_and_memory(struct _starpu_machine_confi
 
				 	int cuda_globalbindid = -1;
			
 
				 #endif
			
 
				 
			
 
				+#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				+	unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
			
 
				+	unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
			
 
				+	unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
			
 
				+#endif
			
 
				+
			
 
				 #if defined(STARPU_USE_FPGA)
			
 
				 	unsigned fpga_init[STARPU_MAXFPGADEVS] = { };
			
 
				 	unsigned fpga_memory_nodes[STARPU_MAXFPGADEVS];
			
 
				 	unsigned fpga_bindid[STARPU_MAXFPGADEVS];
			
 
				 #endif
			
 
				 
			
 
				-#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				-	unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
			
 
				-	unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
			
 
				-	unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
			
 
				-#endif
			
 
				 #ifdef STARPU_USE_MIC
			
 
				 	unsigned mic_init[STARPU_MAXMICDEVS] = { };
			
 
				 	unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
			
@@ -3128,6 +3136,7 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 
				 {
			
 
				 	int ret;
			
 
				 	unsigned i;
			
 
				+	enum starpu_worker_archtype type;
			
 
				 
			
 
				 	ret = _starpu_init_machine_config(config, no_mp_config);
			
 
				 	if (ret)
			
@@ -3141,57 +3150,16 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 
				 
			
 
				 	_starpu_mem_chunk_init_last();
			
 
				 
			
 
				-	config->cpus_nodeid = -1;
			
 
				-	config->cuda_nodeid = -1;
			
 
				-	config->opencl_nodeid = -1;
			
 
				-	config->mic_nodeid = -1;
			
 
				-	config->fpga_nodeid = -1;
			
 
				-	config->mpi_nodeid = -1;
			
 
				+	for (type = 0; type < STARPU_NARCH; type++)
			
 
				+		config->arch_nodeid[type] = -1;
			
 
				+
			
 
				 	for (i = 0; i < starpu_worker_get_count(); i++)
			
 
				 	{
			
 
				-		switch (starpu_worker_get_type(i))
			
 
				-		{
			
 
				-			case STARPU_CPU_WORKER:
			
 
				-				if (config->cpus_nodeid == -1)
			
 
				-					config->cpus_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->cpus_nodeid = -2;
			
 
				-				break;
			
 
				-
			
 
				-			case STARPU_CUDA_WORKER:
			
 
				-				if (config->cuda_nodeid == -1)
			
 
				-					config->cuda_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->cuda_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->cuda_nodeid = -2;
			
 
				-				break;
			
 
				-
			
 
				-                        case STARPU_FPGA_WORKER:
			
 
				-				if (config->fpga_nodeid == -1)
			
 
				-					config->fpga_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->fpga_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->fpga_nodeid = -2;
			
 
				-				break;
			
 
				-			case STARPU_OPENCL_WORKER:
			
 
				-				if (config->opencl_nodeid == -1)
			
 
				-					config->opencl_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->opencl_nodeid = -2;
			
 
				-				break;
			
 
				-			case STARPU_MIC_WORKER:
			
 
				-				if (config->mic_nodeid == -1)
			
 
				-					config->mic_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->mic_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->mic_nodeid = -2;
			
 
				-				break;
			
 
				-			case STARPU_MPI_MS_WORKER:
			
 
				-				if (config->mpi_nodeid == -1)
			
 
				-					config->mpi_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->mpi_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->mpi_nodeid = -2;
			
 
				-				break;
			
 
				-			case STARPU_ANY_WORKER:
			
 
				-				STARPU_ASSERT(0);
			
 
				-		}
			
 
				+		type = starpu_worker_get_type(i);
			
 
				+		if (config->arch_nodeid[type] == -1)
			
 
				+			config->arch_nodeid[type] = starpu_worker_get_memory_node(i);
			
 
				+		else if (config->arch_nodeid[type] != (int) starpu_worker_get_memory_node(i))
			
 
				+			config->arch_nodeid[type] = -2;
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
@@ -3219,7 +3187,7 @@ void starpu_topology_print(FILE *output)
 
				 	unsigned worker;
			
 
				 	unsigned nworkers = starpu_worker_get_count();
			
 
				 	unsigned ncombinedworkers = topology->ncombinedworkers;
			
 
				-	unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;
			
 
				+	unsigned nthreads_per_core = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 	hwloc_topology_t topo = topology->hwtopology;
			
@@ -3308,5 +3276,5 @@ unsigned _starpu_get_nhyperthreads()
 
				 {
			
 
				 	struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				 
			
 
				-	return config->topology.nhwpus / config->topology.nhwcpus;
			
 
				+	return config->topology.nhwpus / config->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				 }
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -45,6 +45,10 @@
 
				 #include <drivers/cpu/driver_cpu.h>
			
 
				 #include <drivers/cuda/driver_cuda.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				+#include <drivers/max/driver_fpga.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				+#include <drivers/mpi/driver_mpi_source.h>
			
 
				+#include <drivers/disk/driver_disk.h>
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 #include <core/simgrid.h>
			
@@ -203,6 +207,20 @@ void _starpu__workers_c__register_kobs(void)
 
				 	/* TODO */
			
 
				 }
			
 
				 
			
 
				+struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
			
 
				+
			
 
				+void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info)
			
 
				+{
			
 
				+	starpu_driver_info[archtype] = *info;
			
 
				+}
			
 
				+
			
 
				+struct starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1];
			
 
				+
			
 
				+void starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct starpu_memory_driver_info *info)
			
 
				+{
			
 
				+	starpu_memory_driver_info[kind] = *info;
			
 
				+}
			
 
				+
			
 
				 /* Initialize value of static argc and argv, called when the process begins
			
 
				  */
			
 
				 void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
			
@@ -402,14 +420,14 @@ static inline int _starpu_can_use_nth_implementation(enum starpu_worker_archtype
 
				 		starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
			
 
				 		return func != NULL;
			
 
				 	}
			
 
				-        case STARPU_FPGA_WORKER:
			
 
				+	case STARPU_OPENCL_WORKER:
			
 
				 	{
			
 
				-		starpu_fpga_func_t func = _starpu_task_get_fpga_nth_implementation(cl, nimpl);
			
 
				+		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
			
 
				 		return func != NULL;
			
 
				 	}
			
 
				-	case STARPU_OPENCL_WORKER:
			
 
				+        case STARPU_FPGA_WORKER:
			
 
				 	{
			
 
				-		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
			
 
				+		starpu_fpga_func_t func = _starpu_task_get_fpga_nth_implementation(cl, nimpl);
			
 
				 		return func != NULL;
			
 
				 	}
			
 
				 	case STARPU_MIC_WORKER:
			
@@ -632,10 +650,12 @@ static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
 
				 			if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
			
 
				 				return 0;
			
 
				 			break;
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				 		case STARPU_OPENCL_WORKER:
			
 
				 			if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
			
 
				 				return 0;
			
 
				 			break;
			
 
				+#endif
			
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 		}
			
@@ -759,23 +779,23 @@ static void _starpu_worker_deinit(struct _starpu_worker *workerarg)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				-void _starpu_worker_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync)
			
 
				+void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync)
			
 
				 {
			
 
				 	unsigned devid = worker->devid;
			
 
				 	unsigned memnode = worker->memory_node;
			
 
				-	_STARPU_TRACE_WORKER_INIT_START(fut_key, worker->workerid, devid, memnode, worker->bindid, sync);
			
 
				+	_STARPU_TRACE_WORKER_INIT_START(archtype, worker->workerid, devid, memnode, worker->bindid, sync);
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync STARPU_ATTRIBUTE_UNUSED)
			
 
				+void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync STARPU_ATTRIBUTE_UNUSED)
			
 
				 {
			
 
				-	(void) fut_key;
			
 
				+	(void) archtype;
			
 
				 	int devid = worker->devid;
			
 
				 	(void) devid;
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	_STARPU_TRACE_REGISTER_THREAD(worker->bindid);
			
 
				-	_starpu_worker_start(worker, fut_key, sync);
			
 
				+	_starpu_worker_start(worker, archtype, sync);
			
 
				 #endif
			
 
				 	_starpu_set_local_worker_key(worker);
			
 
				 
			
@@ -886,30 +906,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 			}
			
 
				 #endif
			
 
				 
			
 
				-#if defined(STARPU_USE_FPGA)
			
 
				-			case STARPU_FPGA_WORKER:
			
 
				-				driver.id.fpga_id = workerarg->devid;
			
 
				-				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
			
 
				-				{
			
 
				-					workerarg->run_by_starpu = 0;
			
 
				-					break;
			
 
				-				}
			
 
				-				STARPU_PTHREAD_CREATE_ON(
			
 
				-					workerarg->name,
			
 
				-					&workerarg->worker_thread,
			
 
				-					NULL,
			
 
				-					_starpu_fpga_worker,
			
 
				-					workerarg,
			
 
				-					_starpu_simgrid_get_host_by_worker(workerarg));
			
 
				-#ifdef STARPU_USE_FXT
			
 
				-				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			
 
				-				while (!workerarg->worker_is_running)
			
 
				-					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
			
 
				-				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			
 
				-#endif
			
 
				-				break;
			
 
				-#endif
			
 
				-
			
 
				 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				 			case STARPU_OPENCL_WORKER:
			
 
				 			{
			
@@ -937,6 +933,29 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 			}
			
 
				 #endif
			
 
				 
			
 
				+#if defined(STARPU_USE_FPGA)
			
 
				+			case STARPU_FPGA_WORKER:
			
 
				+				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
			
 
				+				{
			
 
				+					workerarg->run_by_starpu = 0;
			
 
				+					break;
			
 
				+				}
			
 
				+				STARPU_PTHREAD_CREATE_ON(
			
 
				+					workerarg->name,
			
 
				+					&workerarg->worker_thread,
			
 
				+					NULL,
			
 
				+					_starpu_fpga_worker,
			
 
				+					workerarg,
			
 
				+					_starpu_simgrid_get_host_by_worker(workerarg));
			
 
				+#ifdef STARPU_USE_FXT
			
 
				+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			
 
				+				while (!workerarg->worker_is_running)
			
 
				+					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
			
 
				+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			
 
				+#endif
			
 
				+				break;
			
 
				+#endif
			
 
				+
			
 
				 #ifdef STARPU_USE_MIC
			
 
				 			case STARPU_MIC_WORKER:
			
 
				 			{
			
@@ -1017,7 +1036,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 	}
			
 
				 
			
 
				 #if defined(STARPU_USE_MPI_MASTER_SLAVE) && !defined(STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD)
			
 
				-        if (pconfig->topology.nmpidevices > 0)
			
 
				+        if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
			
 
				         {
			
 
				                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
			
 
				                 struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
			
@@ -1157,6 +1176,14 @@ int starpu_conf_init(struct starpu_conf *conf)
 
				 		conf->disable_asynchronous_opencl_copy = 0;
			
 
				 #endif
			
 
				 
			
 
				+#if defined(STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY)
			
 
				+	conf->disable_asynchronous_fpga_copy = 1;
			
 
				+#else
			
 
				+	conf->disable_asynchronous_fpga_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY");
			
 
				+	if (conf->disable_asynchronous_fpga_copy == -1)
			
 
				+		conf->disable_asynchronous_fpga_copy = 0;
			
 
				+#endif
			
 
				+
			
 
				 #if defined(STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY)
			
 
				 	conf->disable_asynchronous_mic_copy = 1;
			
 
				 #else
			
@@ -1173,14 +1200,6 @@ int starpu_conf_init(struct starpu_conf *conf)
 
				 		conf->disable_asynchronous_mpi_ms_copy = 0;
			
 
				 #endif
			
 
				 
			
 
				-#if defined(STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY)
			
 
				-	conf->disable_asynchronous_fpga_copy = 1;
			
 
				-#else
			
 
				-	conf->disable_asynchronous_fpga_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY");
			
 
				-	if (conf->disable_asynchronous_fpga_copy == -1)
			
 
				-		conf->disable_asynchronous_fpga_copy = 0;
			
 
				-#endif
			
 
				-
			
 
				 	/* 64MiB by default */
			
 
				 	conf->trace_buffer_size = ((uint64_t) starpu_get_env_number_default("STARPU_TRACE_BUFFER_SIZE", 64)) << 20;
			
 
				 
			
@@ -1231,8 +1250,8 @@ void _starpu_conf_check_environment(struct starpu_conf *conf)
 
				 	if (main_thread_bind)
			
 
				 		conf->reserve_ncpus++;
			
 
				 	_starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda, conf->precedence_over_environment_variables);
			
 
				-        _starpu_conf_set_value_against_environment("STARPU_NFPGA", &conf->nfpga, conf->precedence_over_environment_variables);
			
 
				 	_starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl, conf->precedence_over_environment_variables);
			
 
				+        _starpu_conf_set_value_against_environment("STARPU_NFPGA", &conf->nfpga, conf->precedence_over_environment_variables);
			
 
				 	_starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate, conf->precedence_over_environment_variables);
			
 
				 	_starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate, conf->precedence_over_environment_variables);
			
 
				 #ifdef STARPU_SIMGRID
			
@@ -1249,9 +1268,9 @@ void _starpu_conf_check_environment(struct starpu_conf *conf)
 
				 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy, conf->precedence_over_environment_variables);
			
 
				 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy, conf->precedence_over_environment_variables);
			
 
				 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy, conf->precedence_over_environment_variables);
			
 
				+	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY", &conf->disable_asynchronous_fpga_copy, conf->precedence_over_environment_variables);
			
 
				 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY", &conf->disable_asynchronous_mic_copy, conf->precedence_over_environment_variables);
			
 
				 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY", &conf->disable_asynchronous_mpi_ms_copy, conf->precedence_over_environment_variables);
			
 
				-	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY", &conf->disable_asynchronous_fpga_copy, conf->precedence_over_environment_variables);
			
 
				 }
			
 
				 
			
 
				 struct starpu_tree* starpu_workers_get_tree(void)
			
@@ -1426,6 +1445,16 @@ int _starpu_get_catch_signals(void)
 
				 	return _starpu_config.conf.catch_signals;
			
 
				 }
			
 
				 
			
 
				+void starpu_drivers_preinit(void) {
			
 
				+	_starpu_cpu_preinit();
			
 
				+	_starpu_cuda_preinit();
			
 
				+	_starpu_opencl_preinit();
			
 
				+	_starpu_fpga_preinit();
			
 
				+	_starpu_mic_preinit();
			
 
				+	_starpu_mpi_ms_preinit();
			
 
				+	_starpu_disk_preinit();
			
 
				+}
			
 
				+
			
 
				 int starpu_init(struct starpu_conf *user_conf)
			
 
				 {
			
 
				 	return starpu_initialize(user_conf, NULL, NULL);
			
@@ -1610,6 +1639,9 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 
				 
			
 
				 	_starpu_load_bus_performance_files();
			
 
				 
			
 
				+	/* Let drivers register themselves */
			
 
				+	starpu_drivers_preinit();
			
 
				+
			
 
				 	/* Note: nothing before here should be allocating anything, in case we
			
 
				 	 * actually return ENODEV here */
			
 
				 
			
@@ -2118,32 +2150,22 @@ unsigned starpu_worker_is_slave_somewhere(int workerid)
 
				 
			
 
				 int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
			
 
				 {
			
 
				-	switch (type)
			
 
				-	{
			
 
				-		case STARPU_CPU_WORKER:
			
 
				-			return _starpu_config.topology.ncpus;
			
 
				-
			
 
				-		case STARPU_CUDA_WORKER:
			
 
				-			return _starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda;
			
 
				+	unsigned n = 0;
			
 
				 
			
 
				-		case STARPU_OPENCL_WORKER:
			
 
				-			return _starpu_config.topology.nopenclgpus;
			
 
				-
			
 
				-		case STARPU_MIC_WORKER:
			
 
				-			return _starpu_config.topology.nmicdevices;
			
 
				-
			
 
				-                case STARPU_MPI_MS_WORKER:
			
 
				-                        return _starpu_config.topology.nmpidevices;
			
 
				-
			
 
				-                case STARPU_ANY_WORKER:
			
 
				-                        return _starpu_config.topology.ncpus+
			
 
				-				_starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda+
			
 
				-                                _starpu_config.topology.nopenclgpus+
			
 
				-                                _starpu_config.topology.nmicdevices+
			
 
				-                                _starpu_config.topology.nmpidevices;
			
 
				-		default:
			
 
				+	if (type != STARPU_ANY_WORKER)
			
 
				+	{
			
 
				+		if (type >= STARPU_NARCH)
			
 
				 			return -EINVAL;
			
 
				+
			
 
				+		unsigned i;
			
 
				+		for (i = 0; i < _starpu_config.topology.ndevices[type]; i++)
			
 
				+			n += _starpu_config.topology.nworker[type][i];
			
 
				+		return n;
			
 
				 	}
			
 
				+
			
 
				+	for (type = 0; type < STARPU_NARCH; type++)
			
 
				+		n += starpu_worker_get_count_by_type(type);
			
 
				+	return n;
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_combined_worker_get_count(void)
			
@@ -2153,17 +2175,17 @@ unsigned starpu_combined_worker_get_count(void)
 
				 
			
 
				 unsigned starpu_cpu_worker_get_count(void)
			
 
				 {
			
 
				-	return _starpu_config.topology.ncpus;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_cuda_worker_get_count(void)
			
 
				 {
			
 
				-	return _starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_opencl_worker_get_count(void)
			
 
				 {
			
 
				-	return _starpu_config.topology.nopenclgpus;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER);
			
 
				 }
			
 
				 
			
 
				 int starpu_asynchronous_copy_disabled(void)
			
@@ -2176,14 +2198,14 @@ int starpu_asynchronous_cuda_copy_disabled(void)
 
				 	return _starpu_config.conf.disable_asynchronous_cuda_copy;
			
 
				 }
			
 
				 
			
 
				-int starpu_asynchronous_fpga_copy_disabled(void)
			
 
				+int starpu_asynchronous_opencl_copy_disabled(void)
			
 
				 {
			
 
				-	return _starpu_config.conf.disable_asynchronous_fpga_copy;
			
 
				+	return _starpu_config.conf.disable_asynchronous_opencl_copy;
			
 
				 }
			
 
				 
			
 
				-int starpu_asynchronous_opencl_copy_disabled(void)
			
 
				+int starpu_asynchronous_fpga_copy_disabled(void)
			
 
				 {
			
 
				-	return _starpu_config.conf.disable_asynchronous_opencl_copy;
			
 
				+	return _starpu_config.conf.disable_asynchronous_fpga_copy;
			
 
				 }
			
 
				 
			
 
				 int starpu_asynchronous_mic_copy_disabled(void)
			
@@ -2198,17 +2220,12 @@ int starpu_asynchronous_mpi_ms_copy_disabled(void)
 
				 
			
 
				 unsigned starpu_mic_worker_get_count(void)
			
 
				 {
			
 
				-	int i = 0, count = 0;
			
 
				-
			
 
				-	for (i = 0; i < STARPU_MAXMICDEVS; i++)
			
 
				-		count += _starpu_config.topology.nmiccores[i];
			
 
				-
			
 
				-	return count;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_MIC_WORKER);
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_mpi_ms_worker_get_count(void)
			
 
				 {
			
 
				-        return _starpu_config.topology.nmpidevices;
			
 
				+	return starpu_worker_get_count_by_type(STARPU_MPI_MS_WORKER);
			
 
				 }
			
 
				 
			
 
				 /* When analyzing performance, it is useful to see what is the processing unit
			
@@ -2615,30 +2632,18 @@ unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
 
				 
			
 
				 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
			
 
				 {
			
 
				-	switch (type) {
			
 
				-		case STARPU_CPU_WORKER: return "STARPU_CPU_WORKER";
			
 
				-		case STARPU_CUDA_WORKER: return "STARPU_CUDA_WORKER";
			
 
				-		case STARPU_OPENCL_WORKER: return "STARPU_OPENCL_WORKER";
			
 
				-		case STARPU_FPGA_WORKER: return "STARPU_FPGA_WORKER";
			
 
				-		case STARPU_MIC_WORKER: return "STARPU_MIC_WORKER";
			
 
				-		case STARPU_MPI_MS_WORKER: return "STARPU_MPI_MS_WORKER";
			
 
				-		case STARPU_ANY_WORKER: return "STARPU_ANY_WORKER";
			
 
				-		default: return "STARPU_unknown_WORKER";
			
 
				-	}
			
 
				+	const char *ret = starpu_driver_info[type].name_upper;
			
 
				+	if (!ret)
			
 
				+		ret = "unknown";
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				-const char *starpu_worker_get_type_as_short_string(enum starpu_worker_archtype type)
			
 
				+const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type)
			
 
				 {
			
 
				-	switch (type) {
			
 
				-		case STARPU_CPU_WORKER: return "CPU";
			
 
				-		case STARPU_CUDA_WORKER: return "CUDA";
			
 
				-		case STARPU_OPENCL_WORKER: return "OPENCL";
			
 
				-		case STARPU_FPGA_WORKER: return "FPGA";
			
 
				-		case STARPU_MIC_WORKER: return "MIC";
			
 
				-		case STARPU_MPI_MS_WORKER: return "MPI_MS";
			
 
				-		case STARPU_ANY_WORKER: return "ANY";
			
 
				-		default: return "STARPU_unknown_WORKER";
			
 
				-	}
			
 
				+	const char *ret = starpu_driver_info[type].name_var;
			
 
				+	if (!ret)
			
 
				+		ret = "UNKNOWN";
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx)
			
@@ -2846,22 +2851,9 @@ void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid))
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-enum starpu_node_kind _starpu_worker_get_node_kind(enum starpu_worker_archtype type)
			
 
				+enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type)
			
 
				 {
			
 
				-	switch(type)
			
 
				-	{
			
 
				-		case STARPU_CPU_WORKER:
			
 
				-			return STARPU_CPU_RAM;
			
 
				-		case STARPU_CUDA_WORKER:
			
 
				-			return STARPU_CUDA_RAM;
			
 
				-		case STARPU_OPENCL_WORKER:
			
 
				-			return STARPU_OPENCL_RAM;
			
 
				-			break;
			
 
				-		case STARPU_MIC_WORKER:
			
 
				-			return STARPU_MIC_RAM;
			
 
				-		case STARPU_MPI_MS_WORKER:
			
 
				-			return STARPU_MPI_MS_RAM;
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				-	}
			
 
				+	enum starpu_node_kind kind = starpu_driver_info[type].memory_kind;
			
 
				+	STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type);
			
 
				+	return kind;
			
 
				 }
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -59,8 +59,6 @@
 
				 
			
 
				 #include <datawizard/datawizard.h>
			
 
				 
			
 
				-#include <starpu_parameters.h>
			
 
				-
			
 
				 #define STARPU_MAX_PIPELINE 4
			
 
				 
			
 
				 enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
			
@@ -271,66 +269,33 @@ struct _starpu_machine_topology
 
				 	/** custom hwloc tree*/
			
 
				 	struct starpu_tree *tree;
			
 
				 
			
 
				-	/** Total number of CPU cores, as detected by the topology code. May
			
 
				-	 * be different from the actual number of CPU workers.
			
 
				-	 */
			
 
				-	unsigned nhwcpus;
			
 
				-
			
 
				 	/** Total number of PUs (i.e. threads), as detected by the topology code. May
			
 
				-	 * be different from the actual number of PU workers.
			
 
				+	 * be different from the actual number of CPU workers.
			
 
				 	 */
			
 
				 	unsigned nhwpus;
			
 
				 
			
 
				-	/** Total number of CUDA devices, as detected. May be different
			
 
				-	 * from the actual number of CUDA workers.
			
 
				+	/** Total number of devices, as detected. May be different from the
			
 
				+	 * actual number of devices run by StarPU.
			
 
				 	 */
			
 
				-	unsigned nhwcudagpus;
			
 
				-
			
 
				-	/** Total number of OpenCL devices, as detected. May be
			
 
				-	 * different from the actual number of OpenCL workers.
			
 
				+	unsigned nhwdevices[STARPU_NARCH];
			
 
				+	/** Total number of worker for each device, as detected. May be different from the
			
 
				+	 * actual number of workers run by StarPU.
			
 
				 	 */
			
 
				-	unsigned nhwopenclgpus;
			
 
				+	unsigned nhwworker[STARPU_NARCH][STARPU_NMAXDEVS];
			
 
				 
			
 
				-        /* Total number of FPGA devices, as detected. May be different
			
 
				-	 * from the actual number of FPGA workers.
			
 
				+	/** Actual number of devices used by StarPU.
			
 
				 	 */
			
 
				-	unsigned nhwfpgafpgas;
			
 
				+	unsigned ndevices[STARPU_NARCH];
			
 
				 
			
 
				-	/** Total number of MPI nodes, as detected. May be different
			
 
				-	 * from the actual number of node workers.
			
 
				+	/** Number of worker per device
			
 
				 	 */
			
 
				-	unsigned nhwmpi;
			
 
				+	unsigned nworker[STARPU_NARCH][STARPU_NMAXDEVS];
			
 
				 
			
 
				-	/** Actual number of CPU workers used by StarPU. */
			
 
				-	unsigned ncpus;
			
 
				-
			
 
				-	/** Actual number of CUDA GPUs used by StarPU. */
			
 
				-	unsigned ncudagpus;
			
 
				-	unsigned nworkerpercuda;
			
 
				+	/** Whether we should have one thread per stream */
			
 
				 	int cuda_th_per_stream;
			
 
				+	/** Whether we should have one thread per device */
			
 
				 	int cuda_th_per_dev;
			
 
				 
			
 
				-	/** Actual number of OpenCL workers used by StarPU. */
			
 
				-	unsigned nopenclgpus;
			
 
				-
			
 
				-	/** Actual number of MPI workers used by StarPU. */
			
 
				-	unsigned nmpidevices;
			
 
				-        unsigned nhwmpidevices;
			
 
				-
			
 
				-	unsigned nhwmpicores[STARPU_MAXMPIDEVS]; /**< Each MPI node has its set of cores. */
			
 
				-	unsigned nmpicores[STARPU_MAXMPIDEVS];
			
 
				-
			
 
				-        /* Actual number of Fpga workers used by StarPU. */
			
 
				-	unsigned nfpgafpgas;
			
 
				-
			
 
				-	/** Topology of MP nodes (MIC) as well as necessary
			
 
				-	 * objects to communicate with them. */
			
 
				-	unsigned nhwmicdevices;
			
 
				-	unsigned nmicdevices;
			
 
				-
			
 
				-	unsigned nhwmiccores[STARPU_MAXMICDEVS]; /**< Each MIC node has its set of cores. */
			
 
				-	unsigned nmiccores[STARPU_MAXMICDEVS];
			
 
				-
			
 
				 	/** Indicates the successive logical PU identifier that should be used
			
 
				 	 * to bind the workers. It is either filled according to the
			
 
				 	 * user's explicit parameters (from starpu_conf) or according
			
@@ -356,7 +321,13 @@ struct _starpu_machine_topology
 
				 	 */
			
 
				 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
			
 
				 
			
 
				-        unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
			
 
				+	/** Indicates the successive FPGA identifier that should be
			
 
				+	 * used by the FPGA driver.  It is either filled according
			
 
				+	 * to the user's explicit parameters (from starpu_conf) or
			
 
				+	 * according to the STARPU_WORKERS_FPGAID env. variable.
			
 
				+	 * Otherwise, they are taken in ID order.
			
 
				+	 */
			
 
				+	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
			
 
				 
			
 
				 	/*** Indicates the successive MIC devices that should be used
			
 
				 	 * by the MIC driver.  It is either filled according to the
			
@@ -390,27 +361,17 @@ struct _starpu_machine_config
 
				 	/** Which GPU(s) do we use for OpenCL ? */
			
 
				 	int current_opencl_gpuid;
			
 
				 
			
 
				+        /* Which FPGA(s) do we use for FPGA? */
			
 
				+	int current_fpga_deviceid;
			
 
				+
			
 
				 	/** Which MIC do we use? */
			
 
				 	int current_mic_deviceid;
			
 
				 
			
 
				 	/** Which MPI do we use? */
			
 
				 	int current_mpi_deviceid;
			
 
				 
			
 
				-        /* Which FPGA(s) do we use for FPGA? */
			
 
				-	int current_fpga_deviceid;
			
 
				-
			
 
				-	/** Memory node for cpus, if only one */
			
 
				-	int cpus_nodeid;
			
 
				-	/** Memory node for CUDA, if only one */
			
 
				-	int cuda_nodeid;
			
 
				-	/** Memory node for OpenCL, if only one */
			
 
				-	int opencl_nodeid;
			
 
				-	/** Memory node for MIC, if only one */
			
 
				-	int mic_nodeid;
			
 
				-	/** Memory node for MPI, if only one */
			
 
				-	int mpi_nodeid;
			
 
				-        /* Memory node for FPGA, if only one */
			
 
				-	int fpga_nodeid;
			
 
				+	/** Memory node for different worker types, if only one */
			
 
				+	int arch_nodeid [STARPU_NARCH];
			
 
				 
			
 
				 	/** Separate out previous variables from per-worker data. */
			
 
				 	char padding1[STARPU_CACHELINE_SIZE];
			
@@ -465,6 +426,31 @@ struct _starpu_machine_config
 
				 	int perf_counter_pause_depth;
			
 
				 };
			
 
				 
			
 
				+/** Provides information for a device driver */
			
 
				+struct starpu_driver_info {
			
 
				+	const char *name_upper;	/**< Name of worker type in upper case */
			
 
				+	const char *name_var;	/**< Name of worker type for environment variables */
			
 
				+	const char *name_lower;	/**< Name of worker type in lower case */
			
 
				+	enum starpu_node_kind memory_kind;	/**< Kind of memory in device */
			
 
				+	double alpha;	/**< Typical relative speed compared to a CPU core */
			
 
				+};
			
 
				+
			
 
				+/** Device driver information, indexed by enum starpu_worker_archtype */
			
 
				+extern struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
			
 
				+
			
 
				+void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
			
 
				+
			
 
				+/** Provides information for a memory node driver */
			
 
				+struct starpu_memory_driver_info {
			
 
				+	const char *name_upper;	/**< Name of memory in upper case */
			
 
				+	enum starpu_worker_archtype worker_archtype;	/**< Kind of device */
			
 
				+};
			
 
				+
			
 
				+/** Memory driver information, indexed by enum starpu_node_kind */
			
 
				+extern struct starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1];
			
 
				+
			
 
				+void starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct starpu_memory_driver_info *info);
			
 
				+
			
 
				 extern int _starpu_worker_parallel_blocks;
			
 
				 
			
 
				 extern struct _starpu_machine_config _starpu_config STARPU_ATTRIBUTE_INTERNAL;
			
@@ -522,9 +508,9 @@ unsigned _starpu_worker_can_block(unsigned memnode, struct _starpu_worker *worke
 
				 void _starpu_block_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
			
 
				 
			
 
				 /** This function initializes the current driver for the given worker */
			
 
				-void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync);
			
 
				+void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync);
			
 
				 /** This function initializes the current thread for the given worker */
			
 
				-void _starpu_worker_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync);
			
 
				+void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync);
			
 
				 
			
 
				 static inline unsigned _starpu_worker_get_count(void)
			
 
				 {
			
@@ -673,8 +659,6 @@ static inline unsigned __starpu_worker_get_id_check(const char *f, int l)
 
				 }
			
 
				 #define _starpu_worker_get_id_check(f,l) __starpu_worker_get_id_check(f,l)
			
 
				 
			
 
				-enum starpu_node_kind _starpu_worker_get_node_kind(enum starpu_worker_archtype type);
			
 
				-
			
 
				 void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx);
			
 
				 
			
 
				 struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid);
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -109,7 +109,6 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 
				 	int i_ram = -1;
			
 
				 	int i_gpu = -1;
			
 
				 	int i_disk = -1;
			
 
				-	int i_fpga = -1;
			
 
				 
			
 
				 	/* Revert to dumb strategy: take RAM unless only a GPU has it */
			
 
				 	for (i = 0; i < nnodes; i++)
			
@@ -140,30 +139,24 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 
				 			 * 	Unless peer transfer is supported (and it would then have been selected above).
			
 
				 			 * 	Other should be ok */
			
 
				 
			
 
				-			if (starpu_node_get_kind(i) == STARPU_CUDA_RAM ||
			
 
				-			    starpu_node_get_kind(i) == STARPU_OPENCL_RAM ||
			
 
				-			    starpu_node_get_kind(i) == STARPU_MIC_RAM)
			
 
				-				i_gpu = i;
			
 
				-
			
 
				 			if (starpu_node_get_kind(i) == STARPU_CPU_RAM ||
			
 
				 			    starpu_node_get_kind(i) == STARPU_MPI_MS_RAM)
			
 
				 				i_ram = i;
			
 
				-			if (starpu_node_get_kind(i) == STARPU_DISK_RAM)
			
 
				+			else if (starpu_node_get_kind(i) == STARPU_DISK_RAM)
			
 
				 				i_disk = i;
			
 
				-			if (starpu_node_get_kind(i) == STARPU_FPGA_RAM)
			
 
				-				i_fpga = i;
			
 
				+			else
			
 
				+				i_gpu = i;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	/* we have to use cpu_ram in first */
			
 
				 	if (i_ram != -1)
			
 
				 		src_node = i_ram;
			
 
				-	/* no luck we have to use the disk memory */
			
 
				 	else if (i_gpu != -1)
			
 
				+	/* otherwise a gpu */
			
 
				 		src_node = i_gpu;
			
 
				-	else if (i_fpga != -1)
			
 
				-		src_node = i_fpga;
			
 
				 	else
			
 
				+	/* no luck we have to use the disk memory */
			
 
				 		src_node = i_disk;
			
 
				 
			
 
				 	STARPU_ASSERT(src_node != -1);
			
--- a/src/datawizard/memory_nodes.c
+++ b/src/datawizard/memory_nodes.c
@@ -181,14 +181,7 @@ int starpu_memory_node_get_devid(unsigned node)
 
				 }
			
 
				 
			
 
				 enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) {
			
 
				-	switch (node_kind) {
			
 
				-		// case STARPU_UNUSED:
			
 
				-		case STARPU_CPU_RAM: return STARPU_CPU_WORKER;
			
 
				-		case STARPU_CUDA_RAM: return STARPU_CUDA_WORKER;
			
 
				-		case STARPU_OPENCL_RAM: return STARPU_OPENCL_WORKER;
			
 
				-		// case STARPU_DISK_RAM:
			
 
				-		case STARPU_MIC_RAM: return STARPU_MIC_WORKER;
			
 
				-		case STARPU_MPI_MS_RAM: return STARPU_MPI_MS_WORKER;
			
 
				-		default: STARPU_ASSERT_MSG(0, "ambiguous memory node kind %d", node_kind);
			
 
				-	}
			
 
				+	enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype;
			
 
				+	STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind);
			
 
				+	return archtype;
			
 
				 }
			
--- a/src/datawizard/node_ops.c
+++ b/src/datawizard/node_ops.c
@@ -28,25 +28,7 @@
 
				 
			
 
				 const char* _starpu_node_get_prefix(enum starpu_node_kind kind)
			
 
				 {
			
 
				-	switch (kind)
			
 
				-	{
			
 
				-		case STARPU_CPU_RAM:
			
 
				-			return "NUMA";
			
 
				-		case STARPU_CUDA_RAM:
			
 
				-			return "CUDA";
			
 
				-		case STARPU_OPENCL_RAM:
			
 
				-			return "OpenCL";
			
 
				-		case STARPU_DISK_RAM:
			
 
				-			return "Disk";
			
 
				-		case STARPU_MIC_RAM:
			
 
				-			return "MIC";
			
 
				-		case STARPU_FPGA_RAM:
			
 
				-			return "FPGA";
			
 
				-		case STARPU_MPI_MS_RAM:
			
 
				-			return "MPI_MS";
			
 
				-		case STARPU_UNUSED:
			
 
				-		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				-			return "unknown";
			
 
				-	}
			
 
				+	const char *ret = starpu_memory_driver_info[kind].name_upper;
			
 
				+	STARPU_ASSERT(ret);
			
 
				+	return ret;
			
 
				 }
			
--- a/src/datawizard/node_ops.h
+++ b/src/datawizard/node_ops.h
@@ -46,10 +46,10 @@ typedef int (*copy3d_data_t)(uintptr_t src_ptr, size_t src_offset, unsigned src_
 
				 
			
 
				 struct _starpu_node_ops
			
 
				 {
			
 
				-	copy_interface_func_t copy_interface_to[STARPU_MPI_MS_RAM+1];
			
 
				-	copy_data_t copy_data_to[STARPU_MPI_MS_RAM+1];
			
 
				-	copy2d_data_t copy2d_data_to[STARPU_MPI_MS_RAM+1];
			
 
				-	copy3d_data_t copy3d_data_to[STARPU_MPI_MS_RAM+1];
			
 
				+	copy_interface_func_t copy_interface_to[STARPU_MAX_RAM+1];
			
 
				+	copy_data_t copy_data_to[STARPU_MAX_RAM+1];
			
 
				+	copy2d_data_t copy2d_data_to[STARPU_MAX_RAM+1];
			
 
				+	copy3d_data_t copy3d_data_to[STARPU_MAX_RAM+1];
			
 
				 	void (*wait_request_completion)(struct _starpu_async_channel *async_channel);
			
 
				 	unsigned (*test_request_completion)(struct _starpu_async_channel *async_channel);
			
 
				 	int (*is_direct_access_supported)(unsigned node, unsigned handling_node);
			
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -39,30 +39,18 @@
 
				 #include <starpu_hash.h>
			
 
				 
			
 
				 #define CPUS_WORKER_COLORS_NB	8
			
 
				-#define CUDA_WORKER_COLORS_NB	9
			
 
				-#define OPENCL_WORKER_COLORS_NB 9
			
 
				-#define MIC_WORKER_COLORS_NB	9
			
 
				-#define MPI_MS_WORKER_COLORS_NB	9
			
 
				-#define OTHER_WORKER_COLORS_NB	4
			
 
				+#define ACCEL_WORKER_COLORS_NB	9
			
 
				 
			
 
				 /* How many times longer an idle period has to be before the smoothing
			
 
				  * heuristics avoids averaging codelet gflops */
			
 
				 #define IDLE_FACTOR 2
			
 
				 
			
 
				 static char *cpus_worker_colors[CPUS_WORKER_COLORS_NB] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4",  "/greens9/9", "/greens9/3",  "/greens9/2",  "/greens9/1"  };
			
 
				-static char *cuda_worker_colors[CUDA_WORKER_COLORS_NB] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
			
 
				-static char *opencl_worker_colors[OPENCL_WORKER_COLORS_NB] = {"/blues9/9", "/blues9/6", "/blues9/3", "/blues9/1", "/blues9/8", "/blues9/7", "/blues9/4", "/blues9/2",  "/blues9/1"};
			
 
				-static char *mic_worker_colors[MIC_WORKER_COLORS_NB] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
			
 
				-static char *mpi_ms_worker_colors[MPI_MS_WORKER_COLORS_NB] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
			
 
				-static char *other_worker_colors[OTHER_WORKER_COLORS_NB] = {"/greys9/9", "/greys9/8", "/greys9/7", "/greys9/6"};
			
 
				+static char *accel_worker_colors[ACCEL_WORKER_COLORS_NB] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
			
 
				 static char *worker_colors[STARPU_NMAXWORKERS];
			
 
				 
			
 
				-static unsigned opencl_index = 0;
			
 
				-static unsigned cuda_index = 0;
			
 
				 static unsigned cpus_index = 0;
			
 
				-static unsigned mic_index = 0;
			
 
				-static unsigned mpi_ms_index = 0;
			
 
				-static unsigned other_index = 0;
			
 
				+static unsigned accel_index = 0;
			
 
				 static uint64_t* number_events = NULL;
			
 
				 
			
 
				 static unsigned long fut_keymask;
			
@@ -395,14 +383,6 @@ out:
 
				 	free(data);
			
 
				 }
			
 
				 
			
 
				-static void set_next_other_worker_color(int workerid)
			
 
				-{
			
 
				-	if (workerid >= STARPU_NMAXWORKERS)
			
 
				-		return;
			
 
				-	worker_colors[workerid] = other_worker_colors[other_index++];
			
 
				-	if (other_index == OTHER_WORKER_COLORS_NB) other_index = 0;
			
 
				-}
			
 
				-
			
 
				 static void set_next_cpu_worker_color(int workerid)
			
 
				 {
			
 
				 	if (workerid >= STARPU_NMAXWORKERS)
			
@@ -411,36 +391,12 @@ static void set_next_cpu_worker_color(int workerid)
 
				 	if (cpus_index == CPUS_WORKER_COLORS_NB) cpus_index = 0;
			
 
				 }
			
 
				 
			
 
				-static void set_next_cuda_worker_color(int workerid)
			
 
				-{
			
 
				-	if (workerid >= STARPU_NMAXWORKERS)
			
 
				-		return;
			
 
				-	worker_colors[workerid] = cuda_worker_colors[cuda_index++];
			
 
				-	if (cuda_index == CUDA_WORKER_COLORS_NB) cuda_index = 0;
			
 
				-}
			
 
				-
			
 
				-static void set_next_opencl_worker_color(int workerid)
			
 
				-{
			
 
				-	if (workerid >= STARPU_NMAXWORKERS)
			
 
				-		return;
			
 
				-	worker_colors[workerid] = opencl_worker_colors[opencl_index++];
			
 
				-	if (opencl_index == OPENCL_WORKER_COLORS_NB) opencl_index = 0;
			
 
				-}
			
 
				-
			
 
				-static void set_next_mic_worker_color(int workerid)
			
 
				-{
			
 
				-	if (workerid >= STARPU_NMAXWORKERS)
			
 
				-		return;
			
 
				-	worker_colors[workerid] = mic_worker_colors[mic_index++];
			
 
				-	if (mic_index == MIC_WORKER_COLORS_NB) mic_index = 0;
			
 
				-}
			
 
				-
			
 
				-static void set_next_mpi_ms_worker_color(int workerid)
			
 
				+static void set_next_accel_worker_color(int workerid)
			
 
				 {
			
 
				 	if (workerid >= STARPU_NMAXWORKERS)
			
 
				 		return;
			
 
				-	worker_colors[workerid] = mpi_ms_worker_colors[mpi_ms_index++];
			
 
				-	if (mpi_ms_index == MPI_MS_WORKER_COLORS_NB) mpi_ms_index = 0;
			
 
				+	worker_colors[workerid] = accel_worker_colors[accel_index++];
			
 
				+	if (accel_index == ACCEL_WORKER_COLORS_NB) accel_index = 0;
			
 
				 }
			
 
				 
			
 
				 static const char *get_worker_color(int workerid)
			
@@ -1231,56 +1187,24 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
				 
			
 
				 	new_thread = register_worker_id(prefixTOnodeid(prefix), threadid, workerid, set);
			
 
				 
			
 
				-	char *kindstr = "";
			
 
				+	const char *kindstr;
			
 
				 	struct starpu_perfmodel_arch arch;
			
 
				 	arch.ndevices = 1;
			
 
				 	_STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device));
			
 
				 
			
 
				-	switch (ev->param[0])
			
 
				-	{
			
 
				-		case _STARPU_FUT_APPS_KEY:
			
 
				-			set_next_other_worker_color(workerid);
			
 
				-			kindstr = "APPS";
			
 
				-			break;
			
 
				-		case _STARPU_FUT_CPU_KEY:
			
 
				-			set_next_cpu_worker_color(workerid);
			
 
				-			kindstr = "CPU";
			
 
				-			arch.devices[0].type = STARPU_CPU_WORKER;
			
 
				-			arch.devices[0].devid = 0;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				-		case _STARPU_FUT_CUDA_KEY:
			
 
				-			set_next_cuda_worker_color(workerid);
			
 
				-			kindstr = "CUDA";
			
 
				-			arch.devices[0].type = STARPU_CUDA_WORKER;
			
 
				-			arch.devices[0].devid = devid;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				-		case _STARPU_FUT_OPENCL_KEY:
			
 
				-			set_next_opencl_worker_color(workerid);
			
 
				-			kindstr = "OPENCL";
			
 
				-			arch.devices[0].type = STARPU_OPENCL_WORKER;
			
 
				-			arch.devices[0].devid = devid;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				-		case _STARPU_FUT_MIC_KEY:
			
 
				-			set_next_mic_worker_color(workerid);
			
 
				-			kindstr = "mic";
			
 
				-			arch.devices[0].type = STARPU_MIC_WORKER;
			
 
				-			arch.devices[0].devid = devid;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				-		case _STARPU_FUT_MPI_KEY:
			
 
				-			set_next_mpi_ms_worker_color(workerid);
			
 
				-			kindstr = "mpi_ms";
			
 
				-			arch.devices[0].type = STARPU_MPI_MS_WORKER;
			
 
				-			arch.devices[0].devid = devid;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				+	enum starpu_worker_archtype archtype = _STARPU_FUT_KEY_WORKER(ev->param[0]);
			
 
				+	STARPU_ASSERT(archtype >= 0 && archtype < STARPU_NARCH);
			
 
				+
			
 
				+	kindstr = starpu_worker_get_type_as_string(archtype);
			
 
				+	arch.devices[0].type = archtype;
			
 
				+	arch.devices[0].devid = 0;
			
 
				+	arch.devices[0].ncores = 1;
			
 
				+
			
 
				+	if (archtype == STARPU_CPU_WORKER)
			
 
				+		set_next_cpu_worker_color(workerid);
			
 
				+	else
			
 
				+		set_next_accel_worker_color(workerid);
			
 
				 
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				-	}
			
 
				 	double now = get_event_time_stamp(ev, options);
			
 
				 
			
 
				 	if (out_paje_file)
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -60,6 +60,25 @@
 
				 #include <windows.h>
			
 
				 #endif
			
 
				 
			
 
				+static struct starpu_driver_info driver_info = {
			
 
				+	.name_upper = "CPU",
			
 
				+	.name_var = "CPU",
			
 
				+	.name_lower = "cpu",
			
 
				+	.memory_kind = STARPU_CPU_RAM,
			
 
				+	.alpha = 0.5f,
			
 
				+};
			
 
				+
			
 
				+static struct starpu_memory_driver_info memory_driver_info = {
			
 
				+	.name_upper = "NUMA",
			
 
				+	.worker_archtype = STARPU_CPU_WORKER,
			
 
				+};
			
 
				+
			
 
				+void _starpu_cpu_preinit(void)
			
 
				+{
			
 
				+	starpu_driver_info_register(STARPU_CPU_WORKER, &driver_info);
			
 
				+	starpu_memory_driver_info_register(STARPU_CPU_RAM, &memory_driver_info);
			
 
				+}
			
 
				+
			
 
				 
			
 
				 #ifdef STARPU_USE_CPU
			
 
				 /* Actually launch the job on a cpu worker.
			
@@ -190,7 +209,7 @@ int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker)
 
				 {
			
 
				 	int devid = cpu_worker->devid;
			
 
				 
			
 
				-	_starpu_driver_start(cpu_worker, _STARPU_FUT_CPU_KEY, 1);
			
 
				+	_starpu_driver_start(cpu_worker, STARPU_CPU_WORKER, 1);
			
 
				 	snprintf(cpu_worker->name, sizeof(cpu_worker->name), "CPU %d", devid);
			
 
				 	snprintf(cpu_worker->short_name, sizeof(cpu_worker->short_name), "CPU %d", devid);
			
 
				 	starpu_pthread_setname(cpu_worker->short_name);
			
@@ -361,7 +380,7 @@ int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker)
 
				 	 * job. */
			
 
				 
			
 
				 	/* can a cpu perform that task ? */
			
 
				-	if (!_STARPU_CPU_MAY_PERFORM(j))
			
 
				+	if (!_STARPU_MAY_PERFORM(j, CPU))
			
 
				 	{
			
 
				 		/* put it and the end of the queue ... XXX */
			
 
				 		_starpu_push_task_to_workers(task);
			
@@ -417,7 +436,7 @@ int _starpu_cpu_driver_deinit(struct _starpu_worker *cpu_worker)
 
				 	_starpu_free_all_automatically_allocated_buffers(memnode);
			
 
				 
			
 
				 	cpu_worker->worker_is_initialized = 0;
			
 
				-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CPU_KEY);
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_CPU_WORKER);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -534,6 +553,9 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 
				 #else
			
 
				 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_FPGA
			
 
				+	//.copy_interface_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_interface_from_cpu_to_fpga,
			
 
				+#endif
			
 
				 	.copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_cpu_to_disk,
			
 
				 #ifdef STARPU_USE_MIC
			
 
				 	.copy_interface_to[STARPU_MIC_RAM] = _starpu_mic_copy_interface_from_cpu_to_mic,
			
@@ -545,9 +567,6 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 
				 #else
			
 
				 	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 #endif
			
 
				-#ifdef STARPU_USE_FPGA
			
 
				-	//.copy_interface_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_interface_from_cpu_to_fpga,
			
 
				-#endif
			
 
				 
			
 
				 	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data,
			
@@ -561,6 +580,9 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 
				 #else
			
 
				 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_FPGA
			
 
				+	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_cpu_to_fpga,
			
 
				+#endif
			
 
				 	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_cpu_to_disk,
			
 
				 #ifdef STARPU_USE_MIC
			
 
				 	.copy_data_to[STARPU_MIC_RAM] = _starpu_mic_copy_data_from_cpu_to_mic,
			
@@ -572,9 +594,6 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 
				 #else
			
 
				 	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 #endif
			
 
				-#ifdef STARPU_USE_FPGA
			
 
				-	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_cpu_to_fpga,
			
 
				-#endif
			
 
				 
			
 
				 	.copy2d_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy2d_data_to[STARPU_CPU_RAM] = NULL,
			
--- a/src/drivers/cpu/driver_cpu.h
+++ b/src/drivers/cpu/driver_cpu.h
@@ -22,6 +22,8 @@
 
				 #include <common/config.h>
			
 
				 #include <datawizard/node_ops.h>
			
 
				 
			
 
				+void _starpu_cpu_preinit(void);
			
 
				+
			
 
				 extern struct _starpu_driver_ops _starpu_driver_cpu_ops;
			
 
				 extern struct _starpu_node_ops _starpu_driver_cpu_node_ops;
			
 
				 
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -110,7 +110,7 @@ _starpu_cuda_discover_devices (struct _starpu_machine_config *config)
 
				 	/* Discover the number of CUDA devices. Fill the result in CONFIG. */
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-	config->topology.nhwcudagpus = _starpu_simgrid_get_nbhosts("CUDA");
			
 
				+	config->topology.nhwdevices[STARPU_CUDA_WORKER] = _starpu_simgrid_get_nbhosts("CUDA");
			
 
				 #else
			
 
				 	int cnt;
			
 
				 	cudaError_t cures;
			
@@ -118,7 +118,7 @@ _starpu_cuda_discover_devices (struct _starpu_machine_config *config)
 
				 	cures = cudaGetDeviceCount (&cnt);
			
 
				 	if (STARPU_UNLIKELY(cures != cudaSuccess))
			
 
				 		cnt = 0;
			
 
				-	config->topology.nhwcudagpus = cnt;
			
 
				+	config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt;
			
 
				 #ifdef HAVE_LIBNVIDIA_ML
			
 
				 	nvmlInit();
			
 
				 #endif
			
@@ -684,12 +684,12 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
				 	int lastdevid = -1;
			
 
				 	unsigned i;
			
 
				 
			
 
				-	_starpu_driver_start(worker0, _STARPU_FUT_CUDA_KEY, 0);
			
 
				+	_starpu_driver_start(worker0, STARPU_CUDA_WORKER, 0);
			
 
				 	_starpu_set_local_worker_set_key(worker_set);
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	for (i = 1; i < worker_set->nworkers; i++)
			
 
				-		_starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_CUDA_KEY, 0);
			
 
				+		_starpu_worker_start(&worker_set->workers[i], STARPU_CUDA_WORKER, 0);
			
 
				 #endif
			
 
				 
			
 
				 	for (i = 0; i < worker_set->nworkers; i++)
			
@@ -710,7 +710,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
				 		init_device_context(devid, memnode);
			
 
				 
			
 
				 #ifndef STARPU_SIMGRID
			
 
				-		if (worker->config->topology.nworkerpercuda > 1 && props[devid].concurrentKernels == 0)
			
 
				+		if (worker->config->topology.nworker[STARPU_CUDA_WORKER][devid] > 1 && props[devid].concurrentKernels == 0)
			
 
				 			_STARPU_DISP("Warning: STARPU_NWORKER_PER_CUDA is %u, but CUDA device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid);
			
 
				 #endif /* !STARPU_SIMGRID */
			
 
				 	}
			
@@ -723,7 +723,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
				 		struct _starpu_worker *worker = &worker_set->workers[i];
			
 
				 		unsigned devid = worker->devid;
			
 
				 		unsigned workerid = worker->workerid;
			
 
				-		unsigned subdev = i % _starpu_get_machine_config()->topology.nworkerpercuda;
			
 
				+		unsigned subdev = i % _starpu_get_machine_config()->topology.nworker[STARPU_CUDA_WORKER][devid];
			
 
				 
			
 
				 		float size = (float) global_mem[devid] / (1<<30);
			
 
				 #ifdef STARPU_SIMGRID
			
@@ -968,7 +968,7 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 
				 		j = _starpu_get_job_associated_to_task(task);
			
 
				 
			
 
				 		/* can CUDA do that task ? */
			
 
				-		if (!_STARPU_CUDA_MAY_PERFORM(j))
			
 
				+		if (!_STARPU_MAY_PERFORM(j, CUDA))
			
 
				 		{
			
 
				 			/* this is neither a cuda or a cublas task */
			
 
				 			_starpu_worker_refuse_task(worker, task);
			
@@ -1067,7 +1067,7 @@ int _starpu_cuda_driver_deinit(struct _starpu_worker_set *worker_set)
 
				 	}
			
 
				 
			
 
				 	worker_set->workers[0].worker_is_initialized = 0;
			
 
				-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CUDA_KEY);
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -1832,37 +1832,17 @@ struct _starpu_driver_ops _starpu_driver_cuda_ops =
 
				 #ifdef STARPU_SIMGRID
			
 
				 struct _starpu_node_ops _starpu_driver_cuda_node_ops =
			
 
				 {
			
 
				-	.copy_interface_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_interface_to[STARPU_CPU_RAM] = NULL,
			
 
				 	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_data_to[STARPU_CPU_RAM] = NULL,
			
 
				 	.copy_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy2d_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy2d_data_to[STARPU_CPU_RAM] = NULL,
			
 
				 	.copy2d_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy2d_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy2d_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy2d_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy2d_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy3d_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy3d_data_to[STARPU_CPU_RAM] = NULL,
			
 
				 	.copy3d_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy3d_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy3d_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy3d_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy3d_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				 	.wait_request_completion = NULL,
			
 
				 	.test_request_completion = NULL,
			
@@ -1874,31 +1854,15 @@ struct _starpu_node_ops _starpu_driver_cuda_node_ops =
 
				 #else
			
 
				 struct _starpu_node_ops _starpu_driver_cuda_node_ops =
			
 
				 {
			
 
				-	.copy_interface_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cpu,
			
 
				 	.copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda,
			
 
				-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu,
			
 
				 	.copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda,
			
 
				-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy2d_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu,
			
 
				 	.copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda,
			
 
				-	.copy2d_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy2d_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy2d_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy2d_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy3d_data_to[STARPU_UNUSED] = NULL,
			
 
				 #if 0
			
 
				 	.copy3d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy3d_data_from_cuda_to_cpu,
			
 
				 	.copy3d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy3d_data_from_cuda_to_cuda,
			
@@ -1906,10 +1870,6 @@ struct _starpu_node_ops _starpu_driver_cuda_node_ops =
 
				 	.copy3d_data_to[STARPU_CPU_RAM] = NULL,
			
 
				 	.copy3d_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				 #endif
			
 
				-	.copy3d_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy3d_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy3d_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy3d_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				 	.wait_request_completion = _starpu_cuda_wait_request_completion,
			
 
				 	.test_request_completion = _starpu_cuda_test_request_completion,
			
--- a/src/drivers/cuda/driver_cuda.h
+++ b/src/drivers/cuda/driver_cuda.h
@@ -22,6 +22,8 @@
 
				 
			
 
				 #include <common/config.h>
			
 
				 
			
 
				+void _starpu_cuda_preinit(void);
			
 
				+
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 #include <cuda.h>
			
 
				 #include <cuda_runtime_api.h>
			
--- a/src/drivers/cuda/driver_cuda_init.c
+++ b/src/drivers/cuda/driver_cuda_init.c
@@ -0,0 +1,37 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <core/workers.h>
			
 
				+#include <drivers/cuda/driver_cuda.h>
			
 
				+
			
 
				+static struct starpu_driver_info driver_info = {
			
 
				+	.name_upper = "CUDA",
			
 
				+	.name_var = "CUDA",
			
 
				+	.name_lower = "cuda",
			
 
				+	.memory_kind = STARPU_CUDA_RAM,
			
 
				+	.alpha = 13.33f,
			
 
				+};
			
 
				+
			
 
				+static struct starpu_memory_driver_info memory_driver_info = {
			
 
				+	.name_upper = "CUDA",
			
 
				+	.worker_archtype = STARPU_CUDA_WORKER,
			
 
				+};
			
 
				+
			
 
				+void _starpu_cuda_preinit(void)
			
 
				+{
			
 
				+	starpu_driver_info_register(STARPU_CUDA_WORKER, &driver_info);
			
 
				+	starpu_memory_driver_info_register(STARPU_CUDA_RAM, &memory_driver_info);
			
 
				+}
			
--- a/src/drivers/cuda/starpu_cublas.c
+++ b/src/drivers/cuda/starpu_cublas.c
@@ -100,9 +100,11 @@ void starpu_cublas_shutdown(void)
 
				 void starpu_cublas_set_stream(void)
			
 
				 {
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				+	unsigned workerid = starpu_worker_get_id_check();
			
 
				+	int devid = starpu_worker_get_devid(workerid);
			
 
				 	if (!_starpu_get_machine_config()->topology.cuda_th_per_dev ||
			
 
				 		(!_starpu_get_machine_config()->topology.cuda_th_per_stream &&
			
 
				-		 _starpu_get_machine_config()->topology.nworkerpercuda > 1))
			
 
				+		 _starpu_get_machine_config()->topology.nworker[STARPU_CUDA_WORKER][devid] > 1))
			
 
				 		cublasSetKernelStream(starpu_cuda_get_local_stream());
			
 
				 #endif
			
 
				 }
			
--- a/src/drivers/disk/driver_disk.c
+++ b/src/drivers/disk/driver_disk.c
@@ -23,6 +23,16 @@
 
				 #include <datawizard/coherency.h>
			
 
				 #include <datawizard/memory_nodes.h>
			
 
				 
			
 
				+static struct starpu_memory_driver_info memory_driver_info = {
			
 
				+	.name_upper = "Disk",
			
 
				+	.worker_archtype = (enum starpu_worker_archtype) -1,
			
 
				+};
			
 
				+
			
 
				+void _starpu_disk_preinit(void)
			
 
				+{
			
 
				+	starpu_memory_driver_info_register(STARPU_DISK_RAM, &memory_driver_info);
			
 
				+}
			
 
				+
			
 
				 int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel)
			
 
				 {
			
 
				 	STARPU_ASSERT(starpu_node_get_kind(src_node) == STARPU_CPU_RAM);
			
@@ -251,21 +261,11 @@ void _starpu_disk_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, i
 
				 
			
 
				 struct _starpu_node_ops _starpu_driver_disk_node_ops =
			
 
				 {
			
 
				-	.copy_interface_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_disk_copy_interface_from_disk_to_cpu,
			
 
				-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
			
 
				 	.copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_disk_to_disk,
			
 
				-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_data_to[STARPU_CPU_RAM] = _starpu_disk_copy_data_from_disk_to_cpu,
			
 
				-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				 	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_disk_to_disk,
			
 
				-	.copy_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				 	/* TODO: copy2D/3D? */
			
 
				 
			
--- a/src/drivers/disk/driver_disk.h
+++ b/src/drivers/disk/driver_disk.h
@@ -22,6 +22,8 @@
 
				 
			
 
				 #include <datawizard/node_ops.h>
			
 
				 
			
 
				+void _starpu_disk_preinit(void);
			
 
				+
			
 
				 int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel);
			
 
				 
			
 
				 int _starpu_disk_copy_disk_to_src(void * src, size_t src_offset, unsigned src_node, void * dst, unsigned dst_node, size_t size, void * async_channel);
			
--- a/src/drivers/max/driver_fpga.c
+++ b/src/drivers/max/driver_fpga.c
@@ -80,7 +80,7 @@ void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
 
				 	n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
			
 
				 	if (n != -1)
			
 
				 	{
			
 
				-		config->topology.nhwfpgafpgas = nfpgafpgas = n;
			
 
				+		config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
			
 
				 		return;
			
 
				 	}
			
 
				 
			
@@ -134,7 +134,7 @@ void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
 
				 
			
 
				         //LMemInterface addLMemInterface()
			
 
				         //// pour récupérer l'accès à la LMem
			
 
				-	config->topology.nhwfpgafpgas = nfpgafpgas = n;
			
 
				+	config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
			
 
				 }
			
 
				 
			
 
				 unsigned _starpu_fpga_get_device_count(void)
			
@@ -208,7 +208,7 @@ int _starpu_fpga_driver_init(struct _starpu_worker *worker)
 
				 {
			
 
				 	int devid = worker->devid;
			
 
				 	//fpga_msg("successful till here");
			
 
				-	_starpu_driver_start(worker, _STARPU_FUT_CPU_KEY, 1);
			
 
				+	_starpu_driver_start(worker, STARPU_FPGA_WORKER, 1);
			
 
				 	/* FIXME: when we have NUMA support, properly turn node number into NUMA node number */
			
 
				 	// TODO: drop test when we allocated a memory node for fpga
			
 
				 	if (worker->memory_node != STARPU_MAIN_RAM)
			
@@ -259,16 +259,11 @@ static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker
 
				 	if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
			
 
				 	{
			
 
				 		_starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl);
			
 
				-		//char *kernel_type = _starpu_task_get_fpga_kernel_type_nth_implementation(cl, j->nimpl);
			
 
				-		//printf("chanel reserved: %d \n",chnl);
			
 
				 
			
 
				 		STARPU_ASSERT_MSG(func, "when STARPU_FPGA is defined in 'where', fpga_func or fpga_funcs has to be defined");
			
 
				 		if (_starpu_get_disable_kernels() <= 0)
			
 
				 		{
			
 
				 			_STARPU_TRACE_START_EXECUTING();
			
 
				-			//int chnl = fpga_reserve_chanel_of_kernel_type(kernel_type);
			
 
				-			//_starpu_fpga_transfer_data(_STARPU_TASK_GET_INTERFACES(task), j, chnl);
			
 
				-			//fpga_release_chanel(chnl);
			
 
				 			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
			
 
				 			_STARPU_TRACE_END_EXECUTING();
			
 
				 		}
			
@@ -308,7 +303,7 @@ int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
 
				 	j = _starpu_get_job_associated_to_task(task);
			
 
				 
			
 
				 	/* can a cpu perform that task ? */
			
 
				-	if (!_STARPU_FPGA_MAY_PERFORM(j))
			
 
				+	if (!_STARPU_MAY_PERFORM(j, FPGA))
			
 
				 	{
			
 
				 		/* put it and the end of the queue ... XXX */
			
 
				 		_starpu_push_task_to_workers(task);
			
@@ -399,7 +394,7 @@ int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker)
 
				 	_starpu_free_all_automatically_allocated_buffers(memnode);
			
 
				 
			
 
				 	fpga_worker->worker_is_initialized = 0;
			
 
				-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CPU_KEY);
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_FPGA_WORKER);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -642,21 +637,11 @@ struct _starpu_driver_ops _starpu_driver_fpga_ops =
 
				 // TODO: transfers
			
 
				 struct _starpu_node_ops _starpu_driver_fpga_node_ops =
			
 
				 {
			
 
				-	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	//.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
			
 
				 	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
			
 
				-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy_interface_to[STARPU_UNUSED] = NULL,
			
 
				 	//.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
			
 
				 	.copy_interface_to[STARPU_FPGA_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				         .wait_request_completion = NULL,
			
 
				 	.test_request_completion = NULL,
			
--- a/src/drivers/max/driver_fpga.h
+++ b/src/drivers/max/driver_fpga.h
@@ -30,6 +30,8 @@
 
				 #include <core/perfmodel/perfmodel.h>
			
 
				 #include <common/fxt.h>
			
 
				 
			
 
				+void _starpu_fpga_preinit(void);
			
 
				+
			
 
				 #ifdef STARPU_USE_FPGA
			
 
				 typedef unsigned * fpga_mem;
			
 
				 
			
--- a/src/drivers/max/driver_fpga_init.c
+++ b/src/drivers/max/driver_fpga_init.c
@@ -0,0 +1,37 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <core/workers.h>
			
 
				+#include <drivers/max/driver_fpga.h>
			
 
				+
			
 
				+static struct starpu_driver_info driver_info = {
			
 
				+	.name_upper = "FPGA",
			
 
				+	.name_var = "FPGA",
			
 
				+	.name_lower = "fpga",
			
 
				+	.memory_kind = STARPU_FPGA_RAM,
			
 
				+	.alpha = 0.5,
			
 
				+};
			
 
				+
			
 
				+static struct starpu_memory_driver_info memory_driver_info = {
			
 
				+	.name_upper = "FPGA",
			
 
				+	.worker_archtype = STARPU_FPGA_WORKER,
			
 
				+};
			
 
				+
			
 
				+void _starpu_fpga_preinit(void)
			
 
				+{
			
 
				+	starpu_driver_info_register(STARPU_FPGA_WORKER, &driver_info);
			
 
				+	starpu_memory_driver_info_register(STARPU_FPGA_RAM, &memory_driver_info);
			
 
				+}
			
--- a/src/starpu_parameters.h
+++ b/src/starpu_parameters.h
@@ -1,7 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				- * Copyright (C) 2013       Thibaut Lambert
			
 
				+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -15,19 +14,24 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#ifndef _STARPU_PARAMETERS_H
			
 
				-#define _STARPU_PARAMETERS_H
			
 
				+#include <core/workers.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				 
			
 
				-/** @file */
			
 
				+static struct starpu_driver_info driver_info = {
			
 
				+	.name_upper = "MIC",
			
 
				+	.name_var = "MIC",
			
 
				+	.name_lower = "mic",
			
 
				+	.memory_kind = STARPU_MIC_RAM,
			
 
				+	.alpha = 0.5f,
			
 
				+};
			
 
				 
			
 
				-/* Parameters which are not worth being added to ./configure options, but
			
 
				- * still interesting to easily change */
			
 
				+static struct starpu_memory_driver_info memory_driver_info = {
			
 
				+	.name_upper = "MIC",
			
 
				+	.worker_archtype = STARPU_MIC_WORKER,
			
 
				+};
			
 
				 
			
 
				-/* Assumed relative performance ratios */
			
 
				-/* TODO: benchmark a bit instead */
			
 
				-#define _STARPU_CPU_ALPHA	1.0f
			
 
				-#define _STARPU_CUDA_ALPHA	13.33f
			
 
				-#define _STARPU_OPENCL_ALPHA	12.22f
			
 
				-#define _STARPU_MIC_ALPHA	0.5f
			
 
				-#define _STARPU_MPI_MS_ALPHA	1.0f
			
 
				-#endif /* _STARPU_PARAMETERS_H */
			
 
				+void _starpu_mic_preinit(void)
			
 
				+{
			
 
				+	starpu_driver_info_register(STARPU_MIC_WORKER, &driver_info);
			
 
				+	starpu_memory_driver_info_register(STARPU_MIC_RAM, &memory_driver_info);
			
 
				+}
			
--- a/src/drivers/mic/driver_mic_source.c
+++ b/src/drivers/mic/driver_mic_source.c
@@ -241,7 +241,7 @@ unsigned starpu_mic_device_get_count(void)
 
				     struct _starpu_machine_config *config = _starpu_get_machine_config ();
			
 
				     struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-    return topology->nmicdevices;
			
 
				+    return topology->ndevices[STARPU_MIC_WORKER];
			
 
				 }
			
 
				 
			
 
				 starpu_mic_kernel_t _starpu_mic_src_get_kernel_from_codelet(struct starpu_codelet *cl, unsigned nimpl)
			
@@ -521,16 +521,16 @@ void *_starpu_mic_src_worker(void *arg)
 
				 
			
 
				 	/* unsigned memnode = baseworker->memory_node; */
			
 
				 
			
 
				-	_starpu_driver_start(baseworker, _STARPU_FUT_MIC_KEY, 0);
			
 
				+	_starpu_driver_start(baseworker, STARPU_MIC_WORKER, 0);
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	for (i = 1; i < worker_set->nworkers; i++)
			
 
				-		_starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MIC_KEY, 0);
			
 
				+		_starpu_worker_start(&worker_set->workers[i], STARPU_FUT_WORKER, 0);
			
 
				 #endif
			
 
				 
			
 
				 	// Current task for a thread managing a worker set has no sense.
			
 
				 	_starpu_set_current_task(NULL);
			
 
				 
			
 
				-	for (i = 0; i < config->topology.nmiccores[devid]; i++)
			
 
				+	for (i = 0; i < config->topology.nworker[STARPU_MIC_WORKER][devid]; i++)
			
 
				 	{
			
 
				 		struct _starpu_worker *worker = &config->workers[baseworkerid+i];
			
 
				 		snprintf(worker->name, sizeof(worker->name), "MIC %u core %u", devid, i);
			
@@ -686,21 +686,11 @@ void _starpu_mic_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, in
 
				 /* TODO: MIC -> MIC */
			
 
				 struct _starpu_node_ops _starpu_driver_mic_node_ops =
			
 
				 {
			
 
				-	.copy_interface_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_mic_copy_interface_from_mic_to_cpu,
			
 
				-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
			
 
				 	.copy_interface_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_data_to[STARPU_CPU_RAM] = _starpu_mic_copy_data_from_mic_to_cpu,
			
 
				-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_DISK_RAM] = NULL,
			
 
				 	.copy_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				 	/* TODO: copy2D/3D? */
			
 
				 
			
--- a/src/drivers/mic/driver_mic_source.h
+++ b/src/drivers/mic/driver_mic_source.h
@@ -23,6 +23,8 @@
 
				 #include <starpu_mic.h>
			
 
				 #include <common/config.h>
			
 
				 
			
 
				+void _starpu_mic_preinit(void);
			
 
				+
			
 
				 #ifdef STARPU_USE_MIC
			
 
				 
			
 
				 #include <source/COIProcess_source.h>
			
--- a/src/drivers/mpi/driver_mpi_common.c
+++ b/src/drivers/mpi/driver_mpi_common.c
@@ -127,7 +127,7 @@ void _starpu_mpi_common_mp_initialize_src_sink(struct _starpu_mp_node *node)
 
				 {
			
 
				         struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology;
			
 
				 
			
 
				-        node->nb_cores = topology->nhwcpus;
			
 
				+        node->nb_cores = topology->nhwworker[STARPU_CPU_WORKER][0];
			
 
				 }
			
 
				 
			
 
				 int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node)
			
--- a/src/drivers/mpi/driver_mpi_init.c
+++ b/src/drivers/mpi/driver_mpi_init.c
@@ -0,0 +1,37 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <core/workers.h>
			
 
				+#include <drivers/mpi/driver_mpi_source.h>
			
 
				+
			
 
				+static struct starpu_driver_info driver_info = {
			
 
				+	.name_upper = "MPI_MS",
			
 
				+	.name_var = "MPI_MS",
			
 
				+	.name_lower = "mpi_ms",
			
 
				+	.memory_kind = STARPU_MPI_MS_RAM,
			
 
				+	.alpha = 1.0f,
			
 
				+};
			
 
				+
			
 
				+static struct starpu_memory_driver_info memory_driver_info = {
			
 
				+	.name_upper = "MPI_MS",
			
 
				+	.worker_archtype = STARPU_MPI_MS_WORKER,
			
 
				+};
			
 
				+
			
 
				+void _starpu_mpi_ms_preinit(void)
			
 
				+{
			
 
				+	starpu_driver_info_register(STARPU_MPI_MS_WORKER, &driver_info);
			
 
				+	starpu_memory_driver_info_register(STARPU_MPI_MS_RAM, &memory_driver_info);
			
 
				+}
			
--- a/src/drivers/mpi/driver_mpi_source.c
+++ b/src/drivers/mpi/driver_mpi_source.c
@@ -310,17 +310,17 @@ void *_starpu_mpi_src_worker(void *arg)
 
				 
			
 
				                 /* unsigned memnode = baseworker->memory_node; */
			
 
				 
			
 
				-                _starpu_driver_start(baseworker, _STARPU_FUT_MPI_KEY, 0);
			
 
				+                _starpu_driver_start(baseworker, STARPU_CPU_WORKER, 0);
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				                 for (i = 1; i < worker_set->nworkers; i++)
			
 
				-                        _starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MPI_KEY, 0);
			
 
				+                        _starpu_worker_start(&worker_set->workers[i], STARPU_MPI_WORKER, 0);
			
 
				 #endif
			
 
				 
			
 
				                 // Current task for a thread managing a worker set has no sense.
			
 
				                 _starpu_set_current_task(NULL);
			
 
				 
			
 
				-                for (i = 0; i < config->topology.nmpicores[devid]; i++)
			
 
				+                for (i = 0; i < config->topology.nworker[STARPU_MPI_MS_WORKER][devid]; i++)
			
 
				                 {
			
 
				                         struct _starpu_worker *worker = &config->workers[baseworkerid+i];
			
 
				                         snprintf(worker->name, sizeof(worker->name), "MPI_MS %u core %u", devid, i);
			
@@ -546,20 +546,10 @@ void _starpu_mpi_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, in
 
				 
			
 
				 struct _starpu_node_ops _starpu_driver_mpi_node_ops =
			
 
				 {
			
 
				-	.copy_interface_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_mpi_copy_interface_from_mpi_to_cpu,
			
 
				-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
			
 
				 	.copy_interface_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_interface_from_mpi_to_mpi,
			
 
				 
			
 
				-	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_data_to[STARPU_CPU_RAM] = _starpu_mpi_copy_data_from_mpi_to_cpu,
			
 
				-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MIC_RAM] = NULL,
			
 
				 	.copy_data_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_data_from_mpi_to_mpi,
			
 
				 
			
 
				 	/* TODO: copy2D/3D? */
			
--- a/src/drivers/mpi/driver_mpi_source.h
+++ b/src/drivers/mpi/driver_mpi_source.h
@@ -23,8 +23,9 @@
 
				 #include <starpu_mpi_ms.h>
			
 
				 #include <datawizard/node_ops.h>
			
 
				 
			
 
				-#ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				+void _starpu_mpi_ms_preinit(void);
			
 
				 
			
 
				+#ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				 extern struct _starpu_node_ops _starpu_driver_mpi_node_ops;
			
 
				 
			
 
				 /** Array of structures containing all the informations useful to send
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -71,7 +71,7 @@ _starpu_opencl_discover_devices(struct _starpu_machine_config *config)
 
				 	/* As OpenCL must have been initialized before calling this function,
			
 
				 	 * `nb_device' is ensured to be correctly set. */
			
 
				 	STARPU_ASSERT(init_done == 1);
			
 
				-	config->topology.nhwopenclgpus = nb_devices;
			
 
				+	config->topology.nhwdevices[STARPU_OPENCL_WORKER] = nb_devices;
			
 
				 }
			
 
				 
			
 
				 static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid)
			
@@ -623,7 +623,7 @@ int _starpu_opencl_driver_init(struct _starpu_worker *worker)
 
				 {
			
 
				 	int devid = worker->devid;
			
 
				 
			
 
				-	_starpu_driver_start(worker, _STARPU_FUT_OPENCL_KEY, 0);
			
 
				+	_starpu_driver_start(worker, STARPU_OPENCL_WORKER, 0);
			
 
				 
			
 
				 	_starpu_opencl_init_context(devid);
			
 
				 
			
@@ -817,7 +817,7 @@ int _starpu_opencl_driver_run_once(struct _starpu_worker *worker)
 
				 		worker->current_task = task;
			
 
				 
			
 
				 	/* can OpenCL do that task ? */
			
 
				-	if (!_STARPU_OPENCL_MAY_PERFORM(j))
			
 
				+	if (!_STARPU_MAY_PERFORM(j, OPENCL))
			
 
				 	{
			
 
				 		/* this is not a OpenCL task */
			
 
				 		_starpu_worker_refuse_task(worker, task);
			
@@ -853,7 +853,7 @@ int _starpu_opencl_driver_deinit(struct _starpu_worker *worker)
 
				         _starpu_opencl_deinit_context(devid);
			
 
				 
			
 
				 	worker->worker_is_initialized = 0;
			
 
				-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_OPENCL_KEY);
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_OPENCL_WORKER);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -1356,21 +1356,11 @@ int _starpu_opencl_is_direct_access_supported(unsigned node, unsigned handling_n
 
				 #ifdef STARPU_SIMGRID
			
 
				 struct _starpu_node_ops _starpu_driver_opencl_node_ops =
			
 
				 {
			
 
				-	.copy_interface_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_interface_to[STARPU_CPU_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
			
 
				 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_data_to[STARPU_CPU_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				 	.wait_request_completion = NULL,
			
 
				 	.test_request_completion = NULL,
			
@@ -1382,21 +1372,11 @@ struct _starpu_node_ops _starpu_driver_opencl_node_ops =
 
				 #else
			
 
				 struct _starpu_node_ops _starpu_driver_opencl_node_ops =
			
 
				 {
			
 
				-	.copy_interface_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_opencl_copy_interface_from_opencl_to_cpu,
			
 
				-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
			
 
				 	.copy_interface_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_interface_from_opencl_to_opencl,
			
 
				-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				-	.copy_data_to[STARPU_UNUSED] = NULL,
			
 
				 	.copy_data_to[STARPU_CPU_RAM] = _starpu_opencl_copy_data_from_opencl_to_cpu,
			
 
				-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
			
 
				 	.copy_data_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_opencl_to_opencl,
			
 
				-	.copy_data_to[STARPU_DISK_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MIC_RAM] = NULL,
			
 
				-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
			
 
				 
			
 
				 	/* TODO: copy2D/3D? */
			
 
				 
			
--- a/src/drivers/opencl/driver_opencl.h
+++ b/src/drivers/opencl/driver_opencl.h
@@ -36,6 +36,8 @@
 
				 #include <core/workers.h>
			
 
				 #include <datawizard/node_ops.h>
			
 
				 
			
 
				+void _starpu_opencl_preinit(void);
			
 
				+
			
 
				 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				 struct _starpu_machine_config;
			
 
				 void _starpu_opencl_discover_devices(struct _starpu_machine_config *config);
			
--- a/src/drivers/opencl/driver_opencl_init.c
+++ b/src/drivers/opencl/driver_opencl_init.c
@@ -0,0 +1,37 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <core/workers.h>
			
 
				+#include <drivers/opencl/driver_opencl.h>
			
 
				+
			
 
				+static struct starpu_driver_info driver_info = {
			
 
				+	.name_upper = "OpenCL",
			
 
				+	.name_var = "OPENCL",
			
 
				+	.name_lower = "opencl",
			
 
				+	.memory_kind = STARPU_OPENCL_RAM,
			
 
				+	.alpha = 12.22f,
			
 
				+};
			
 
				+
			
 
				+static struct starpu_memory_driver_info memory_driver_info = {
			
 
				+	.name_upper = "OpenCL",
			
 
				+	.worker_archtype = STARPU_OPENCL_WORKER,
			
 
				+};
			
 
				+
			
 
				+void _starpu_opencl_preinit(void)
			
 
				+{
			
 
				+	starpu_driver_info_register(STARPU_OPENCL_WORKER, &driver_info);
			
 
				+	starpu_memory_driver_info_register(STARPU_OPENCL_RAM, &memory_driver_info);
			
 
				+}
			
--- a/src/profiling/bound.c
+++ b/src/profiling/bound.c
@@ -231,12 +231,9 @@ static double** initialize_arch_duration(int maxdevid, unsigned* maxncore_table)
 
				 static void initialize_duration(struct bound_task *task)
			
 
				 {
			
 
				 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				-	task->duration[STARPU_CPU_WORKER] = initialize_arch_duration(1,&conf->topology.nhwcpus);
			
 
				-	task->duration[STARPU_CUDA_WORKER] = initialize_arch_duration(conf->topology.nhwcudagpus,NULL);
			
 
				-	task->duration[STARPU_OPENCL_WORKER] = initialize_arch_duration(conf->topology.nhwopenclgpus,NULL);
			
 
				-	task->duration[STARPU_FPGA_WORKER] = initialize_arch_duration(conf->topology.nhwfpgafpgas,NULL);
			
 
				-	task->duration[STARPU_MIC_WORKER] = initialize_arch_duration(conf->topology.nhwmicdevices,conf->topology.nmiccores);
			
 
				-	task->duration[STARPU_MPI_MS_WORKER] = initialize_arch_duration(conf->topology.nhwmpidevices,conf->topology.nmpicores);
			
 
				+	enum starpu_worker_archtype type;
			
 
				+	for (type = 0; type < STARPU_NARCH; type++)
			
 
				+		task->duration[type] = initialize_arch_duration(conf->topology.nhwdevices[type], conf->topology.nworker[type]);
			
 
				 }
			
 
				 
			
 
				 static struct starpu_perfmodel_device device =
			
--- a/src/profiling/profiling.c
+++ b/src/profiling/profiling.c
@@ -577,7 +577,7 @@ int starpu_bus_get_ngpus(int busid)
 
				 	int ngpus = bus_ngpus[busid];
			
 
				 	if (!ngpus)
			
 
				 		/* Unknown number of GPUs, assume it's shared by all GPUs */
			
 
				-		ngpus = topology->ncudagpus+topology->nopenclgpus;
			
 
				+		ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER];
			
 
				 	return ngpus;
			
 
				 }
			
 
				 
			
--- a/src/sched_policies/heteroprio.c
+++ b/src/sched_policies/heteroprio.c
@@ -37,7 +37,7 @@
 
				 #define DBL_MAX __DBL_MAX__
			
 
				 #endif
			
 
				 
			
 
				-#define STARPU_NB_TYPES (STARPU_MAX_WORKER+1)
			
 
				+#define STARPU_NB_TYPES STARPU_NARCH
			
 
				 
			
 
				 /* A bucket corresponds to a Pair of priorities
			
 
				  * When a task is pushed with a priority X, it will be stored
			
@@ -112,7 +112,7 @@ struct _starpu_heteroprio_data
 
				 
			
 
				 static int starpu_heteroprio_types_to_arch(enum starpu_worker_archtype arch)
			
 
				 {
			
 
				-	if (arch > STARPU_MAX_WORKER)
			
 
				+	if (arch >= STARPU_NARCH)
			
 
				 		return 0;
			
 
				 	return STARPU_WORKER_TO_MASK(arch);
			
 
				 }
			
@@ -174,7 +174,7 @@ static inline void default_init_sched(unsigned sched_ctx_id)
 
				 	enum starpu_worker_archtype type;
			
 
				 
			
 
				 	// By default each type of devices uses 1 bucket and no slow factor
			
 
				-	for (type = 0; type <= STARPU_MAX_WORKER; type++)
			
 
				+	for (type = 0; type < STARPU_NARCH; type++)
			
 
				 		if (starpu_worker_get_count_by_type(type) > 0)
			
 
				 			starpu_heteroprio_set_nb_prios(sched_ctx_id, type, max_prio-min_prio+1);
			
 
				 
			
@@ -183,7 +183,7 @@ static inline void default_init_sched(unsigned sched_ctx_id)
 
				 	for(prio=min_prio ; prio<=max_prio ; prio++)
			
 
				 	{
			
 
				 		// By default each type of devices uses 1 bucket and no slow factor
			
 
				-		for (type = 0; type <= STARPU_MAX_WORKER; type++)
			
 
				+		for (type = 0; type < STARPU_NARCH; type++)
			
 
				 			if (starpu_worker_get_count_by_type(type) > 0)
			
 
				 				starpu_heteroprio_set_mapping(sched_ctx_id, type, prio, prio);
			
 
				 	}
			
--- a/src/sched_policies/modular_ez.c
+++ b/src/sched_policies/modular_ez.c
@@ -112,8 +112,8 @@ void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id,
 
				 		nummaxids = starpu_worker_get_count() + starpu_combined_worker_get_count();
			
 
				 		if (starpu_memory_nodes_get_count() > nummaxids)
			
 
				 			nummaxids = starpu_memory_nodes_get_count();
			
 
				-		if (STARPU_ANY_WORKER > nummaxids)
			
 
				-			nummaxids = STARPU_ANY_WORKER;
			
 
				+		if (STARPU_NARCH > nummaxids)
			
 
				+			nummaxids = STARPU_NARCH;
			
 
				 
			
 
				 		if (sched == 0)
			
 
				 			decide_flags = flags & STARPU_SCHED_SIMPLE_DECIDE_MASK;
			
@@ -154,7 +154,7 @@ void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id,
 
				 			/* Count available architecture types */
			
 
				 			enum starpu_worker_archtype type;
			
 
				 			nbelow = 0;
			
 
				-			for (type = STARPU_CPU_WORKER; type < STARPU_ANY_WORKER; type++)
			
 
				+			for (type = 0; type < STARPU_NARCH; type++)
			
 
				 			{
			
 
				 				if (starpu_worker_get_count_by_type(type))
			
 
				 				{
			
--- a/src/sched_policies/parallel_heft.c
+++ b/src/sched_policies/parallel_heft.c
@@ -22,7 +22,6 @@
 
				 #include <limits.h>
			
 
				 #include <core/workers.h>
			
 
				 #include <core/perfmodel/perfmodel.h>
			
 
				-#include <starpu_parameters.h>
			
 
				 #include <core/detect_combined_workers.h>
			
 
				 #include <core/sched_policy.h>
			
 
				 #include <core/task.h>
			
--- a/src/util/fstarpu.c
+++ b/src/util/fstarpu.c
@@ -81,6 +81,7 @@ static const intptr_t fstarpu_cuda_worker = STARPU_CUDA_WORKER;
 
				 static const intptr_t fstarpu_opencl_worker = STARPU_OPENCL_WORKER;
			
 
				 static const intptr_t fstarpu_mic_worker = STARPU_MIC_WORKER;
			
 
				 static const intptr_t fstarpu_any_worker = STARPU_ANY_WORKER;
			
 
				+static const intptr_t fstarpu_narch = STARPU_NARCH;
			
 
				 
			
 
				 static const intptr_t fstarpu_nmaxbufs = STARPU_NMAXBUFS;
			
 
				 
			
@@ -174,6 +175,7 @@ intptr_t fstarpu_get_constant(char *s)
 
				 	else if (!strcmp(s, "FSTARPU_OPENCL_WORKER"))	{ return fstarpu_opencl_worker; }
			
 
				 	else if (!strcmp(s, "FSTARPU_MIC_WORKER"))	{ return fstarpu_mic_worker; }
			
 
				 	else if (!strcmp(s, "FSTARPU_ANY_WORKER"))	{ return fstarpu_any_worker; }
			
 
				+	else if (!strcmp(s, "FSTARPU_NARCH"))	{ return fstarpu_narch; }
			
 
				 
			
 
				 	else if (!strcmp(s, "FSTARPU_NMAXBUFS"))	{ return fstarpu_nmaxbufs; }
			
 
				 
			
--- a/src/util/openmp_runtime_support_omp_api.c
+++ b/src/util/openmp_runtime_support_omp_api.c
@@ -65,7 +65,7 @@ int starpu_omp_get_max_threads()
 
				 
			
 
				 int starpu_omp_get_num_procs (void)
			
 
				 {
			
 
				-	/* starpu_cpu_worker_get_count defined as topology.ncpus */
			
 
				+	/* starpu_cpu_worker_get_count defined as topology.nworkers[STARPU_CPU_WORKER] */
			
 
				 	return starpu_cpu_worker_get_count();
			
 
				 }
			
 
				 
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -17,6 +17,8 @@ include $(top_srcdir)/starpu.mk
 
				 
			
 
				 AM_CFLAGS += -Wno-unused
			
 
				 AM_CXXFLAGS += -Wno-unused
			
 
				+AM_FFLAGS += -Wno-unused
			
 
				+AM_FCFLAGS += -Wno-unused
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
			
 
				 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
			
 
				 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
			
--- a/tests/datawizard/in_place_partition.c
+++ b/tests/datawizard/in_place_partition.c
@@ -30,7 +30,11 @@ int main(int argc, char **argv)
 
				 	int ret;
			
 
				 	unsigned n, i, size;
			
 
				 
			
 
				-	ret = starpu_initialize(NULL, &argc, &argv);
			
 
				+	struct starpu_conf conf;
			
 
				+	starpu_conf_init(&conf);
			
 
				+	conf.nfpga = 0;
			
 
				+
			
 
				+	ret = starpu_initialize(&conf, &argc, &argv);
			
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
--- a/tests/datawizard/wt_broadcast.c
+++ b/tests/datawizard/wt_broadcast.c
@@ -90,7 +90,11 @@ int main(void)
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	ret = starpu_init(NULL);
			
 
				+	struct starpu_conf conf;
			
 
				+	starpu_conf_init(&conf);
			
 
				+	conf.nfpga = 0;
			
 
				+
			
 
				+	ret = starpu_init(&conf);
			
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
--- a/tests/microbenchs/prefetch_data_on_node.c
+++ b/tests/microbenchs/prefetch_data_on_node.c
@@ -102,7 +102,11 @@ int main(int argc, char **argv)
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	ret = starpu_initialize(NULL, &argc, &argv);
			
 
				+	struct starpu_conf conf;
			
 
				+	starpu_conf_init(&conf);
			
 
				+	conf.nfpga = 0;
			
 
				+
			
 
				+	ret = starpu_initialize(&conf, &argc, &argv);
			
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
--- a/tools/dev/checker/rename_internal.sed
+++ b/tools/dev/checker/rename_internal.sed
@@ -26,9 +26,6 @@ s/\b_starpu_get_memory_node_description\b/_starpu_memory_node_get_description/g
 
				 
			
 
				 s/\bheft_policy\b/_starpu_sched_heft_policy/g
			
 
				 s/\bstruct starpu_priority_taskq_s\b/struct _starpu_priority_taskq/g
			
 
				-s/\bSTARPU_FUT_APPS_KEY\b/_STARPU_FUT_APPS_KEY/g
			
 
				-s/\bSTARPU_FUT_CPU_KEY\b/_STARPU_FUT_CPU_KEY/g
			
 
				-s/\bSTARPU_FUT_CUDA_KEY\b/_STARPU_FUT_CUDA_KEY/g
			
 
				 s/\bSTARPU_FUT_DATA_COPY\b/_STARPU_FUT_DATA_COPY/g
			
 
				 s/\bSTARPU_FUT_DO_PROBE3STR\b/_STARPU_FUT_DO_PROBE3STR/g
			
 
				 s/\bSTARPU_FUT_DO_PROBE4STR\b/_STARPU_FUT_DO_PROBE4STR/g
			
@@ -46,7 +43,6 @@ s/\bSTARPU_FUT_END_PUSH_OUTPUT\b/_STARPU_FUT_END_PUSH_OUTPUT/g
 
				 s/\bSTARPU_FUT_JOB_POP\b/_STARPU_FUT_JOB_POP/g
			
 
				 s/\bSTARPU_FUT_JOB_PUSH\b/_STARPU_FUT_JOB_PUSH/g
			
 
				 s/\bSTARPU_FUT_NEW_MEM_NODE\b/_STARPU_FUT_NEW_MEM_NODE/g
			
 
				-s/\bSTARPU_FUT_OPENCL_KEY\b/_STARPU_FUT_OPENCL_KEY/g
			
 
				 s/\bSTARPU_FUT_SET_PROFILING\b/_STARPU_FUT_SET_PROFILING/g
			
 
				 s/\bSTARPU_FUT_START_ALLOC\b/_STARPU_FUT_START_ALLOC/g
			
 
				 s/\bSTARPU_FUT_START_ALLOC_REUSE\b/_STARPU_FUT_START_ALLOC_REUSE/g
			
@@ -123,10 +119,6 @@ s/\bSTARPU_TRACE_WORK_STEALING\b/_STARPU_TRACE_WORK_STEALING/g
 
				 #s/\bSTARPU_DEFAULT_BETA\b/_STARPU_DEFAULT_BETA/g
			
 
				 #s/\bSTARPU_DEFAULT_GAMMA\b/_STARPU_DEFAULT_GAMMA/g
			
 
				 #s/\bSTARPU_CALIBRATION_MINIMUM\b/_STARPU_CALIBRATION_MINIMUM/g
			
 
				-#s/\bSTARPU_CPU_ALPHA\b/_STARPU_CPU_ALPHA/g
			
 
				-#s/\bSTARPU_CUDA_ALPHA\b/_STARPU_CUDA_ALPHA/g
			
 
				-#s/\bSTARPU_OPENCL_ALPHA\b/_STARPU_OPENCL_ALPHA/g
			
 
				-#s/\bSTARPU_GORDON_ALPHA\b/_STARPU_GORDON_ALPHA/g
			
 
				 #
			
 
				 #s/\bstarpu_memory_node_to_devid\b/_starpu_memory_node_to_devid/g
			
 
				 #s/\bstarpu_memchunk_recently_used\b/_starpu_memchunk_recently_used/g
			
@@ -136,11 +128,7 @@ s/\bSTARPU_TRACE_WORK_STEALING\b/_STARPU_TRACE_WORK_STEALING/g
 
				 #s/\bstarpu_data_end_reduction_mode_terminate\b/_starpu_data_end_reduction_mode_terminate/g
			
 
				 #s/\bcreate_request_to_fetch_data\b/_starpu_create_request_to_fetch_data/g
			
 
				 #
			
 
				-#s/\bSTARPU_CPU_MAY_PERFORM\b/_STARPU_CPU_MAY_PERFORM/g
			
 
				-#s/\bSTARPU_CUDA_MAY_PERFORM\b/_STARPU_CUDA_MAY_PERFORM/g
			
 
				-#s/\bSTARPU_SPU_MAY_PERFORM\b/_STARPU_SPU_MAY_PERFORM/g
			
 
				-#s/\bSTARPU_GORDON_MAY_PERFORM\b/_STARPU_GORDON_MAY_PERFORM/g
			
 
				-#s/\bSTARPU_OPENCL_MAY_PERFORM\b/_STARPU_OPENCL_MAY_PERFORM/g
			
 
				+#s/\bSTARPU_MAY_PERFORM\b/_STARPU_MAY_PERFORM/g
			
 
				 #
			
 
				 #s/\bSTARPU_TAG_SIZE\b/_STARPU_TAG_SIZE/g
			
 
				 #s/\bSTARPU_HTBL_NODE_SIZE\b/_STARPU_HTBL_NODE_SIZE/g
			
--- a/tools/starpu_perfmodel_display.c
+++ b/tools/starpu_perfmodel_display.c
@@ -156,6 +156,7 @@ int main(int argc, char **argv)
 
				 #endif
			
 
				 
			
 
				 	parse_args(argc, argv);
			
 
				+	starpu_drivers_preinit();
			
 
				 	starpu_perfmodel_initialize();
			
 
				 
			
 
				         if (plist)
			
--- a/tools/starpu_perfmodel_plot.c
+++ b/tools/starpu_perfmodel_plot.c
@@ -555,6 +555,7 @@ int main(int argc, char **argv)
 
				 #endif
			
 
				 
			
 
				 	parse_args(argc, argv, &options, &directory);
			
 
				+	starpu_drivers_preinit();
			
 
				 	starpu_perfmodel_initialize();
			
 
				 
			
 
				 	if (options.directory)