Просмотр исходного кода

Merge branch 'fpga' of gitlab.inria.fr:starpu/starpu into fpga

Samuel Thibault лет назад: 5
Родитель
Сommit
e962578553
74 измененных файлов с 923 добавлено и 1044 удалено
  1. 1 1
      Makefile.am
  2. 17 3
      configure.ac
  3. 3 3
      doc/doxygen/doxygen.cfg
  4. 0 1
      doc/doxygen_dev/Makefile.am
  5. 0 1
      doc/doxygen_dev/doxygen-config.cfg.in
  6. 2 0
      examples/Makefile.am
  7. 12 8
      examples/cpp/add_vectors_interface.cpp
  8. 1 2
      examples/fortran90/f90_example.f90
  9. 1 2
      examples/native_fortran/nf_example.f90
  10. 2 1
      examples/scheduler/dummy_modular_sched.c
  11. 40 40
      include/starpu.h
  12. 14 14
      include/starpu_config.h.in
  13. 40 54
      include/starpu_data_interfaces.h
  14. 7 4
      include/starpu_driver.h
  15. 1 3
      include/starpu_perfmodel.h
  16. 36 41
      include/starpu_task.h
  17. 15 8
      include/starpu_worker.h
  18. 10 6
      src/Makefile.am
  19. 4 10
      src/common/fxt.h
  20. 1 1
      src/core/detect_combined_workers.c
  21. 0 1
      src/core/disk_ops/disk_leveldb.cpp
  22. 0 1
      src/core/disk_ops/disk_stdio.c
  23. 0 1
      src/core/disk_ops/unistd/disk_unistd_global.c
  24. 1 5
      src/core/jobs.h
  25. 3 13
      src/core/perfmodel/perfmodel.c
  26. 4 4
      src/core/perfmodel/perfmodel.h
  27. 21 18
      src/core/perfmodel/perfmodel_bus.c
  28. 25 50
      src/core/perfmodel/perfmodel_history.c
  29. 9 10
      src/core/sched_policy.c
  30. 36 45
      src/core/task.c
  31. 4 9
      src/core/task.h
  32. 135 167
      src/core/topology.c
  33. 111 119
      src/core/workers.c
  34. 52 68
      src/core/workers.h
  35. 5 12
      src/datawizard/coherency.c
  36. 3 10
      src/datawizard/memory_nodes.c
  37. 3 21
      src/datawizard/node_ops.c
  38. 4 4
      src/datawizard/node_ops.h
  39. 19 95
      src/debug/traces/starpu_fxt.c
  40. 28 9
      src/drivers/cpu/driver_cpu.c
  41. 2 0
      src/drivers/cpu/driver_cpu.h
  42. 8 48
      src/drivers/cuda/driver_cuda.c
  43. 2 0
      src/drivers/cuda/driver_cuda.h
  44. 37 0
      src/drivers/cuda/driver_cuda_init.c
  45. 3 1
      src/drivers/cuda/starpu_cublas.c
  46. 10 10
      src/drivers/disk/driver_disk.c
  47. 2 0
      src/drivers/disk/driver_disk.h
  48. 5 20
      src/drivers/max/driver_fpga.c
  49. 2 0
      src/drivers/max/driver_fpga.h
  50. 37 0
      src/drivers/max/driver_fpga_init.c
  51. 19 15
      src/starpu_parameters.h
  52. 4 14
      src/drivers/mic/driver_mic_source.c
  53. 2 0
      src/drivers/mic/driver_mic_source.h
  54. 1 1
      src/drivers/mpi/driver_mpi_common.c
  55. 37 0
      src/drivers/mpi/driver_mpi_init.c
  56. 3 13
      src/drivers/mpi/driver_mpi_source.c
  57. 2 1
      src/drivers/mpi/driver_mpi_source.h
  58. 4 24
      src/drivers/opencl/driver_opencl.c
  59. 2 0
      src/drivers/opencl/driver_opencl.h
  60. 37 0
      src/drivers/opencl/driver_opencl_init.c
  61. 3 6
      src/profiling/bound.c
  62. 1 1
      src/profiling/profiling.c
  63. 4 4
      src/sched_policies/heteroprio.c
  64. 3 3
      src/sched_policies/modular_ez.c
  65. 0 1
      src/sched_policies/parallel_heft.c
  66. 2 0
      src/util/fstarpu.c
  67. 1 1
      src/util/openmp_runtime_support_omp_api.c
  68. 2 0
      tests/Makefile.am
  69. 5 1
      tests/datawizard/in_place_partition.c
  70. 5 1
      tests/datawizard/wt_broadcast.c
  71. 5 1
      tests/microbenchs/prefetch_data_on_node.c
  72. 1 13
      tools/dev/checker/rename_internal.sed
  73. 1 0
      tools/starpu_perfmodel_display.c
  74. 1 0
      tools/starpu_perfmodel_plot.c

+ 1 - 1
Makefile.am

@@ -82,8 +82,8 @@ versinclude_HEADERS = 				\
 	include/starpu_util.h			\
 	include/starpu_util.h			\
 	include/starpu_fxt.h			\
 	include/starpu_fxt.h			\
 	include/starpu_cuda.h			\
 	include/starpu_cuda.h			\
-	include/starpu_fpga.h			\
 	include/starpu_opencl.h			\
 	include/starpu_opencl.h			\
+	include/starpu_fpga.h			\
 	include/starpu_openmp.h			\
 	include/starpu_openmp.h			\
 	include/starpu_sink.h			\
 	include/starpu_sink.h			\
 	include/starpu_mic.h			\
 	include/starpu_mic.h			\

+ 17 - 3
configure.ac

@@ -1848,9 +1848,6 @@ AC_ARG_ENABLE(maxmicthreads, [AS_HELP_STRING([--enable-maxmicthreads=<number>],
 			nmaxmicthreads=$enableval, nmaxmicthreads=120)
 			nmaxmicthreads=$enableval, nmaxmicthreads=120)
 AC_MSG_RESULT($nmaxmicthread)
 AC_MSG_RESULT($nmaxmicthread)
 
 
-AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmicthreads],
-	[maximum number of MIC cores])
-
 AC_ARG_WITH(coi-dir,
 AC_ARG_WITH(coi-dir,
 	[AS_HELP_STRING([--with-coi-dir=<path>],
 	[AS_HELP_STRING([--with-coi-dir=<path>],
 	[specify the MIC's COI installation directory])],
 	[specify the MIC's COI installation directory])],
@@ -2487,6 +2484,23 @@ nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* $maxcpus \) + $nmaxcudadev +  $n
 AC_MSG_CHECKING(Maximum number of workers)
 AC_MSG_CHECKING(Maximum number of workers)
 AC_MSG_RESULT($nmaxworkers)
 AC_MSG_RESULT($nmaxworkers)
 AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
 AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
+nmaxdevs=0
+if test $nmaxdevs -lt $nmaxcudadev; then
+	nmaxdevs=$nmaxcudadev
+fi
+if test $nmaxdevs -lt $nmaxopencldev; then
+	nmaxdevs=$nmaxopencldev
+fi
+if test $nmaxdevs -lt $nmaxfpgadev; then
+	nmaxdevs=$nmaxfpgadev
+fi
+if test $nmaxdevs -lt $nmaxmicdev; then
+	nmaxdevs=$nmaxmicdev
+fi
+if test $nmaxdevs -lt $nmaxmpidev; then
+	nmaxdevs=$nmaxmpidev
+fi
+AC_DEFINE_UNQUOTED(STARPU_NMAXDEVS, [$nmaxdevs], [Maximum number of device per device arch])
 
 
 # Computes the maximun number of combined worker
 # Computes the maximun number of combined worker
 nmaxcombinedworkers=`expr $maxcpus + $nmaxmicthreads`
 nmaxcombinedworkers=`expr $maxcpus + $nmaxmicthreads`

+ 3 - 3
doc/doxygen/doxygen.cfg

@@ -1613,10 +1613,10 @@ INCLUDE_FILE_PATTERNS  =
 # undefined via #undef or recursively expanded use the := operator
 # undefined via #undef or recursively expanded use the := operator
 # instead of the = operator.
 # instead of the = operator.
 
 
-PREDEFINED             = STARPU_USE_OPENCL=1 \
-                         STARPU_USE_CUDA=1 \
-                         STARPU_USE_MIC=1 \
+PREDEFINED             = STARPU_USE_CUDA=1 \
+                         STARPU_USE_OPENCL=1 \
                          STARPU_USE_FPGA=1 \
                          STARPU_USE_FPGA=1 \
+                         STARPU_USE_MIC=1 \
 			 STARPU_USE_MPI=1 \
 			 STARPU_USE_MPI=1 \
 			 STARPU_HAVE_HWLOC=1 \
 			 STARPU_HAVE_HWLOC=1 \
 			 STARPU_USE_SC_HYPERVISOR=1 \
 			 STARPU_USE_SC_HYPERVISOR=1 \

+ 0 - 1
doc/doxygen_dev/Makefile.am

@@ -172,7 +172,6 @@ dox_inputs = $(DOX_CONFIG) 				\
 	$(top_srcdir)/src/common/list.h	\
 	$(top_srcdir)/src/common/list.h	\
 	$(top_srcdir)/src/debug/starpu_debug_helpers.h	\
 	$(top_srcdir)/src/debug/starpu_debug_helpers.h	\
 	$(top_srcdir)/src/debug/traces/starpu_fxt.h	\
 	$(top_srcdir)/src/debug/traces/starpu_fxt.h	\
-	$(top_srcdir)/src/starpu_parameters.h	\
 	$(top_srcdir)/src/sched_policies/fifo_queues.h	\
 	$(top_srcdir)/src/sched_policies/fifo_queues.h	\
 	$(top_srcdir)/src/sched_policies/helper_mct.h	\
 	$(top_srcdir)/src/sched_policies/helper_mct.h	\
 	$(top_srcdir)/src/sched_policies/sched_component.h	\
 	$(top_srcdir)/src/sched_policies/sched_component.h	\

+ 0 - 1
doc/doxygen_dev/doxygen-config.cfg.in

@@ -73,7 +73,6 @@ INPUT                  = @top_srcdir@/doc/doxygen_dev/chapters         \
 			 @top_srcdir@/src/common/list.h \
 			 @top_srcdir@/src/common/list.h \
 			 @top_srcdir@/src/debug/starpu_debug_helpers.h \
 			 @top_srcdir@/src/debug/starpu_debug_helpers.h \
 			 @top_srcdir@/src/debug/traces/starpu_fxt.h \
 			 @top_srcdir@/src/debug/traces/starpu_fxt.h \
-			 @top_srcdir@/src/starpu_parameters.h \
 			 @top_srcdir@/src/sched_policies/fifo_queues.h \
 			 @top_srcdir@/src/sched_policies/fifo_queues.h \
 			 @top_srcdir@/src/sched_policies/helper_mct.h \
 			 @top_srcdir@/src/sched_policies/helper_mct.h \
 			 @top_srcdir@/src/sched_policies/sched_component.h \
 			 @top_srcdir@/src/sched_policies/sched_component.h \

+ 2 - 0
examples/Makefile.am

@@ -20,6 +20,8 @@ include $(top_srcdir)/starpu.mk
 
 
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CXXFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FFLAGS += $(MAGMA_CFLAGS) -Wno-unused
+AM_FCFLAGS += $(MAGMA_CFLAGS) -Wno-unused
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 12 - 8
examples/cpp/add_vectors_interface.cpp

@@ -171,16 +171,17 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 	.ram_to_ram = NULL,
 	.ram_to_ram = NULL,
 	.ram_to_cuda = NULL,
 	.ram_to_cuda = NULL,
 	.ram_to_opencl = NULL,
 	.ram_to_opencl = NULL,
+	.ram_to_fpga = NULL,
 	.ram_to_mic = NULL,
 	.ram_to_mic = NULL,
 
 
 	.cuda_to_ram = NULL,
 	.cuda_to_ram = NULL,
 	.cuda_to_cuda = NULL,
 	.cuda_to_cuda = NULL,
-	.cuda_to_opencl = NULL,
 
 
 	.opencl_to_ram = NULL,
 	.opencl_to_ram = NULL,
-	.opencl_to_cuda = NULL,
 	.opencl_to_opencl = NULL,
 	.opencl_to_opencl = NULL,
 
 
+	.fpga_to_ram = NULL,
+
 	.mic_to_ram = NULL,
 	.mic_to_ram = NULL,
 
 
 	.ram_to_mpi_ms = NULL,
 	.ram_to_mpi_ms = NULL,
@@ -195,13 +196,16 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 	.opencl_to_ram_async = NULL,
 	.opencl_to_ram_async = NULL,
 	.opencl_to_opencl_async = NULL,
 	.opencl_to_opencl_async = NULL,
 
 
-	.ram_to_mpi_ms_async = NULL,
-	.mpi_ms_to_ram_async = NULL,
-	.mpi_ms_to_mpi_ms_async = NULL,
+	.ram_to_fpga_async = NULL,
+	.fpga_to_ram_async = NULL,
 
 
 	.ram_to_mic_async = NULL,
 	.ram_to_mic_async = NULL,
 	.mic_to_ram_async = NULL,
 	.mic_to_ram_async = NULL,
 
 
+	.ram_to_mpi_ms_async = NULL,
+	.mpi_ms_to_ram_async = NULL,
+	.mpi_ms_to_mpi_ms_async = NULL,
+
 	.any_to_any = vector_interface_copy_any_to_any,
 	.any_to_any = vector_interface_copy_any_to_any,
 };
 };
 #else
 #else
@@ -213,13 +217,14 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
-
 	NULL,
 	NULL,
+
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 
 
 	NULL,
 	NULL,
 	NULL,
 	NULL,
+
 	NULL,
 	NULL,
 
 
 	NULL,
 	NULL,
@@ -238,14 +243,13 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 
 
 	NULL,
 	NULL,
 	NULL,
 	NULL,
-	NULL,
 
 
 	NULL,
 	NULL,
 	NULL,
 	NULL,
-	NULL,
 
 
 	NULL,
 	NULL,
 	NULL,
 	NULL,
+	NULL,
 
 
 	vector_interface_copy_any_to_any,
 	vector_interface_copy_any_to_any,
 };
 };

+ 1 - 2
examples/fortran90/f90_example.f90

@@ -33,8 +33,7 @@ PROGRAM f90_example
   TYPE(type_mesh_elt),POINTER    :: elt   => NULL()
   TYPE(type_mesh_elt),POINTER    :: elt   => NULL()
   INTEGER(KIND=C_INT)            :: i,Nelt,res,cpus
   INTEGER(KIND=C_INT)            :: i,Nelt,res,cpus
   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
-  INTEGER(KIND=C_INT)            :: neq,ng,nb,it,it_tot
-  REAL(KIND=C_DOUBLE)            :: r, coeff2
+  INTEGER(KIND=C_INT)            :: it,it_tot
 
 
   !Initialization with arbitrary data
   !Initialization with arbitrary data
   Nelt           = 2
   Nelt           = 2

+ 1 - 2
examples/native_fortran/nf_example.f90

@@ -32,8 +32,7 @@ PROGRAM f90_example
   TYPE(type_mesh_elt),POINTER    :: elt   => NULL()
   TYPE(type_mesh_elt),POINTER    :: elt   => NULL()
   INTEGER(KIND=C_INT)            :: i,Nelt,res,cpus
   INTEGER(KIND=C_INT)            :: i,Nelt,res,cpus
   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
-  INTEGER(KIND=C_INT)            :: neq,ng,nb,it,it_tot
-  REAL(KIND=C_DOUBLE)            :: r, coeff2
+  INTEGER(KIND=C_INT)            :: it,it_tot
   REAL(KIND=C_DOUBLE),TARGET     :: flops
   REAL(KIND=C_DOUBLE),TARGET     :: flops
 
 
   TYPE(C_PTR) :: cl_loop_element = C_NULL_PTR ! loop codelet
   TYPE(C_PTR) :: cl_loop_element = C_NULL_PTR ! loop codelet

+ 2 - 1
examples/scheduler/dummy_modular_sched.c

@@ -215,7 +215,8 @@ static struct starpu_codelet dummy_codelet =
 	.cpu_funcs = {dummy_func},
 	.cpu_funcs = {dummy_func},
 	.cpu_funcs_name = {"dummy_func"},
 	.cpu_funcs_name = {"dummy_func"},
 	.cuda_funcs = {dummy_func},
 	.cuda_funcs = {dummy_func},
-        .opencl_funcs = {dummy_func},
+	.opencl_funcs = {dummy_func},
+	.fpga_funcs = {dummy_func},
 	.model = &starpu_perfmodel_nop,
 	.model = &starpu_perfmodel_nop,
 	.nbuffers = 0,
 	.nbuffers = 0,
 	.name = "dummy",
 	.name = "dummy",

+ 40 - 40
include/starpu.h

@@ -254,6 +254,40 @@ struct starpu_conf
 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
 
 
 	/**
 	/**
+	   If this flag is set, the FPGA workers will be attached to
+	   the FPGA devices specified in the
+	   starpu_conf::workers_fpga_deviceid array. Otherwise, StarPU
+	   affects the FPGA devices in a round-robin fashion. This
+	   can also be specified with the environment variable \ref
+	   STARPU_WORKERS_FPGAID.
+	   (default = 0)
+	*/
+        unsigned use_explicit_workers_fpga_deviceid;
+
+	/**
+	   If the starpu_conf::use_explicit_workers_fpga_deviceid flag
+	   is set, this array contains the logical identifiers of the
+	   FPGA devices to be used.
+	*/
+	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
+
+#ifdef STARPU_USE_FPGA
+	/**
+           This allows to specify the Maxeler file(s) to be loaded on FPGAs.
+	   This is an array of starpu_max_load, the last of which shall have
+	   file set to NULL. In order to use all available devices,
+	   starpu_max_load::engine_id_pattern can be set to "*", but only the
+           last non-NULL entry can be set so.
+
+	   If this is not set, it is assumed that the basic static SLiC
+           interface is used.
+        */
+	struct starpu_max_load *fpga_load;
+#else
+	void *fpga_load;
+#endif
+
+	/**
 	   If this flag is set, the MIC workers will be attached to
 	   If this flag is set, the MIC workers will be attached to
 	   the MIC devices specified in the array
 	   the MIC devices specified in the array
 	   starpu_conf::workers_mic_deviceid. Otherwise, StarPU
 	   starpu_conf::workers_mic_deviceid. Otherwise, StarPU
@@ -290,40 +324,6 @@ struct starpu_conf
 	unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS];
 	unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS];
 
 
 	/**
 	/**
-	   If this flag is set, the FPGA workers will be attached to
-	   the FPGA devices specified in the
-	   starpu_conf::workers_fpga_deviceid array. Otherwise, StarPU
-	   affects the FPGA devices in a round-robin fashion. This
-	   can also be specified with the environment variable \ref
-	   STARPU_WORKERS_FPGAID.
-	   (default = 0)
-	*/
-        unsigned use_explicit_workers_fpga_deviceid;
-
-	/**
-	   If the starpu_conf::use_explicit_workers_fpga_deviceid flag
-	   is set, this array contains the logical identifiers of the
-	   FPGA devices to be used.
-	*/
-	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
-
-#ifdef STARPU_USE_FPGA
-	/**
-           This allows to specify the Maxeler file(s) to be loaded on FPGAs.
-	   This is an array of starpu_max_load, the last of which shall have
-	   file set to NULL. In order to use all available devices,
-	   starpu_max_load::engine_id_pattern can be set to "*", but only the
-           last non-NULL entry can be set so.
-
-	   If this is not set, it is assumed that the basic static SLiC
-           interface is used.
-        */
-	struct starpu_max_load *fpga_load;
-#else
-	void *fpga_files;
-#endif
-
-	/**
 	   If this flag is set, StarPU will recalibrate the bus.  If
 	   If this flag is set, StarPU will recalibrate the bus.  If
 	   this value is equal to -1, the default value is used. This
 	   this value is equal to -1, the default value is used. This
 	   can also be specified with the environment variable \ref
 	   can also be specified with the environment variable \ref
@@ -658,6 +658,12 @@ int starpu_asynchronous_cuda_copy_disabled(void);
 int starpu_asynchronous_opencl_copy_disabled(void);
 int starpu_asynchronous_opencl_copy_disabled(void);
 
 
 /**
 /**
+   Return 1 if asynchronous data transfers between CPU and FPGA
+   devices are disabled.
+*/
+int starpu_asynchronous_fpga_copy_disabled(void);
+
+/**
    Return 1 if asynchronous data transfers between CPU and MIC devices
    Return 1 if asynchronous data transfers between CPU and MIC devices
    are disabled.
    are disabled.
 */
 */
@@ -669,12 +675,6 @@ int starpu_asynchronous_mic_copy_disabled(void);
 */
 */
 int starpu_asynchronous_mpi_ms_copy_disabled(void);
 int starpu_asynchronous_mpi_ms_copy_disabled(void);
 
 
-/**
-   Return 1 if asynchronous data transfers between CPU and FPGA
-   devices are disabled.
-*/
-int starpu_asynchronous_fpga_copy_disabled(void);
-
 void starpu_display_stats(void);
 void starpu_display_stats(void);
 
 
 void starpu_get_version(int *major, int *minor, int *release);
 void starpu_get_version(int *major, int *minor, int *release);

+ 14 - 14
include/starpu_config.h.in

@@ -65,6 +65,13 @@
 #undef STARPU_USE_OPENCL
 #undef STARPU_USE_OPENCL
 
 
 /**
 /**
+   Defined when StarPU has been installed with FPGA support. It should
+   be used in your code to detect the availability of FPGA.
+   @ingroup API_FPGA_Extensions
+*/
+#undef STARPU_USE_FPGA
+
+/**
    Defined when StarPU has been installed with MIC support. It should
    Defined when StarPU has been installed with MIC support. It should
    be used in your code to detect the availability of MIC.
    be used in your code to detect the availability of MIC.
    @ingroup API_MIC_Extensions
    @ingroup API_MIC_Extensions
@@ -80,13 +87,6 @@
 #undef STARPU_USE_MPI_MASTER_SLAVE
 #undef STARPU_USE_MPI_MASTER_SLAVE
 
 
 /**
 /**
-   Defined when StarPU has been installed with FPGA support. It should
-   be used in your code to detect the availability of FPGA.
-   @ingroup API_FPGA_Extensions
-*/
-#undef STARPU_USE_FPGA
-
-/**
    Defined when StarPU has been installed with OpenMP Runtime support.
    Defined when StarPU has been installed with OpenMP Runtime support.
    It should be used in your code to detect the availability of the
    It should be used in your code to detect the availability of the
    runtime support for OpenMP.
    runtime support for OpenMP.
@@ -224,13 +224,6 @@
 #undef STARPU_MAXCUDADEVS
 #undef STARPU_MAXCUDADEVS
 
 
 /**
 /**
-   Define the maximum number of FPGA devices that are supported by
-   StarPU.
-   @ingroup API_FPGA_Extensions
- */
-#undef STARPU_MAXFPGADEVS
-
-/**
    Define the maximum number of OpenCL devices that are supported by
    Define the maximum number of OpenCL devices that are supported by
    StarPU.
    StarPU.
    @ingroup API_OpenCL_Extensions
    @ingroup API_OpenCL_Extensions
@@ -238,6 +231,13 @@
 #undef STARPU_MAXOPENCLDEVS
 #undef STARPU_MAXOPENCLDEVS
 
 
 /**
 /**
+   Define the maximum number of FPGA devices that are supported by
+   StarPU.
+   @ingroup API_FPGA_Extensions
+ */
+#undef STARPU_MAXFPGADEVS
+
+/**
    Define the maximum number of MIC devices that are supported by
    Define the maximum number of MIC devices that are supported by
    StarPU.
    StarPU.
    @ingroup API_MIC_Extensions
    @ingroup API_MIC_Extensions

+ 40 - 54
include/starpu_data_interfaces.h

@@ -124,16 +124,16 @@ struct starpu_data_copy_methods
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node CPU node to the \p dst_interface interface on the \p
 	   \p src_node CPU node to the \p dst_interface interface on the \p
-	   dst_node MIC node. Return 0 on success.
+	   dst_node FPGA node. Return 0 on success.
 	*/
 	*/
-	int (*ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node CPU node to the \p dst_interface interface on the \p
 	   \p src_node CPU node to the \p dst_interface interface on the \p
-	   dst_node FPGA node. Return 0 on success.
+	   dst_node MIC node. Return 0 on success.
 	*/
 	*/
-	int (*ram_to_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
@@ -151,13 +151,6 @@ struct starpu_data_copy_methods
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node CUDA node to the \p dst_interface interface on the \p
-	   dst_node OpenCL node. Return 0 on success.
-	*/
-	int (*cuda_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
-
-	/**
-	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node OpenCL node to the \p dst_interface interface on the
 	   \p src_node OpenCL node to the \p dst_interface interface on the
 	   \p dst_node CPU node. Return 0 on success.
 	   \p dst_node CPU node. Return 0 on success.
 	*/
 	*/
@@ -166,30 +159,23 @@ struct starpu_data_copy_methods
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node OpenCL node to the \p dst_interface interface on the
 	   \p src_node OpenCL node to the \p dst_interface interface on the
-	   \p dst_node CUDA node. Return 0 on success.
-	*/
-	int (*opencl_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
-
-	/**
-	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node OpenCL node to the \p dst_interface interface on the
 	   \p dst_node OpenCL node. Return 0 on success.
 	   \p dst_node OpenCL node. Return 0 on success.
 	*/
 	*/
 	int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 	int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node MIC node to the \p dst_interface interface on the \p
+	   \p src_node FPGA node to the \p dst_interface interface on the \p
 	   dst_node CPU node. Return 0 on success.
 	   dst_node CPU node. Return 0 on success.
 	*/
 	*/
-	int (*mic_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
+	int (*fpga_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node FPGA node to the \p dst_interface interface on the \p
+	   \p src_node MIC node to the \p dst_interface interface on the \p
 	   dst_node CPU node. Return 0 on success.
 	   dst_node CPU node. Return 0 on success.
 	*/
 	*/
-	int (*fpga_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
+	int (*mic_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
@@ -288,32 +274,22 @@ struct starpu_data_copy_methods
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node CPU node to the \p dst_interface interface on the \p
 	   \p src_node CPU node to the \p dst_interface interface on the \p
-	   dst_node MPI Slave node, with the given even. Must return 0 if the
-	   transfer was actually completed completely synchronously, or
-	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
-	   should be awaited for by the core.
-	*/
-	int (*ram_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
-
-	/**
-	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node MPI Slave node to the \p dst_interface interface on
-	   the \p dst_node CPU node, with the given event. Must return 0 if
-	   the transfer was actually completed completely synchronously, or
-	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
-	   should be awaited for by the core.
+	   dst_node FPGA node. Must return 0 if the transfer was actually
+	   completed completely synchronously, or <c>-EAGAIN</c> if at least
+	   some transfers are still ongoing and should be awaited for by the
+	   core.
 	*/
 	*/
-	int (*mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
+	int (*ram_to_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node MPI Slave node to the \p dst_interface interface on
-	   the \p dst_node MPI Slave node, using the given stream. Must
-	   return 0 if the transfer was actually completed completely
-	   synchronously, or <c>-EAGAIN</c> if at least some transfers are
-	   still ongoing and should be awaited for by the core.
+	   \p src_node FPGA node to the \p dst_interface interface on the \p
+	   dst_node CPU node. Must return 0 if the transfer was actually
+	   completed completely synchronously, or <c>-EAGAIN</c> if at least
+	   some transfers are still ongoing and should be awaited for by the
+	   core.
 	*/
 	*/
-	int (*mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
+	int (*fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
@@ -338,22 +314,32 @@ struct starpu_data_copy_methods
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node CPU node to the \p dst_interface interface on the \p
 	   \p src_node CPU node to the \p dst_interface interface on the \p
-	   dst_node FPGA node. Must return 0 if the transfer was actually
-	   completed completely synchronously, or <c>-EAGAIN</c> if at least
-	   some transfers are still ongoing and should be awaited for by the
-	   core.
+	   dst_node MPI Slave node, with the given even. Must return 0 if the
+	   transfer was actually completed completely synchronously, or
+	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
+	   should be awaited for by the core.
 	*/
 	*/
-	int (*ram_to_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node FPGA node to the \p dst_interface interface on the \p
-	   dst_node CPU node. Must return 0 if the transfer was actually
-	   completed completely synchronously, or <c>-EAGAIN</c> if at least
-	   some transfers are still ongoing and should be awaited for by the
-	   core.
+	   \p src_node MPI Slave node to the \p dst_interface interface on
+	   the \p dst_node CPU node, with the given event. Must return 0 if
+	   the transfer was actually completed completely synchronously, or
+	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
+	   should be awaited for by the core.
 	*/
 	*/
-	int (*fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
+	int (*mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
+
+	/**
+	   Define how to copy data from the \p src_interface interface on the
+	   \p src_node MPI Slave node to the \p dst_interface interface on
+	   the \p dst_node MPI Slave node, using the given stream. Must
+	   return 0 if the transfer was actually completed completely
+	   synchronously, or <c>-EAGAIN</c> if at least some transfers are
+	   still ongoing and should be awaited for by the core.
+	*/
+	int (*mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
 
 
 	/**
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   Define how to copy data from the \p src_interface interface on the

+ 7 - 4
include/starpu_driver.h

@@ -37,7 +37,13 @@ extern "C"
 */
 */
 
 
 /**
 /**
-   structure for a driver
+   Pre-initialize drivers
+   So as to register information on device types, memory types, etc.
+*/
+void starpu_drivers_preinit(void);
+
+/**
+   structure for designating a given driver
 */
 */
 struct starpu_driver
 struct starpu_driver
 {
 {
@@ -53,11 +59,8 @@ struct starpu_driver
 	{
 	{
 		unsigned cpu_id;
 		unsigned cpu_id;
 		unsigned cuda_id;
 		unsigned cuda_id;
-		unsigned fpga_id;
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
 		cl_device_id opencl_id;
 		cl_device_id opencl_id;
-#else
-		unsigned opencl_id;
 #endif
 #endif
 	} id;
 	} id;
 };
 };

+ 1 - 3
include/starpu_perfmodel.h

@@ -35,8 +35,6 @@ extern "C"
 struct starpu_task;
 struct starpu_task;
 struct starpu_data_descr;
 struct starpu_data_descr;
 
 
-#define STARPU_NARCH STARPU_ANY_WORKER
-
 /**
 /**
    todo
    todo
 */
 */
@@ -408,7 +406,7 @@ int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, i
 */
 */
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
 
 
-char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
+const char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
 
 
 /**
 /**
    Return the architecture name for \p arch
    Return the architecture name for \p arch

+ 36 - 41
include/starpu_task.h

@@ -68,16 +68,16 @@ extern "C"
 /**
 /**
    To be used when setting the field starpu_codelet::where (or
    To be used when setting the field starpu_codelet::where (or
    starpu_task::where) to specify the codelet (or the task) may be
    starpu_task::where) to specify the codelet (or the task) may be
-   executed on a MAX FPGA.
+   executed on a OpenCL processing unit.
 */
 */
-#define STARPU_FPGA	STARPU_WORKER_TO_MASK(STARPU_FPGA_WORKER)
+#define STARPU_OPENCL	STARPU_WORKER_TO_MASK(STARPU_OPENCL_WORKER)
 
 
 /**
 /**
    To be used when setting the field starpu_codelet::where (or
    To be used when setting the field starpu_codelet::where (or
    starpu_task::where) to specify the codelet (or the task) may be
    starpu_task::where) to specify the codelet (or the task) may be
-   executed on a OpenCL processing unit.
+   executed on a MAX FPGA.
 */
 */
-#define STARPU_OPENCL	STARPU_WORKER_TO_MASK(STARPU_OPENCL_WORKER)
+#define STARPU_FPGA	STARPU_WORKER_TO_MASK(STARPU_FPGA_WORKER)
 
 
 /**
 /**
    To be used when setting the field starpu_codelet::where (or
    To be used when setting the field starpu_codelet::where (or
@@ -180,14 +180,14 @@ typedef void (*starpu_cpu_func_t)(void **, void*);
 typedef void (*starpu_cuda_func_t)(void **, void*);
 typedef void (*starpu_cuda_func_t)(void **, void*);
 
 
 /**
 /**
-   FPGA implementation of a codelet.
+   OpenCL implementation of a codelet.
 */
 */
-typedef void (*starpu_fpga_func_t)(void **, void*);
+typedef void (*starpu_opencl_func_t)(void **, void*);
 
 
 /**
 /**
-   OpenCL implementation of a codelet.
+   FPGA implementation of a codelet.
 */
 */
-typedef void (*starpu_opencl_func_t)(void **, void*);
+typedef void (*starpu_fpga_func_t)(void **, void*);
 
 
 /**
 /**
    MIC implementation of a codelet.
    MIC implementation of a codelet.
@@ -229,21 +229,21 @@ typedef starpu_mpi_ms_kernel_t (*starpu_mpi_ms_func_t)(void);
 
 
 /**
 /**
    @deprecated
    @deprecated
-   Setting the field starpu_codelet::fpga_func with this macro
+   Setting the field starpu_codelet::opencl_func with this macro
    indicates the codelet will have several implementations. The use of
    indicates the codelet will have several implementations. The use of
    this macro is deprecated. One should always only define the field
    this macro is deprecated. One should always only define the field
-   starpu_codelet::fpga_funcs.
+   starpu_codelet::opencl_funcs.
 */
 */
-#define STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS   ((starpu_fpga_func_t) -1)
+#define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t) -1)
 
 
 /**
 /**
    @deprecated
    @deprecated
-   Setting the field starpu_codelet::opencl_func with this macro
+   Setting the field starpu_codelet::fpga_func with this macro
    indicates the codelet will have several implementations. The use of
    indicates the codelet will have several implementations. The use of
    this macro is deprecated. One should always only define the field
    this macro is deprecated. One should always only define the field
-   starpu_codelet::opencl_funcs.
+   starpu_codelet::fpga_funcs.
 */
 */
-#define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t) -1)
+#define STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS   ((starpu_fpga_func_t) -1)
 
 
 /**
 /**
    Value to set in starpu_codelet::nbuffers to specify that the
    Value to set in starpu_codelet::nbuffers to specify that the
@@ -349,19 +349,19 @@ struct starpu_codelet
 	*/
 	*/
 	starpu_cuda_func_t cuda_func STARPU_DEPRECATED;
 	starpu_cuda_func_t cuda_func STARPU_DEPRECATED;
 
 
-        /**
+	/**
 	   @deprecated
 	   @deprecated
 	   Optional field which has been made deprecated. One should
 	   Optional field which has been made deprecated. One should
-	   use instead the starpu_codelet::fpga_funcs field.
+	   use instead the starpu_codelet::opencl_funcs field.
 	*/
 	*/
-	starpu_fpga_func_t fpga_func STARPU_DEPRECATED;
+	starpu_opencl_func_t opencl_func STARPU_DEPRECATED;
 
 
         /**
         /**
 	   @deprecated
 	   @deprecated
 	   Optional field which has been made deprecated. One should
 	   Optional field which has been made deprecated. One should
-	   use instead the starpu_codelet::opencl_funcs field.
+	   use instead the starpu_codelet::fpga_funcs field.
 	*/
 	*/
-	starpu_opencl_func_t opencl_func STARPU_DEPRECATED;
+	starpu_fpga_func_t fpga_func STARPU_DEPRECATED;
 
 
 	/**
 	/**
 	   Optional array of function pointers to the CPU
 	   Optional array of function pointers to the CPU
@@ -396,23 +396,6 @@ struct starpu_codelet
 	starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS];
 	starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS];
 
 
 	/**
 	/**
-           Optional array of function pointers to the FPGA
-           implementations of the codelet. The functions prototype
-           must be:
-           \code{.c}
-           void fpga_func(void *buffers[], void *cl_arg)
-           \endcode
-           The first argument being the array of data managed by the
-           data management library, and the second argument is a
-           pointer to the argument passed from the field
-           starpu_task::cl_arg. If the field starpu_codelet::where is
-           set, then the field starpu_codelet::fpga_funcs is ignored if
-           ::STARPU_FPGA does not appear in the field
-           starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
-        */
-	starpu_fpga_func_t fpga_funcs[STARPU_MAXIMPLEMENTATIONS];
-
-	/**
 	   Optional array of flags for CUDA execution. They specify
 	   Optional array of flags for CUDA execution. They specify
 	   some semantic details about CUDA kernel execution, such as
 	   some semantic details about CUDA kernel execution, such as
 	   asynchronous execution.
 	   asynchronous execution.
@@ -441,6 +424,23 @@ struct starpu_codelet
 	char opencl_flags[STARPU_MAXIMPLEMENTATIONS];
 	char opencl_flags[STARPU_MAXIMPLEMENTATIONS];
 
 
 	/**
 	/**
+           Optional array of function pointers to the FPGA
+           implementations of the codelet. The functions prototype
+           must be:
+           \code{.c}
+           void fpga_func(void *buffers[], void *cl_arg)
+           \endcode
+           The first argument being the array of data managed by the
+           data management library, and the second argument is a
+           pointer to the argument passed from the field
+           starpu_task::cl_arg. If the field starpu_codelet::where is
+           set, then the field starpu_codelet::fpga_funcs is ignored if
+           ::STARPU_FPGA does not appear in the field
+           starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
+        */
+	starpu_fpga_func_t fpga_funcs[STARPU_MAXIMPLEMENTATIONS];
+
+	/**
 	   Optional array of function pointers to a function which
 	   Optional array of function pointers to a function which
 	   returns the MIC implementation of the codelet. The
 	   returns the MIC implementation of the codelet. The
 	   functions prototype must be:
 	   functions prototype must be:
@@ -482,11 +482,6 @@ struct starpu_codelet
 	const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
 	const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
 
 
 	/**
 	/**
-	   fpga kernel type
-        */
-	char *fpga_kernel_type[STARPU_MAXIMPLEMENTATIONS];
-
-	/**
 	   Specify the number of arguments taken by the codelet. These
 	   Specify the number of arguments taken by the codelet. These
 	   arguments are managed by the DSM and are accessed from the
 	   arguments are managed by the DSM and are accessed from the
 	   <c>void *buffers[]</c> array. The constant argument passed
 	   <c>void *buffers[]</c> array. The constant argument passed

+ 15 - 8
include/starpu_worker.h

@@ -47,10 +47,11 @@ enum starpu_node_kind
 	STARPU_CPU_RAM=1,
 	STARPU_CPU_RAM=1,
 	STARPU_CUDA_RAM=2,
 	STARPU_CUDA_RAM=2,
 	STARPU_OPENCL_RAM=3,
 	STARPU_OPENCL_RAM=3,
-	STARPU_DISK_RAM=4,
-	STARPU_MIC_RAM=5,
-	STARPU_FPGA_RAM=6,
-	STARPU_MPI_MS_RAM=7
+	STARPU_FPGA_RAM=4,
+	STARPU_DISK_RAM=5,
+	STARPU_MIC_RAM=6,
+	STARPU_MPI_MS_RAM=7,
+	STARPU_MAX_RAM=7
 };
 };
 
 
 /**
 /**
@@ -65,10 +66,10 @@ enum starpu_worker_archtype
 	STARPU_CPU_WORKER=0,        /**< CPU core */
 	STARPU_CPU_WORKER=0,        /**< CPU core */
 	STARPU_CUDA_WORKER=1,       /**< NVIDIA CUDA device */
 	STARPU_CUDA_WORKER=1,       /**< NVIDIA CUDA device */
 	STARPU_OPENCL_WORKER=2,     /**< OpenCL device */
 	STARPU_OPENCL_WORKER=2,     /**< OpenCL device */
-	STARPU_MIC_WORKER=3,        /**< Intel MIC device */
 	STARPU_FPGA_WORKER=4,       /**< FPGA device */
 	STARPU_FPGA_WORKER=4,       /**< FPGA device */
+	STARPU_MIC_WORKER=3,        /**< Intel MIC device */
 	STARPU_MPI_MS_WORKER=5,     /**< MPI Slave device */
 	STARPU_MPI_MS_WORKER=5,     /**< MPI Slave device */
-	STARPU_MAX_WORKER=5,        /**< maximum value of STARPU_*_WORKER */
+	STARPU_NARCH = 6,           /**< Number of arch types */
 	STARPU_ANY_WORKER=255       /**< any worker, used in the hypervisor */
 	STARPU_ANY_WORKER=255       /**< any worker, used in the hypervisor */
 };
 };
 
 
@@ -318,9 +319,9 @@ unsigned starpu_worker_is_slave_somewhere(int workerid);
 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
 
 
 /**
 /**
-   Return worker \p type as a trivial string (CPU, CUDA, etc.)
+   Return worker \p type as a string suitable for environment variable names (CPU, CUDA, etc.)
 */
 */
-const char *starpu_worker_get_type_as_short_string(enum starpu_worker_archtype type);
+const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type);
 
 
 int starpu_bindid_get_workerids(int bindid, int **workerids);
 int starpu_bindid_get_workerids(int bindid, int **workerids);
 
 
@@ -388,6 +389,12 @@ enum starpu_node_kind starpu_node_get_kind(unsigned node);
 enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind);
 enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind);
 
 
 /**
 /**
+   Return the type of memory node that arch type \p type operates on
+  */
+enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type);
+
+
+/**
    @name Scheduling operations
    @name Scheduling operations
    @{
    @{
 */
 */

+ 10 - 6
src/Makefile.am

@@ -138,9 +138,9 @@ noinst_HEADERS = 						\
 	drivers/mp_common/sink_common.h				\
 	drivers/mp_common/sink_common.h				\
 	drivers/cpu/driver_cpu.h				\
 	drivers/cpu/driver_cpu.h				\
 	drivers/cuda/driver_cuda.h				\
 	drivers/cuda/driver_cuda.h				\
-	drivers/max/driver_fpga.h				\
 	drivers/opencl/driver_opencl.h				\
 	drivers/opencl/driver_opencl.h				\
 	drivers/opencl/driver_opencl_utils.h			\
 	drivers/opencl/driver_opencl_utils.h			\
+	drivers/max/driver_fpga.h				\
 	debug/starpu_debug_helpers.h				\
 	debug/starpu_debug_helpers.h				\
 	drivers/mic/driver_mic_common.h				\
 	drivers/mic/driver_mic_common.h				\
 	drivers/mic/driver_mic_source.h				\
 	drivers/mic/driver_mic_source.h				\
@@ -156,7 +156,6 @@ noinst_HEADERS = 						\
 	util/starpu_clusters_create.h				\
 	util/starpu_clusters_create.h				\
 	util/starpu_task_insert_utils.h				\
 	util/starpu_task_insert_utils.h				\
 	util/starpu_data_cpy.h					\
 	util/starpu_data_cpy.h					\
-	starpu_parameters.h					\
 	sched_policies/prio_deque.h				\
 	sched_policies/prio_deque.h				\
 	sched_policies/sched_component.h
 	sched_policies/sched_component.h
 
 
@@ -322,16 +321,13 @@ if STARPU_HAVE_LEVELDB
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_leveldb.cpp
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_leveldb.cpp
 endif
 endif
 
 
-if STARPU_USE_FPGA
-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/max/driver_fpga.c
-endif
-
 if STARPU_HAVE_HDF5
 if STARPU_HAVE_HDF5
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_hdf5.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_hdf5.c
 endif
 endif
 
 
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cpu/driver_cpu.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cpu/driver_cpu.c
 
 
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/driver_cuda_init.c
 if STARPU_USE_CUDA
 if STARPU_USE_CUDA
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/driver_cuda.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/driver_cuda.c
 else
 else
@@ -343,6 +339,7 @@ endif
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cublas.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cublas.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cusparse.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cusparse.c
 
 
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl_init.c
 if STARPU_USE_OPENCL
 if STARPU_USE_OPENCL
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl_utils.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl_utils.c
@@ -352,6 +349,11 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.
 endif
 endif
 endif
 endif
 
 
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/max/driver_fpga_init.c
+if STARPU_USE_FPGA
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/max/driver_fpga.c
+endif
+
 if STARPU_LINUX_SYS
 if STARPU_LINUX_SYS
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_unistd_o_direct.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_unistd_o_direct.c
 endif
 endif
@@ -387,6 +389,7 @@ endif
 #										#
 #										#
 #########################################
 #########################################
 
 
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_init.c
 if STARPU_USE_MIC
 if STARPU_USE_MIC
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_common.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_common.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_source.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_source.c
@@ -400,6 +403,7 @@ endif
 #                                       #
 #                                       #
 #########################################
 #########################################
 
 
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mpi/driver_mpi_init.c
 if STARPU_USE_MPI_MASTER_SLAVE
 if STARPU_USE_MPI_MASTER_SLAVE
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mpi/driver_mpi_common.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mpi/driver_mpi_common.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mpi/driver_mpi_source.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mpi/driver_mpi_source.c

+ 4 - 10
src/common/fxt.h

@@ -37,13 +37,8 @@
 #include <starpu.h>
 #include <starpu.h>
 
 
 /* some key to identify the worker kind */
 /* some key to identify the worker kind */
-#define _STARPU_FUT_APPS_KEY	0x100
-#define _STARPU_FUT_CPU_KEY	0x101
-#define _STARPU_FUT_CUDA_KEY	0x102
-#define _STARPU_FUT_FPGA_KEY	0x109
-#define _STARPU_FUT_OPENCL_KEY	0x103
-#define _STARPU_FUT_MIC_KEY	0x104
-#define _STARPU_FUT_MPI_KEY	0x106
+#define _STARPU_FUT_WORKER_KEY(kind) (kind + 0x100)
+#define _STARPU_FUT_KEY_WORKER(key) (key - 0x100)
 
 
 #define _STARPU_FUT_WORKER_INIT_START	0x5100
 #define _STARPU_FUT_WORKER_INIT_START	0x5100
 #define _STARPU_FUT_WORKER_INIT_END	0x5101
 #define _STARPU_FUT_WORKER_INIT_END	0x5101
@@ -738,10 +733,9 @@ do {									\
 		FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid()); \
 		FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid()); \
 } while (0)
 } while (0)
 
 
-/* workerkind = _STARPU_FUT_CPU_KEY for instance */
 #define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync)	do {\
 #define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync)	do {\
 	if (_starpu_fxt_started) \
 	if (_starpu_fxt_started) \
-		FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, workerkind, workerid, devid, memnode, bindid, sync, _starpu_gettid()); \
+		FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, _STARPU_FUT_WORKER_KEY(workerkind), workerid, devid, memnode, bindid, sync, _starpu_gettid()); \
 } while (0)
 } while (0)
 
 
 #define _STARPU_TRACE_WORKER_INIT_END(__workerid)		do {\
 #define _STARPU_TRACE_WORKER_INIT_END(__workerid)		do {\
@@ -931,7 +925,7 @@ do {										\
 
 
 #define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		do {\
 #define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		do {\
 	if (_starpu_fxt_started) \
 	if (_starpu_fxt_started) \
-		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, _starpu_gettid()); \
+		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, _STARPU_FUT_WORKER_KEY(workerkind), _starpu_gettid()); \
 } while(0)
 } while(0)
 
 
 #define _STARPU_TRACE_WORKER_SCHEDULING_START	\
 #define _STARPU_TRACE_WORKER_SCHEDULING_START	\

+ 1 - 1
src/core/detect_combined_workers.c

@@ -252,7 +252,7 @@ static void find_and_assign_combinations_without_hwloc(int *workerids, int nwork
 	int cpu_workers[STARPU_NMAXWORKERS];
 	int cpu_workers[STARPU_NMAXWORKERS];
 	unsigned ncpus = 0;
 	unsigned ncpus = 0;
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
-	unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices;
+	unsigned nb_mics = _starpu_get_machine_config()->topology.ndevices[STARPU_MIC_WORKER];
 	unsigned * nmics_table;
 	unsigned * nmics_table;
 	int * mic_id;
 	int * mic_id;
 	int ** mic_workers;
 	int ** mic_workers;

+ 0 - 1
src/core/disk_ops/disk_leveldb.cpp

@@ -27,7 +27,6 @@
 #include <core/perfmodel/perfmodel.h>
 #include <core/perfmodel/perfmodel.h>
 #include <datawizard/copy_driver.h>
 #include <datawizard/copy_driver.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_manager.h>
-#include <starpu_parameters.h>
 
 
 #define NITER	_starpu_calibration_minimum
 #define NITER	_starpu_calibration_minimum
 
 

+ 0 - 1
src/core/disk_ops/disk_stdio.c

@@ -28,7 +28,6 @@
 #include <datawizard/copy_driver.h>
 #include <datawizard/copy_driver.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_nodes.h>
 #include <datawizard/memory_nodes.h>
-#include <starpu_parameters.h>
 
 
 #ifdef STARPU_HAVE_WINDOWS
 #ifdef STARPU_HAVE_WINDOWS
 #  include <io.h>
 #  include <io.h>

+ 0 - 1
src/core/disk_ops/unistd/disk_unistd_global.c

@@ -37,7 +37,6 @@
 #include <datawizard/copy_driver.h>
 #include <datawizard/copy_driver.h>
 #include <datawizard/data_request.h>
 #include <datawizard/data_request.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_manager.h>
-#include <starpu_parameters.h>
 #include <common/uthash.h>
 #include <common/uthash.h>
 
 
 #ifdef STARPU_HAVE_WINDOWS
 #ifdef STARPU_HAVE_WINDOWS

+ 1 - 5
src/core/jobs.h

@@ -52,11 +52,7 @@ struct _starpu_worker;
 /** codelet function */
 /** codelet function */
 typedef void (*_starpu_cl_func_t)(void **, void *);
 typedef void (*_starpu_cl_func_t)(void **, void *);
 
 
-#define _STARPU_CPU_MAY_PERFORM(j)	((j)->task->where & STARPU_CPU)
-#define _STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->where & STARPU_CUDA)
-#define _STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->where & STARPU_OPENCL)
-#define _STARPU_FPGA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_FPGA)
-#define _STARPU_MIC_MAY_PERFORM(j)	((j)->task->where & STARPU_MIC)
+#define _STARPU_MAY_PERFORM(j, arch)	((j)->task->where & STARPU_##arch)
 
 
 struct _starpu_data_descr
 struct _starpu_data_descr
 {
 {

+ 3 - 13
src/core/perfmodel/perfmodel.c

@@ -126,18 +126,8 @@ double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch* perf_arc
 	int dev;
 	int dev;
 	for(dev = 0; dev < perf_arch->ndevices; dev++)
 	for(dev = 0; dev < perf_arch->ndevices; dev++)
 	{
 	{
-		double coef = 0.0;
-		if (perf_arch->devices[dev].type == STARPU_CPU_WORKER)
-			coef = _STARPU_CPU_ALPHA;
-		else if (perf_arch->devices[dev].type == STARPU_CUDA_WORKER)
-			coef = _STARPU_CUDA_ALPHA;
-		else if (perf_arch->devices[dev].type == STARPU_OPENCL_WORKER)
-			coef = _STARPU_OPENCL_ALPHA;
-		else if (perf_arch->devices[dev].type == STARPU_MIC_WORKER)
-			coef = _STARPU_MIC_ALPHA;
-		else if (perf_arch->devices[dev].type == STARPU_MPI_MS_WORKER)
-			coef = _STARPU_MPI_MS_ALPHA;
-
+		enum starpu_worker_archtype archtype = perf_arch->devices[dev].type;
+		double coef = starpu_driver_info[archtype].alpha;
 		speedup += coef * (perf_arch->devices[dev].ncores);
 		speedup += coef * (perf_arch->devices[dev].ncores);
 	}
 	}
 	return speedup;
 	return speedup;
@@ -305,7 +295,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 		if (!_starpu_data_is_multiformat_handle(handle))
 		if (!_starpu_data_is_multiformat_handle(handle))
 			continue;
 			continue;
 
 
-		node_kind = _starpu_worker_get_node_kind(arch->devices[0].type);
+		node_kind = starpu_worker_get_memory_node_kind(arch->devices[0].type);
 		if (!_starpu_handle_needs_conversion_task_for_arch(handle, node_kind))
 		if (!_starpu_handle_needs_conversion_task_for_arch(handle, node_kind))
 			continue;
 			continue;
 
 

+ 4 - 4
src/core/perfmodel/perfmodel.h

@@ -87,10 +87,6 @@ void _starpu_load_bus_performance_files(void);
 void _starpu_set_calibrate_flag(unsigned val);
 void _starpu_set_calibrate_flag(unsigned val);
 unsigned _starpu_get_calibrate_flag(void);
 unsigned _starpu_get_calibrate_flag(void);
 
 
-#if defined(STARPU_USE_FPGA)
-unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid);
-#endif
-
 #if defined(STARPU_USE_CUDA)
 #if defined(STARPU_USE_CUDA)
 unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid);
 unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid);
 #endif
 #endif
@@ -98,6 +94,10 @@ unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid);
 unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid);
 unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid);
 #endif
 #endif
 
 
+#if defined(STARPU_USE_FPGA)
+unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid);
+#endif
+
 void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name);
 void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name);
 
 
 void _starpu_write_double(FILE *f, const char *format, double val);
 void _starpu_write_double(FILE *f, const char *format, double val);

+ 21 - 18
src/core/perfmodel/perfmodel_bus.c

@@ -119,22 +119,17 @@ static char cudadev_direct[STARPU_MAXNODES][STARPU_MAXNODES];
 static uint64_t opencl_size[STARPU_MAXCUDADEVS];
 static uint64_t opencl_size[STARPU_MAXCUDADEVS];
 #endif
 #endif
 
 
-#ifdef STARPU_USE_FPGA
-/* preference order of cores (logical indexes) */
-static unsigned fpga_affinity_matrix[STARPU_MAXFPGADEVS][STARPU_MAXCPUS];
-static double fpgadev_timing_htod[STARPU_MAXNODES] = {0.0};
-static double fpgadev_latency_htod[STARPU_MAXNODES] = {0.0};
-static double fpgadev_timing_dtoh[STARPU_MAXNODES] = {0.0};
-static double fpgadev_latency_dtoh[STARPU_MAXNODES] = {0.0};
-static struct dev_timing fpgadev_timing_per_cpu[STARPU_MAXNODES*STARPU_MAXCPUS];
-#endif
-
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 /* preference order of cores (logical indexes) */
 /* preference order of cores (logical indexes) */
 static unsigned opencl_affinity_matrix[STARPU_MAXOPENCLDEVS][STARPU_MAXNUMANODES];
 static unsigned opencl_affinity_matrix[STARPU_MAXOPENCLDEVS][STARPU_MAXNUMANODES];
 static struct dev_timing opencldev_timing_per_numa[STARPU_MAXOPENCLDEVS*STARPU_MAXNUMANODES];
 static struct dev_timing opencldev_timing_per_numa[STARPU_MAXOPENCLDEVS*STARPU_MAXNUMANODES];
 #endif
 #endif
 
 
+#ifdef STARPU_USE_FPGA
+/* preference order of cores (logical indexes) */
+static unsigned fpga_affinity_matrix[STARPU_MAXFPGADEVS][STARPU_MAXCPUS];
+#endif
+
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
 static double mic_time_host_to_device[STARPU_MAXNODES] = {0.0};
 static double mic_time_host_to_device[STARPU_MAXNODES] = {0.0};
 static double mic_time_device_to_host[STARPU_MAXNODES] = {0.0};
 static double mic_time_device_to_host[STARPU_MAXNODES] = {0.0};
@@ -301,7 +296,11 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
 
 	cudaFree(d_buffer);
 	cudaFree(d_buffer);
 
 
+#if CUDART_VERSION >= 4000
+	cudaDeviceReset();
+#else
 	cudaThreadExit();
 	cudaThreadExit();
+#endif
 }
 }
 
 
 #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
 #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
@@ -411,7 +410,11 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	cudaSetDevice(src);
 	cudaSetDevice(src);
 	cudaFree(s_buffer);
 	cudaFree(s_buffer);
 
 
+#if CUDART_VERSION >= 4000
+	cudaDeviceReset();
+#else
 	cudaThreadExit();
 	cudaThreadExit();
+#endif
 }
 }
 #endif
 #endif
 #endif
 #endif
@@ -1122,13 +1125,6 @@ unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid)
 }
 }
 #endif /* STARPU_USE_CUDA */
 #endif /* STARPU_USE_CUDA */
 
 
-#ifdef STARPU_USE_FPGA
-unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid)
-{
-        return fpga_affinity_matrix[fpgaid];
-}
-#endif /* STARPU_USE_FPGA */
-
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid)
 unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid)
 {
 {
@@ -1136,6 +1132,13 @@ unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid)
 }
 }
 #endif /* STARPU_USE_OPENCL */
 #endif /* STARPU_USE_OPENCL */
 
 
+#ifdef STARPU_USE_FPGA
+unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid)
+{
+        return fpga_affinity_matrix[fpgaid];
+}
+#endif /* STARPU_USE_FPGA */
+
 void starpu_bus_print_affinity(FILE *f)
 void starpu_bus_print_affinity(FILE *f)
 {
 {
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
@@ -3055,7 +3058,7 @@ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size
 	int busid = starpu_bus_get_id(src_node, dst_node);
 	int busid = starpu_bus_get_id(src_node, dst_node);
 	int direct = starpu_bus_get_direct(busid);
 	int direct = starpu_bus_get_direct(busid);
 #endif
 #endif
-	float ngpus = topology->ncudagpus+topology->nopenclgpus;
+	float ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER];
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL
 #warning FIXME: ngpus should not be used e.g. for slow disk transfers...
 #warning FIXME: ngpus should not be used e.g. for slow disk transfers...
 #endif
 #endif

+ 25 - 50
src/core/perfmodel/perfmodel_history.c

@@ -33,7 +33,6 @@
 #include <core/perfmodel/regression.h>
 #include <core/perfmodel/regression.h>
 #include <core/perfmodel/multiple_regression.h>
 #include <core/perfmodel/multiple_regression.h>
 #include <common/config.h>
 #include <common/config.h>
-#include <starpu_parameters.h>
 #include <common/uthash.h>
 #include <common/uthash.h>
 #include <limits.h>
 #include <limits.h>
 #include <core/task.h>
 #include <core/task.h>
@@ -54,7 +53,7 @@ static int current_arch_comb;
 static int nb_arch_combs;
 static int nb_arch_combs;
 static starpu_pthread_rwlock_t arch_combs_mutex;
 static starpu_pthread_rwlock_t arch_combs_mutex;
 static int historymaxerror;
 static int historymaxerror;
-static char ignore_devid[STARPU_ANY_WORKER];
+static char ignore_devid[STARPU_NARCH];
 
 
 /* How many executions a codelet will have to be measured before we
 /* How many executions a codelet will have to be measured before we
  * consider that calibration will provide a value good enough for scheduling */
  * consider that calibration will provide a value good enough for scheduling */
@@ -1030,16 +1029,9 @@ void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model)
 		fprintf(f, "  <combination>\n");
 		fprintf(f, "  <combination>\n");
 		for(dev = 0; dev < ndevices; dev++)
 		for(dev = 0; dev < ndevices; dev++)
 		{
 		{
-			const char *type;
-			switch (arch_combs[comb]->devices[dev].type)
-			{
-				case STARPU_CPU_WORKER: type = "CPU"; break;
-				case STARPU_CUDA_WORKER: type = "CUDA"; break;
-				case STARPU_OPENCL_WORKER: type = "OpenCL"; break;
-				case STARPU_MIC_WORKER: type = "MIC"; break;
-				case STARPU_MPI_MS_WORKER: type = "MPI_MS"; break;
-				default: STARPU_ASSERT(0);
-			}
+			enum starpu_worker_archtype archtype = arch_combs[comb]->devices[dev].type;
+			const char *type = starpu_driver_info[archtype].name_upper;
+			STARPU_ASSERT(type);
 			fprintf(f, "    <device type=\"%s\" id=\"%d\"",
 			fprintf(f, "    <device type=\"%s\" id=\"%d\"",
 					type,
 					type,
 					arch_combs[comb]->devices[dev].devid);
 					arch_combs[comb]->devices[dev].devid);
@@ -1225,21 +1217,22 @@ void _starpu_initialize_registered_performance_models(void)
 	starpu_perfmodel_initialize();
 	starpu_perfmodel_initialize();
 
 
 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
-	unsigned ncores = conf->topology.nhwcpus;
-	unsigned ncuda =  conf->topology.nhwcudagpus;
-	unsigned nopencl = conf->topology.nhwopenclgpus;
+	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
+	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
+	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
 	unsigned nmic = 0;
 	unsigned nmic = 0;
+	enum starpu_worker_archtype archtype;
 #if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
 #if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
 	unsigned i;
 	unsigned i;
 #endif
 #endif
 #if STARPU_MAXMICDEVS > 0
 #if STARPU_MAXMICDEVS > 0
-	for(i = 0; i < conf->topology.nhwmicdevices; i++)
-		nmic += conf->topology.nhwmiccores[i];
+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
+		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
 #endif
 #endif
 	unsigned nmpi = 0;
 	unsigned nmpi = 0;
 #if STARPU_MAXMPIDEVS > 0
 #if STARPU_MAXMPIDEVS > 0
-	for(i = 0; i < conf->topology.nhwmpidevices; i++)
-		nmpi += conf->topology.nhwmpicores[i];
+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
+		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
 #endif
 #endif
 
 
 	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
 	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
@@ -1249,11 +1242,14 @@ void _starpu_initialize_registered_performance_models(void)
 	current_arch_comb = 0;
 	current_arch_comb = 0;
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
-	ignore_devid[STARPU_CPU_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
-	ignore_devid[STARPU_CUDA_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CUDA", 0);
-	ignore_devid[STARPU_OPENCL_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL", 0);
-	ignore_devid[STARPU_MIC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MIC", 0);
-	ignore_devid[STARPU_MPI_MS_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS", 0);
+
+	for (archtype = 0; archtype < STARPU_NARCH; archtype++) {
+		char name[128];
+		const char *arch = starpu_worker_get_type_as_env_var(archtype);
+		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
+		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
+		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
+	}
 }
 }
 
 
 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
@@ -1546,32 +1542,11 @@ int starpu_perfmodel_deinit(struct starpu_perfmodel *model){
 	return 0;
 	return 0;
 }
 }
 
 
-char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
+const char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
 {
 {
-	switch(archtype)
-	{
-		case(STARPU_CPU_WORKER):
-			return "cpu";
-			break;
-		case(STARPU_CUDA_WORKER):
-			return "cuda";
-			break;
-		case(STARPU_OPENCL_WORKER):
-			return "opencl";
-			break;
-		case(STARPU_FPGA_WORKER):
-			return "fpga";
-			break;
-		case(STARPU_MIC_WORKER):
-			return "mic";
-			break;
-		case(STARPU_MPI_MS_WORKER):
-			return "mpi_ms";
-			break;
-		default:
-			STARPU_ABORT();
-			break;
-	}
+	const char *name = starpu_driver_info[archtype].name_lower;
+	STARPU_ASSERT(name);
+	return name;
 }
 }
 
 
 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned impl)
 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned impl)
@@ -2107,7 +2082,7 @@ int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model)
 		fprintf(output, "\tComb %d: %d device%s\n", model->state->combs[comb], arch->ndevices, arch->ndevices>1?"s":"");
 		fprintf(output, "\tComb %d: %d device%s\n", model->state->combs[comb], arch->ndevices, arch->ndevices>1?"s":"");
 		for(device=0 ; device<arch->ndevices ; device++)
 		for(device=0 ; device<arch->ndevices ; device++)
 		{
 		{
-			char *name = starpu_perfmodel_get_archtype_name(arch->devices[device].type);
+			const char *name = starpu_perfmodel_get_archtype_name(arch->devices[device].type);
 			fprintf(output, "\t\tDevice %d: type: %s - devid: %d - ncores: %d\n", device, name, arch->devices[device].devid, arch->devices[device].ncores);
 			fprintf(output, "\t\tDevice %d: type: %s - devid: %d - ncores: %d\n", device, name, arch->devices[device].devid, arch->devices[device].ncores);
 		}
 		}
 	}
 	}

+ 9 - 10
src/core/sched_policy.c

@@ -629,16 +629,15 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 				&& starpu_get_prefetch_flag()
 				&& starpu_get_prefetch_flag()
 				&& starpu_memory_nodes_get_count() > 1)
 				&& starpu_memory_nodes_get_count() > 1)
 			{
 			{
-				if (task->where == STARPU_CPU && config->cpus_nodeid >= 0)
-					starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);
-				else if (task->where == STARPU_CUDA && config->cuda_nodeid >= 0)
-					starpu_prefetch_task_input_on_node(task, config->cuda_nodeid);
-				else if (task->cl->where == STARPU_FPGA && config->fpga_nodeid >= 0)
-					starpu_prefetch_task_input_on_node(task, config->fpga_nodeid);
-				else if (task->where == STARPU_OPENCL && config->opencl_nodeid >= 0)
-					starpu_prefetch_task_input_on_node(task, config->opencl_nodeid);
-				else if (task->where == STARPU_MIC && config->mic_nodeid >= 0)
-					starpu_prefetch_task_input_on_node(task, config->mic_nodeid);
+				enum starpu_worker_archtype type;
+				for (type = 0; type < STARPU_NARCH; type++)
+				{
+					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) {
+						if (config->arch_nodeid[type] >= 0)
+							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
+						break;
+					}
+				}
 			}
 			}
 
 
 			STARPU_ASSERT(sched_ctx->sched_policy->push_task);
 			STARPU_ASSERT(sched_ctx->sched_policy->push_task);

+ 36 - 45
src/core/task.c

@@ -631,35 +631,6 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 		where |= STARPU_CPU;
 		where |= STARPU_CPU;
 	}
 	}
 
 
-       /* FPGA */
-	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS && cl->fpga_funcs[0])
-	{
-		_STARPU_DISP("[warning] [struct starpu_codelet] both fpga_func and fpga_funcs are set. Ignoring fpga_func.\n");
-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
-	}
-	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS)
-	{
-		cl->fpga_funcs[0] = cl->fpga_func;
-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
-	}
-	some_impl = 0;
-	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
-		if (cl->fpga_funcs[i])
-		{
-			some_impl = 1;
-			break;
-		}
-	if (some_impl && cl->fpga_func == 0)
-	{
-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
-	}
-	if (some_impl && is_where_unset)
-	{
-		where |= STARPU_FPGA;
-	}
-
-
-
 	/* CUDA */
 	/* CUDA */
 	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0])
 	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0])
 	{
 	{
@@ -714,6 +685,35 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 		where |= STARPU_OPENCL;
 		where |= STARPU_OPENCL;
 	}
 	}
 
 
+       /* FPGA */
+	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS && cl->fpga_funcs[0])
+	{
+		_STARPU_DISP("[warning] [struct starpu_codelet] both fpga_func and fpga_funcs are set. Ignoring fpga_func.\n");
+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
+	}
+	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS)
+	{
+		cl->fpga_funcs[0] = cl->fpga_func;
+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
+	}
+	some_impl = 0;
+	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
+		if (cl->fpga_funcs[i])
+		{
+			some_impl = 1;
+			break;
+		}
+	if (some_impl && cl->fpga_func == 0)
+	{
+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
+	}
+	if (some_impl && is_where_unset)
+	{
+		where |= STARPU_FPGA;
+	}
+
+
+
 	some_impl = 0;
 	some_impl = 0;
 	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
 	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
 		if (cl->mic_funcs[i])
 		if (cl->mic_funcs[i])
@@ -1484,38 +1484,29 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 	switch (node_kind)
 	switch (node_kind)
 	{
 	{
 		case STARPU_CPU_RAM:
 		case STARPU_CPU_RAM:
+		case STARPU_MIC_RAM:
+		case STARPU_MPI_MS_RAM:
 			switch(starpu_node_get_kind(handle->mf_node))
 			switch(starpu_node_get_kind(handle->mf_node))
 			{
 			{
 				case STARPU_CPU_RAM:
 				case STARPU_CPU_RAM:
-					return 0;
-				case STARPU_CUDA_RAM:      /* Fall through */
-				case STARPU_OPENCL_RAM:
 				case STARPU_MIC_RAM:
 				case STARPU_MIC_RAM:
                                 case STARPU_MPI_MS_RAM:
                                 case STARPU_MPI_MS_RAM:
-					return 1;
+					return 0;
 				default:
 				default:
-					STARPU_ABORT();
+					return 1;
 			}
 			}
 			break;
 			break;
-		case STARPU_CUDA_RAM:    /* Fall through */
-		case STARPU_OPENCL_RAM:
-		case STARPU_MIC_RAM:
-		case STARPU_MPI_MS_RAM:
+		default:
 			switch(starpu_node_get_kind(handle->mf_node))
 			switch(starpu_node_get_kind(handle->mf_node))
 			{
 			{
 				case STARPU_CPU_RAM:
 				case STARPU_CPU_RAM:
-					return 1;
-				case STARPU_CUDA_RAM:
-				case STARPU_OPENCL_RAM:
 				case STARPU_MIC_RAM:
 				case STARPU_MIC_RAM:
                                 case STARPU_MPI_MS_RAM:
                                 case STARPU_MPI_MS_RAM:
-					return 0;
+					return 1;
 				default:
 				default:
-					STARPU_ABORT();
+					return 0;
 			}
 			}
 			break;
 			break;
-		default:
-			STARPU_ABORT();
 	}
 	}
 	/* that instruction should never be reached */
 	/* that instruction should never be reached */
 	return -EINVAL;
 	return -EINVAL;

+ 4 - 9
src/core/task.h

@@ -97,14 +97,14 @@ static inline starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct
 	return cl->cuda_funcs[nimpl];
 	return cl->cuda_funcs[nimpl];
 }
 }
 
 
-static inline starpu_fpga_func_t _starpu_task_get_fpga_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
+static inline starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
 {
 {
-	return cl->fpga_funcs[nimpl];
+	return cl->opencl_funcs[nimpl];
 }
 }
 
 
-static inline starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
+static inline starpu_fpga_func_t _starpu_task_get_fpga_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
 {
 {
-	return cl->opencl_funcs[nimpl];
+	return cl->fpga_funcs[nimpl];
 }
 }
 
 
 static inline starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
 static inline starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
@@ -122,11 +122,6 @@ static inline const char *_starpu_task_get_cpu_name_nth_implementation(struct st
 	return cl->cpu_funcs_name[nimpl];
 	return cl->cpu_funcs_name[nimpl];
 }
 }
 
 
-static inline char *_starpu_task_get_fpga_kernel_type_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
-{
-	return cl->fpga_kernel_type[nimpl];
-}
-
 #define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if (task->dyn_handles) task->dyn_interfaces[i] = interface; else task->interfaces[i] = interface;} while(0)
 #define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if (task->dyn_handles) task->dyn_interfaces[i] = interface; else task->interfaces[i] = interface;} while(0)
 #define _STARPU_TASK_GET_INTERFACES(task) ((task->dyn_handles) ? task->dyn_interfaces : task->interfaces)
 #define _STARPU_TASK_GET_INTERFACES(task) ((task->dyn_handles) ? task->dyn_interfaces : task->interfaces)
 
 

+ 135 - 167
src/core/topology.c

@@ -84,7 +84,7 @@ static int _starpu_get_logical_numa_node_worker(unsigned workerid);
 #define STARPU_NUMA_UNINITIALIZED (-2)
 #define STARPU_NUMA_UNINITIALIZED (-2)
 #define STARPU_NUMA_MAIN_RAM (-1)
 #define STARPU_NUMA_MAIN_RAM (-1)
 
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE) || defined(STARPU_USE_FPGA)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 
 
 struct handle_entry
 struct handle_entry
 {
 {
@@ -464,7 +464,7 @@ struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  * Discover the topology of the machine
  * Discover the topology of the machine
  */
  */
 
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE) || defined(STARPU_USE_FPGA)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 						int *current, int *workers_gpuid,
 						int *current, int *workers_gpuid,
 						const char *varname, unsigned nhwgpus,
 						const char *varname, unsigned nhwgpus,
@@ -560,42 +560,18 @@ static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config
 					    &(config->current_cuda_gpuid),
 					    &(config->current_cuda_gpuid),
 					    (int *)topology->workers_cuda_gpuid,
 					    (int *)topology->workers_cuda_gpuid,
 					    "STARPU_WORKERS_CUDAID",
 					    "STARPU_WORKERS_CUDAID",
-					    topology->nhwcudagpus,
+					    topology->nhwdevices[STARPU_CUDA_WORKER],
 					    STARPU_CUDA_WORKER);
 					    STARPU_CUDA_WORKER);
 }
 }
 
 
 static inline int _starpu_get_next_cuda_gpuid(struct _starpu_machine_config *config)
 static inline int _starpu_get_next_cuda_gpuid(struct _starpu_machine_config *config)
 {
 {
-	unsigned i = ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
+	unsigned i = ((config->current_cuda_gpuid++) % config->topology.ndevices[STARPU_CUDA_WORKER]);
 
 
 	return (int)config->topology.workers_cuda_gpuid[i];
 	return (int)config->topology.workers_cuda_gpuid[i];
 }
 }
 #endif
 #endif
 
 
-#if defined(STARPU_USE_FPGA)
-static void _starpu_initialize_workers_fpga_deviceid(struct _starpu_machine_config *config)
-{
-	struct _starpu_machine_topology *topology = &config->topology;
-	struct starpu_conf *uconf = &config->conf;
-
-        _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_fpga_deviceid == 0
-					    ? NULL
-					    : (int *)uconf->workers_fpga_deviceid,
-					    &(config->current_fpga_deviceid),
-					    (int *)topology->workers_fpga_deviceid,
-					    "STARPU_WORKERS_FPGAID",
-					    topology->nhwfpgafpgas,
-					    STARPU_FPGA_WORKER);
-}
-
-static inline int _starpu_get_next_fpga_deviceid (struct _starpu_machine_config *config)
-{
-	unsigned i = ((config->current_fpga_deviceid++) % config->topology.nfpgafpgas);
-
-	return (int)config->topology.workers_fpga_deviceid[i];
-}
-#endif
-
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config*config)
 static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config*config)
 {
 {
@@ -608,7 +584,7 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 					    &(config->current_opencl_gpuid),
 					    &(config->current_opencl_gpuid),
 					    (int *)topology->workers_opencl_gpuid,
 					    (int *)topology->workers_opencl_gpuid,
 					    "STARPU_WORKERS_OPENCLID",
 					    "STARPU_WORKERS_OPENCLID",
-					    topology->nhwopenclgpus,
+					    topology->nhwdevices[STARPU_OPENCL_WORKER],
 					    STARPU_OPENCL_WORKER);
 					    STARPU_OPENCL_WORKER);
 
 
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
@@ -671,12 +647,36 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
 
 static inline int _starpu_get_next_opencl_gpuid(struct _starpu_machine_config *config)
 static inline int _starpu_get_next_opencl_gpuid(struct _starpu_machine_config *config)
 {
 {
-	unsigned i = ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
+	unsigned i = ((config->current_opencl_gpuid++) % config->topology.ndevices[STARPU_OPENCL_WORKER]);
 
 
 	return (int)config->topology.workers_opencl_gpuid[i];
 	return (int)config->topology.workers_opencl_gpuid[i];
 }
 }
 #endif
 #endif
 
 
+#if defined(STARPU_USE_FPGA)
+static void _starpu_initialize_workers_fpga_deviceid(struct _starpu_machine_config *config)
+{
+	struct _starpu_machine_topology *topology = &config->topology;
+	struct starpu_conf *uconf = &config->conf;
+
+        _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_fpga_deviceid == 0
+					    ? NULL
+					    : (int *)uconf->workers_fpga_deviceid,
+					    &(config->current_fpga_deviceid),
+					    (int *)topology->workers_fpga_deviceid,
+					    "STARPU_WORKERS_FPGAID",
+					    topology->nhwdevices[STARPU_FPGA_WORKER],
+					    STARPU_FPGA_WORKER);
+}
+
+static inline int _starpu_get_next_fpga_deviceid (struct _starpu_machine_config *config)
+{
+	unsigned i = ((config->current_fpga_deviceid++) % config->topology.ndevices[STARPU_FPGA_WORKER]);
+
+	return (int)config->topology.workers_fpga_deviceid[i];
+}
+#endif
+
 #if 0
 #if 0
 #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
 static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
 static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
@@ -690,7 +690,7 @@ static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_confi
 					    &(config->current_mic_deviceid),
 					    &(config->current_mic_deviceid),
 					    (int *)topology->workers_mic_deviceid,
 					    (int *)topology->workers_mic_deviceid,
 					    "STARPU_WORKERS_MICID",
 					    "STARPU_WORKERS_MICID",
-					    topology->nhwmiccores,
+					    topology->nhwdevices[STARPU_MIC_WORKER],
 					    STARPU_MIC_WORKER);
 					    STARPU_MIC_WORKER);
 }
 }
 #endif
 #endif
@@ -700,7 +700,7 @@ static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_confi
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
 static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
 static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
 {
 {
-	unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
+	unsigned i = ((config->current_mic_deviceid++) % config->topology.ndevices[STARPU_MIC_WORKER]);
 
 
 	return (int)config->topology.workers_mic_deviceid[i];
 	return (int)config->topology.workers_mic_deviceid[i];
 }
 }
@@ -710,7 +710,7 @@ static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *c
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 static inline int _starpu_get_next_mpi_deviceid(struct _starpu_machine_config *config)
 static inline int _starpu_get_next_mpi_deviceid(struct _starpu_machine_config *config)
 {
 {
-	unsigned i = ((config->current_mpi_deviceid++) % config->topology.nmpidevices);
+	unsigned i = ((config->current_mpi_deviceid++) % config->topology.ndevices[STARPU_MPI_MS_WORKER]);
 
 
 	return (int)config->topology.workers_mpi_ms_deviceid[i];
 	return (int)config->topology.workers_mpi_ms_deviceid[i];
 }
 }
@@ -719,14 +719,14 @@ static void _starpu_init_mpi_topology(struct _starpu_machine_config *config, lon
 {
 {
 	/* Discover the topology of the mpi node identifier by MPI_IDX. That
 	/* Discover the topology of the mpi node identifier by MPI_IDX. That
 	 * means, make this StarPU instance aware of the number of cores available
 	 * means, make this StarPU instance aware of the number of cores available
-	 * on this MPI device. Update the `nhwmpicores' topology field
+	 * on this MPI device. Update the `nhwworker[STARPU_MPI_MS_WORKER]' topology field
 	 * accordingly. */
 	 * accordingly. */
 
 
 	struct _starpu_machine_topology *topology = &config->topology;
 	struct _starpu_machine_topology *topology = &config->topology;
 
 
 	int nbcores;
 	int nbcores;
 	_starpu_src_common_sink_nbcores(_starpu_mpi_ms_nodes[mpi_idx], &nbcores);
 	_starpu_src_common_sink_nbcores(_starpu_mpi_ms_nodes[mpi_idx], &nbcores);
-	topology->nhwmpicores[mpi_idx] = nbcores;
+	topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = nbcores;
 }
 }
 
 
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
@@ -736,14 +736,14 @@ static void _starpu_init_mic_topology(struct _starpu_machine_config *config, lon
 {
 {
 	/* Discover the topology of the mic node identifier by MIC_IDX. That
 	/* Discover the topology of the mic node identifier by MIC_IDX. That
 	 * means, make this StarPU instance aware of the number of cores available
 	 * means, make this StarPU instance aware of the number of cores available
-	 * on this MIC device. Update the `nhwmiccores' topology field
+	 * on this MIC device. Update the `nhwworker[STARPU_MIC_WORKER]' topology field
 	 * accordingly. */
 	 * accordingly. */
 
 
 	struct _starpu_machine_topology *topology = &config->topology;
 	struct _starpu_machine_topology *topology = &config->topology;
 
 
 	int nbcores;
 	int nbcores;
 	_starpu_src_common_sink_nbcores(_starpu_mic_nodes[mic_idx], &nbcores);
 	_starpu_src_common_sink_nbcores(_starpu_mic_nodes[mic_idx], &nbcores);
-	topology->nhwmiccores[mic_idx] = nbcores;
+	topology->nhwworker[STARPU_MIC_WORKER][mic_idx] = nbcores;
 }
 }
 
 
 static int _starpu_init_mic_node(struct _starpu_machine_config *config, int mic_idx,
 static int _starpu_init_mic_node(struct _starpu_machine_config *config, int mic_idx,
@@ -862,7 +862,8 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
 
 	nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
 	nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
 
 
-	topology->nhwcpus = 0;
+	topology->nhwdevices[STARPU_CPU_WORKER] = 1;
+	topology->nhwworker[STARPU_CPU_WORKER][0] = 0;
 	topology->nhwpus = 0;
 	topology->nhwpus = 0;
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
@@ -906,7 +907,7 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 #endif
 #endif
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-	config->topology.nhwcpus = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
 #elif defined(STARPU_HAVE_HWLOC)
 #elif defined(STARPU_HAVE_HWLOC)
 	/* Discover the CPUs relying on the hwloc interface and fills CONFIG
 	/* Discover the CPUs relying on the hwloc interface and fills CONFIG
 	 * accordingly. */
 	 * accordingly. */
@@ -926,24 +927,24 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 							 HWLOC_OBJ_PU);
 							 HWLOC_OBJ_PU);
 	}
 	}
 
 
-	topology->nhwcpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
+	topology->nhwworker[STARPU_CPU_WORKER][0] = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
 	topology->nhwpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth);
 	topology->nhwpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth);
 
 
 #elif defined(HAVE_SYSCONF)
 #elif defined(HAVE_SYSCONF)
 	/* Discover the CPUs relying on the sysconf(3) function and fills
 	/* Discover the CPUs relying on the sysconf(3) function and fills
 	 * CONFIG accordingly. */
 	 * CONFIG accordingly. */
 
 
-	config->topology.nhwcpus = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
 
 
 #elif defined(_WIN32)
 #elif defined(_WIN32)
 	/* Discover the CPUs on Cygwin and MinGW systems. */
 	/* Discover the CPUs on Cygwin and MinGW systems. */
 
 
 	SYSTEM_INFO sysinfo;
 	SYSTEM_INFO sysinfo;
 	GetSystemInfo(&sysinfo);
 	GetSystemInfo(&sysinfo);
-	config->topology.nhwcpus = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
 #else
 #else
 #warning no way to know number of cores, assuming 1
 #warning no way to know number of cores, assuming 1
-	config->topology.nhwcpus = config->topology.nhwpus = 1;
+	config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = 1;
 #endif
 #endif
 
 
 	if (config->conf.ncuda != 0)
 	if (config->conf.ncuda != 0)
@@ -953,7 +954,7 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
         if (config->conf.nfpga != 0)
         if (config->conf.nfpga != 0)
 		_starpu_fpga_discover_devices(config);
 		_starpu_fpga_discover_devices(config);
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-        config->topology.nhwmpi = _starpu_mpi_src_get_device_count();
+        config->topology.nhwdevices[STARPU_MPI_MS_WORKER] = _starpu_mpi_src_get_device_count();
 #endif
 #endif
 
 
 	topology_is_initialized = 1;
 	topology_is_initialized = 1;
@@ -968,7 +969,7 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 	unsigned i;
 	unsigned i;
 
 
 	struct _starpu_machine_topology *topology = &config->topology;
 	struct _starpu_machine_topology *topology = &config->topology;
-	int nhyperthreads = topology->nhwpus / topology->nhwcpus;
+	int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
 	unsigned bind_on_core = 0;
 	unsigned bind_on_core = 0;
 	int scale = 1;
 	int scale = 1;
 
 
@@ -1032,7 +1033,7 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 						}
 						}
 						else
 						else
 						{
 						{
-							endval = (bind_on_core ? topology->nhwcpus : topology->nhwpus) - 1;
+							endval = (bind_on_core ? topology->nhwworker[STARPU_CPU_WORKER][0] : topology->nhwpus) - 1;
 							if (*strval)
 							if (*strval)
 								strval++;
 								strval++;
 						}
 						}
@@ -1136,7 +1137,7 @@ static inline unsigned _starpu_get_next_bindid(struct _starpu_machine_config *co
 	STARPU_ASSERT_MSG(topology_is_initialized, "The StarPU core is not initialized yet, have you called starpu_init?");
 	STARPU_ASSERT_MSG(topology_is_initialized, "The StarPU core is not initialized yet, have you called starpu_init?");
 
 
 	unsigned current_preferred;
 	unsigned current_preferred;
-	unsigned nhyperthreads = topology->nhwpus / topology->nhwcpus;
+	unsigned nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
 	unsigned ncores = topology->nhwpus / nhyperthreads;
 	unsigned ncores = topology->nhwpus / nhyperthreads;
 	unsigned i;
 	unsigned i;
 
 
@@ -1219,7 +1220,7 @@ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
 #endif
 #endif
 	_starpu_init_topology(config);
 	_starpu_init_topology(config);
 
 
-	return config->topology.nhwcpus;
+	return config->topology.nhwworker[STARPU_CPU_WORKER][0];
 }
 }
 
 
 unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config)
 unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config)
@@ -1304,7 +1305,7 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 
 
 	struct _starpu_machine_topology *topology = &config->topology;
 	struct _starpu_machine_topology *topology = &config->topology;
 
 
-	topology->nhwmiccores[mic_idx] = 0;
+	topology->nhwworker[STARPU_MIC_WORKER][mic_idx] = 0;
 
 
 	_starpu_init_mic_topology(config, mic_idx);
 	_starpu_init_mic_topology(config, mic_idx);
 
 
@@ -1316,29 +1317,29 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 	{
 	{
 		/* Nothing was specified, so let's use the number of
 		/* Nothing was specified, so let's use the number of
 		 * detected mic cores. ! */
 		 * detected mic cores. ! */
-		nmiccores = topology->nhwmiccores[mic_idx];
+		nmiccores = topology->nhwworker[STARPU_MIC_WORKER][mic_idx];
 	}
 	}
 	else
 	else
 	{
 	{
-		if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
+		if ((unsigned) nmiccores > topology->nhwworker[STARPU_MIC_WORKER][mic_idx])
 		{
 		{
 			/* The user requires more MIC cores than there is available */
 			/* The user requires more MIC cores than there is available */
-			_STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwmiccores[mic_idx]);
-			nmiccores = topology->nhwmiccores[mic_idx];
+			_STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwworker[STARPU_MIC_WORKER][mic_idx]);
+			nmiccores = topology->nhwworker[STARPU_MIC_WORKER][mic_idx];
 		}
 		}
 	}
 	}
 
 
-	topology->nmiccores[mic_idx] = nmiccores;
-	STARPU_ASSERT_MSG(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
-			  "topology->nmiccores[mic_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
-			  mic_idx, topology->nmiccores[mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
+	topology->nworker[STARPU_MIC_WORKER][mic_idx] = nmiccores;
+	STARPU_ASSERT_MSG(topology->nworker[STARPU_MIC_WORKER][mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
+			  "topology->nworker[STARPU_MIC_WORKER][mic_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
+			  mic_idx, topology->nworker[STARPU_MIC_WORKER][mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
 
 
 	/* _starpu_initialize_workers_mic_deviceid (config); */
 	/* _starpu_initialize_workers_mic_deviceid (config); */
 
 
 	mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
 	mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
-	mic_worker_set[mic_idx].nworkers = topology->nmiccores[mic_idx];
+	mic_worker_set[mic_idx].nworkers = topology->nworker[STARPU_MIC_WORKER][mic_idx];
 	unsigned miccore_id;
 	unsigned miccore_id;
-	for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
+	for (miccore_id = 0; miccore_id < topology->nworker[STARPU_MIC_WORKER][mic_idx]; miccore_id++)
 	{
 	{
 		int worker_idx = topology->nworkers + miccore_id;
 		int worker_idx = topology->nworkers + miccore_id;
 		config->workers[worker_idx].set = &mic_worker_set[mic_idx];
 		config->workers[worker_idx].set = &mic_worker_set[mic_idx];
@@ -1355,7 +1356,7 @@ static void _starpu_init_mic_config(struct _starpu_machine_config *config,
 	}
 	}
 	_starpu_mic_nodes[mic_idx]->baseworkerid = topology->nworkers;
 	_starpu_mic_nodes[mic_idx]->baseworkerid = topology->nworkers;
 
 
-	topology->nworkers += topology->nmiccores[mic_idx];
+	topology->nworkers += topology->nworker[STARPU_MIC_WORKER][mic_idx];
 }
 }
 
 
 static COIENGINE mic_handles[STARPU_MAXMICDEVS];
 static COIENGINE mic_handles[STARPU_MAXMICDEVS];
@@ -1369,7 +1370,7 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
 {
 {
         struct _starpu_machine_topology *topology = &config->topology;
         struct _starpu_machine_topology *topology = &config->topology;
 
 
-        topology->nhwmpicores[mpi_idx] = 0;
+        topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = 0;
 
 
         _starpu_init_mpi_topology(config, mpi_idx);
         _starpu_init_mpi_topology(config, mpi_idx);
 
 
@@ -1380,28 +1381,28 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
         {
         {
                 /* Nothing was specified, so let's use the number of
                 /* Nothing was specified, so let's use the number of
                  * detected mpi cores. ! */
                  * detected mpi cores. ! */
-                nmpicores = topology->nhwmpicores[mpi_idx];
+                nmpicores = topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx];
         }
         }
         else
         else
         {
         {
-                if ((unsigned) nmpicores > topology->nhwmpicores[mpi_idx])
+                if ((unsigned) nmpicores > topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx])
                 {
                 {
                         /* The user requires more MPI cores than there is available */
                         /* The user requires more MPI cores than there is available */
                         _STARPU_MSG("# Warning: %d MPI cores requested. Only %u available.\n",
                         _STARPU_MSG("# Warning: %d MPI cores requested. Only %u available.\n",
-				    nmpicores, topology->nhwmpicores[mpi_idx]);
-                        nmpicores = topology->nhwmpicores[mpi_idx];
+				    nmpicores, topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx]);
+                        nmpicores = topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx];
                 }
                 }
         }
         }
 
 
-        topology->nmpicores[mpi_idx] = nmpicores;
-        STARPU_ASSERT_MSG(topology->nmpicores[mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
-                        "topology->nmpicores[mpi_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
-                        mpi_idx, topology->nmpicores[mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
+        topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx] = nmpicores;
+        STARPU_ASSERT_MSG(topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
+                        "topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
+                        mpi_idx, topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
 
 
         mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
         mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
-        mpi_worker_set[mpi_idx].nworkers = topology->nmpicores[mpi_idx];
+        mpi_worker_set[mpi_idx].nworkers = topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx];
         unsigned mpicore_id;
         unsigned mpicore_id;
-        for (mpicore_id = 0; mpicore_id < topology->nmpicores[mpi_idx]; mpicore_id++)
+        for (mpicore_id = 0; mpicore_id < topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx]; mpicore_id++)
         {
         {
                 int worker_idx = topology->nworkers + mpicore_id;
                 int worker_idx = topology->nworkers + mpicore_id;
                 config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
                 config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
@@ -1418,7 +1419,7 @@ static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
         }
         }
 	_starpu_mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
 	_starpu_mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
 
 
-        topology->nworkers += topology->nmpicores[mpi_idx];
+        topology->nworkers += topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx];
 }
 }
 #endif
 #endif
 
 
@@ -1461,13 +1462,13 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 			}
 			}
 		}
 		}
 
 
-		topology->nmicdevices = 0;
+		topology->ndevices[STARPU_MIC_WORKER] = 0;
 		unsigned i;
 		unsigned i;
 		for (i = 0; i < (unsigned) reqmicdevices; i++)
 		for (i = 0; i < (unsigned) reqmicdevices; i++)
 			if (0 == _starpu_init_mic_node(config, i, &mic_handles[i], &_starpu_mic_process[i]))
 			if (0 == _starpu_init_mic_node(config, i, &mic_handles[i], &_starpu_mic_process[i]))
-				topology->nmicdevices++;
+				topology->ndevices[STARPU_MIC_WORKER]++;
 
 
-		for (i = 0; i < topology->nmicdevices; i++)
+		for (i = 0; i < topology->ndevices[STARPU_MIC_WORKER]; i++)
 			_starpu_init_mic_config(config, user_conf, i);
 			_starpu_init_mic_config(config, user_conf, i);
 	}
 	}
 #endif
 #endif
@@ -1498,10 +1499,10 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 			}
 			}
 		}
 		}
 
 
-		topology->nmpidevices = reqmpidevices;
+		topology->ndevices[STARPU_MPI_MS_WORKER] = reqmpidevices;
 
 
 		/* if user don't want to use MPI slaves, we close the slave processes */
 		/* if user don't want to use MPI slaves, we close the slave processes */
-		if (no_mp_config && topology->nmpidevices == 0)
+		if (no_mp_config && topology->ndevices[STARPU_MPI_MS_WORKER] == 0)
 		{
 		{
 			_starpu_mpi_common_mp_deinit();
 			_starpu_mpi_common_mp_deinit();
 			exit(0);
 			exit(0);
@@ -1510,10 +1511,10 @@ static void _starpu_init_mp_config(struct _starpu_machine_config *config,
 		if (!no_mp_config)
 		if (!no_mp_config)
 		{
 		{
 			unsigned i;
 			unsigned i;
-			for (i = 0; i < topology->nmpidevices; i++)
+			for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
 				_starpu_mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
 				_starpu_mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
 
 
-			for (i = 0; i < topology->nmpidevices; i++)
+			for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
 				_starpu_init_mpi_config(config, user_conf, i);
 				_starpu_init_mpi_config(config, user_conf, i);
 		}
 		}
 	}
 	}
@@ -1549,12 +1550,12 @@ static void _starpu_deinit_mp_config(struct _starpu_machine_config *config)
 	unsigned i;
 	unsigned i;
 
 
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
-	for (i = 0; i < topology->nmicdevices; i++)
+	for (i = 0; i < topology->ndevices[STARPU_MIC_WORKER]; i++)
 		_starpu_deinit_mic_node(i);
 		_starpu_deinit_mic_node(i);
 	_starpu_mic_clear_kernels();
 	_starpu_mic_clear_kernels();
 #endif
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-	for (i = 0; i < topology->nmpidevices; i++)
+	for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
 		_starpu_deinit_mpi_node(i);
 		_starpu_deinit_mpi_node(i);
 #endif
 #endif
 }
 }
@@ -1664,9 +1665,10 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 	}
 	}
 
 
 	/* Now we know how many CUDA devices will be used */
 	/* Now we know how many CUDA devices will be used */
-	topology->ncudagpus = ncuda;
-	topology->nworkerpercuda = nworker_per_cuda;
-	STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
+	topology->ndevices[STARPU_CUDA_WORKER] = ncuda;
+	for (i = 0; i < ncuda; i++)
+		topology->nworker[STARPU_CUDA_WORKER][i] = nworker_per_cuda;
+	STARPU_ASSERT(topology->ndevices[STARPU_CUDA_WORKER] <= STARPU_MAXCUDADEVS);
 
 
 	_starpu_initialize_workers_cuda_gpuid(config);
 	_starpu_initialize_workers_cuda_gpuid(config);
 
 
@@ -1693,11 +1695,11 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 	if (!topology->cuda_th_per_dev)
 	if (!topology->cuda_th_per_dev)
 	{
 	{
 		cuda_worker_set[0].workers = &config->workers[topology->nworkers];
 		cuda_worker_set[0].workers = &config->workers[topology->nworkers];
-		cuda_worker_set[0].nworkers = topology->ncudagpus * nworker_per_cuda;
+		cuda_worker_set[0].nworkers = topology->ndevices[STARPU_CUDA_WORKER] * nworker_per_cuda;
 	}
 	}
 
 
 	unsigned cudagpu;
 	unsigned cudagpu;
-	for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
+	for (cudagpu = 0; cudagpu < topology->ndevices[STARPU_CUDA_WORKER]; cudagpu++)
 	{
 	{
 		int devid = _starpu_get_next_cuda_gpuid(config);
 		int devid = _starpu_get_next_cuda_gpuid(config);
 		int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
 		int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
@@ -1769,7 +1771,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 #endif
 #endif
         }
         }
 
 
-	topology->nworkers += topology->ncudagpus * nworker_per_cuda;
+	topology->nworkers += topology->ndevices[STARPU_CUDA_WORKER] * nworker_per_cuda;
 #endif
 #endif
 
 
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
@@ -1813,20 +1815,22 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 		}
 		}
 	}
 	}
 
 
-	topology->nopenclgpus = nopencl;
-	STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
+	topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
+	for (i = 0; i < nopencl; i++)
+		topology->nworker[STARPU_CUDA_WORKER][i] = 1;
+	STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
 
 
 	_starpu_initialize_workers_opencl_gpuid(config);
 	_starpu_initialize_workers_opencl_gpuid(config);
 
 
 	unsigned openclgpu;
 	unsigned openclgpu;
-	for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
+	for (openclgpu = 0; openclgpu < topology->ndevices[STARPU_OPENCL_WORKER]; openclgpu++)
 	{
 	{
 		int worker_idx = topology->nworkers + openclgpu;
 		int worker_idx = topology->nworkers + openclgpu;
 		int devid = _starpu_get_next_opencl_gpuid(config);
 		int devid = _starpu_get_next_opencl_gpuid(config);
 		if (devid == -1)
 		if (devid == -1)
 		{
 		{
 			// There is no more devices left
 			// There is no more devices left
-			topology->nopenclgpus = openclgpu;
+			topology->ndevices[STARPU_OPENCL_WORKER] = openclgpu;
 			break;
 			break;
 		}
 		}
 		config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
 		config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
@@ -1841,7 +1845,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 		config->worker_mask |= STARPU_OPENCL;
 		config->worker_mask |= STARPU_OPENCL;
 	}
 	}
 
 
-	topology->nworkers += topology->nopenclgpus;
+	topology->nworkers += topology->ndevices[STARPU_OPENCL_WORKER];
 #endif
 #endif
 
 
 
 
@@ -1884,19 +1888,21 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 		}
 		}
 	}
 	}
 
 
-	topology->nfpgafpgas = nfpga;
-	STARPU_ASSERT(topology->nfpgafpgas + topology->nworkers <= STARPU_NMAXWORKERS);
+	topology->ndevices[STARPU_FPGA_WORKER] = nfpga;
+	for (i = 0; i < nfpga; i++)
+		topology->nworker[STARPU_FPGA_WORKER][i] = 1;
+	STARPU_ASSERT(topology->ndevices[STARPU_FPGA_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
 
 
 	_starpu_initialize_workers_fpga_deviceid(config);
 	_starpu_initialize_workers_fpga_deviceid(config);
 
 
 	unsigned fpgafpga;
 	unsigned fpgafpga;
-	for (fpgafpga = 0; fpgafpga < topology->nfpgafpgas; fpgafpga++)
+	for (fpgafpga = 0; fpgafpga < topology->ndevices[STARPU_FPGA_WORKER]; fpgafpga++)
 	{
 	{
 		int worker_idx = topology->nworkers + fpgafpga;
 		int worker_idx = topology->nworkers + fpgafpga;
 		int devid = _starpu_get_next_fpga_deviceid(config);
 		int devid = _starpu_get_next_fpga_deviceid(config);
 		if (devid == -1)
 		if (devid == -1)
 		{ // There is no more devices left
 		{ // There is no more devices left
-			topology->nfpgafpgas = fpgafpga;
+			topology->ndevices[STARPU_FPGA_WORKER] = fpgafpga;
 			break;
 			break;
 		}
 		}
 		config->workers[worker_idx].arch = STARPU_FPGA_WORKER;
 		config->workers[worker_idx].arch = STARPU_FPGA_WORKER;
@@ -1911,7 +1917,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 		config->worker_mask |= STARPU_FPGA;
 		config->worker_mask |= STARPU_FPGA;
 	}
 	}
 
 
-	topology->nworkers += topology->nfpgafpgas;
+	topology->nworkers += topology->ndevices[STARPU_FPGA_WORKER];
 #endif
 #endif
 
 
 #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
@@ -1931,13 +1937,13 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 			unsigned mic_busy_cpus = 0;
 			unsigned mic_busy_cpus = 0;
 			int j = 0;
 			int j = 0;
 			for (j = 0; j < STARPU_MAXMICDEVS; j++)
 			for (j = 0; j < STARPU_MAXMICDEVS; j++)
-				mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);
+				mic_busy_cpus += (topology->nworker[STARPU_MIC_WORKER][j] ? 1 : 0);
 
 
 			unsigned mpi_ms_busy_cpus = 0;
 			unsigned mpi_ms_busy_cpus = 0;
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
 #ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
 			for (j = 0; j < STARPU_MAXMPIDEVS; j++)
 			for (j = 0; j < STARPU_MAXMPIDEVS; j++)
-				mpi_ms_busy_cpus += (topology->nmpicores[j] ? 1 : 0);
+				mpi_ms_busy_cpus += (topology->nworker[STARPU_MPI_MS_WORKER][j] ? 1 : 0);
 #else
 #else
 			mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
 			mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
 #endif
 #endif
@@ -1945,15 +1951,15 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 			unsigned cuda_busy_cpus = 0;
 			unsigned cuda_busy_cpus = 0;
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 			cuda_busy_cpus =
 			cuda_busy_cpus =
-				topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ? (topology->ncudagpus ? 1 : 0) :
-				topology->cuda_th_per_stream ? (nworker_per_cuda * topology->ncudagpus) : topology->ncudagpus;
+				topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ? (topology->ndevices[STARPU_CUDA_WORKER] ? 1 : 0) :
+				topology->cuda_th_per_stream ? (nworker_per_cuda * topology->ndevices[STARPU_CUDA_WORKER]) : topology->ndevices[STARPU_CUDA_WORKER];
 #endif
 #endif
 			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
 			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
 				+ cuda_busy_cpus
 				+ cuda_busy_cpus
-				+ topology->nopenclgpus
-				+ topology->nfpgafpgas;
+				+ topology->ndevices[STARPU_OPENCL_WORKER];
+				+ topology->ndevices[STARPU_FPGA_WORKER];
 
 
-			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
+			long avail_cpus = (long) topology->nhwworker[STARPU_CPU_WORKER][0] - (long) already_busy_cpus;
 			if (avail_cpus < 0)
 			if (avail_cpus < 0)
 				avail_cpus = 0;
 				avail_cpus = 0;
 			int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
 			int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
@@ -1983,12 +1989,13 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 
 
 	}
 	}
 
 
-	topology->ncpus = ncpu;
-	STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
+	topology->ndevices[STARPU_CPU_WORKER] = 1;
+	topology->nworker[STARPU_CPU_WORKER][0] = ncpu;
+	STARPU_ASSERT(topology->nworker[STARPU_CPU_WORKER][0] + topology->nworkers <= STARPU_NMAXWORKERS);
 
 
 	unsigned cpu;
 	unsigned cpu;
 	unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
 	unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
-	for (cpu = 0; cpu < topology->ncpus; cpu++)
+	for (cpu = 0; cpu < topology->nworker[STARPU_CPU_WORKER][0]; cpu++)
 	{
 	{
 		int worker_idx = topology->nworkers + cpu;
 		int worker_idx = topology->nworkers + cpu;
 		config->workers[worker_idx].arch = STARPU_CPU_WORKER;
 		config->workers[worker_idx].arch = STARPU_CPU_WORKER;
@@ -2003,7 +2010,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 		config->worker_mask |= STARPU_CPU;
 		config->worker_mask |= STARPU_CPU;
 	}
 	}
 
 
-	topology->nworkers += topology->ncpus;
+	topology->nworkers += topology->nworker[STARPU_CPU_WORKER][0];
 #endif
 #endif
 
 
 	if (topology->nworkers == 0)
 	if (topology->nworkers == 0)
@@ -2136,7 +2143,7 @@ int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid S
 
 
 			if (workerid >= 0)
 			if (workerid >= 0)
 				/* This shouldn't happen for workers */
 				/* This shouldn't happen for workers */
-				_STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwcpus, config->topology.nhwpus);
+				_STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwworker[STARPU_CPU_WORKER][0], config->topology.nhwpus);
 			ret = -1;
 			ret = -1;
 		}
 		}
 		else
 		else
@@ -2426,7 +2433,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 #endif
 #endif
 
 
 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
-		for (i = 0; i < config->topology.ncudagpus; i++)
+		for (i = 0; i < config->topology.ndevices[STARPU_CUDA_WORKER]; i++)
 		{
 		{
 			hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
 			hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
 			if (obj)
 			if (obj)
@@ -2461,7 +2468,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 		}
 		}
 #endif
 #endif
 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
-		if (config->topology.nopenclgpus > 0)
+		if (config->topology.ndevices[STARPU_OPENCL_WORKER] > 0)
 		{
 		{
 			cl_int err;
 			cl_int err;
 			cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
 			cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
@@ -2657,17 +2664,18 @@ static void _starpu_init_workers_binding_and_memory(struct _starpu_machine_confi
 	int cuda_globalbindid = -1;
 	int cuda_globalbindid = -1;
 #endif
 #endif
 
 
+#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
+	unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
+	unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
+	unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
+#endif
+
 #if defined(STARPU_USE_FPGA)
 #if defined(STARPU_USE_FPGA)
 	unsigned fpga_init[STARPU_MAXFPGADEVS] = { };
 	unsigned fpga_init[STARPU_MAXFPGADEVS] = { };
 	unsigned fpga_memory_nodes[STARPU_MAXFPGADEVS];
 	unsigned fpga_memory_nodes[STARPU_MAXFPGADEVS];
 	unsigned fpga_bindid[STARPU_MAXFPGADEVS];
 	unsigned fpga_bindid[STARPU_MAXFPGADEVS];
 #endif
 #endif
 
 
-#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
-	unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
-	unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
-	unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
-#endif
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
 	unsigned mic_init[STARPU_MAXMICDEVS] = { };
 	unsigned mic_init[STARPU_MAXMICDEVS] = { };
 	unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
 	unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
@@ -3128,6 +3136,7 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 {
 {
 	int ret;
 	int ret;
 	unsigned i;
 	unsigned i;
+	enum starpu_worker_archtype type;
 
 
 	ret = _starpu_init_machine_config(config, no_mp_config);
 	ret = _starpu_init_machine_config(config, no_mp_config);
 	if (ret)
 	if (ret)
@@ -3141,57 +3150,16 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 
 
 	_starpu_mem_chunk_init_last();
 	_starpu_mem_chunk_init_last();
 
 
-	config->cpus_nodeid = -1;
-	config->cuda_nodeid = -1;
-	config->opencl_nodeid = -1;
-	config->mic_nodeid = -1;
-	config->fpga_nodeid = -1;
-	config->mpi_nodeid = -1;
+	for (type = 0; type < STARPU_NARCH; type++)
+		config->arch_nodeid[type] = -1;
+
 	for (i = 0; i < starpu_worker_get_count(); i++)
 	for (i = 0; i < starpu_worker_get_count(); i++)
 	{
 	{
-		switch (starpu_worker_get_type(i))
-		{
-			case STARPU_CPU_WORKER:
-				if (config->cpus_nodeid == -1)
-					config->cpus_nodeid = starpu_worker_get_memory_node(i);
-				else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
-					config->cpus_nodeid = -2;
-				break;
-
-			case STARPU_CUDA_WORKER:
-				if (config->cuda_nodeid == -1)
-					config->cuda_nodeid = starpu_worker_get_memory_node(i);
-				else if (config->cuda_nodeid != (int) starpu_worker_get_memory_node(i))
-					config->cuda_nodeid = -2;
-				break;
-
-                        case STARPU_FPGA_WORKER:
-				if (config->fpga_nodeid == -1)
-					config->fpga_nodeid = starpu_worker_get_memory_node(i);
-				else if (config->fpga_nodeid != (int) starpu_worker_get_memory_node(i))
-					config->fpga_nodeid = -2;
-				break;
-			case STARPU_OPENCL_WORKER:
-				if (config->opencl_nodeid == -1)
-					config->opencl_nodeid = starpu_worker_get_memory_node(i);
-				else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
-					config->opencl_nodeid = -2;
-				break;
-			case STARPU_MIC_WORKER:
-				if (config->mic_nodeid == -1)
-					config->mic_nodeid = starpu_worker_get_memory_node(i);
-				else if (config->mic_nodeid != (int) starpu_worker_get_memory_node(i))
-					config->mic_nodeid = -2;
-				break;
-			case STARPU_MPI_MS_WORKER:
-				if (config->mpi_nodeid == -1)
-					config->mpi_nodeid = starpu_worker_get_memory_node(i);
-				else if (config->mpi_nodeid != (int) starpu_worker_get_memory_node(i))
-					config->mpi_nodeid = -2;
-				break;
-			case STARPU_ANY_WORKER:
-				STARPU_ASSERT(0);
-		}
+		type = starpu_worker_get_type(i);
+		if (config->arch_nodeid[type] == -1)
+			config->arch_nodeid[type] = starpu_worker_get_memory_node(i);
+		else if (config->arch_nodeid[type] != (int) starpu_worker_get_memory_node(i))
+			config->arch_nodeid[type] = -2;
 	}
 	}
 
 
 	return 0;
 	return 0;
@@ -3219,7 +3187,7 @@ void starpu_topology_print(FILE *output)
 	unsigned worker;
 	unsigned worker;
 	unsigned nworkers = starpu_worker_get_count();
 	unsigned nworkers = starpu_worker_get_count();
 	unsigned ncombinedworkers = topology->ncombinedworkers;
 	unsigned ncombinedworkers = topology->ncombinedworkers;
-	unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;
+	unsigned nthreads_per_core = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
 
 
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
 	hwloc_topology_t topo = topology->hwtopology;
 	hwloc_topology_t topo = topology->hwtopology;
@@ -3308,5 +3276,5 @@ unsigned _starpu_get_nhyperthreads()
 {
 {
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 
 
-	return config->topology.nhwpus / config->topology.nhwcpus;
+	return config->topology.nhwpus / config->topology.nhwworker[STARPU_CPU_WORKER][0];
 }
 }

+ 111 - 119
src/core/workers.c

@@ -45,6 +45,10 @@
 #include <drivers/cpu/driver_cpu.h>
 #include <drivers/cpu/driver_cpu.h>
 #include <drivers/cuda/driver_cuda.h>
 #include <drivers/cuda/driver_cuda.h>
 #include <drivers/opencl/driver_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
+#include <drivers/max/driver_fpga.h>
+#include <drivers/mic/driver_mic_source.h>
+#include <drivers/mpi/driver_mpi_source.h>
+#include <drivers/disk/driver_disk.h>
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 #include <core/simgrid.h>
 #include <core/simgrid.h>
@@ -203,6 +207,20 @@ void _starpu__workers_c__register_kobs(void)
 	/* TODO */
 	/* TODO */
 }
 }
 
 
+struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
+
+void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info)
+{
+	starpu_driver_info[archtype] = *info;
+}
+
+struct starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1];
+
+void starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct starpu_memory_driver_info *info)
+{
+	starpu_memory_driver_info[kind] = *info;
+}
+
 /* Initialize value of static argc and argv, called when the process begins
 /* Initialize value of static argc and argv, called when the process begins
  */
  */
 void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
 void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
@@ -402,14 +420,14 @@ static inline int _starpu_can_use_nth_implementation(enum starpu_worker_archtype
 		starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
 		starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
 		return func != NULL;
 		return func != NULL;
 	}
 	}
-        case STARPU_FPGA_WORKER:
+	case STARPU_OPENCL_WORKER:
 	{
 	{
-		starpu_fpga_func_t func = _starpu_task_get_fpga_nth_implementation(cl, nimpl);
+		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
 		return func != NULL;
 		return func != NULL;
 	}
 	}
-	case STARPU_OPENCL_WORKER:
+        case STARPU_FPGA_WORKER:
 	{
 	{
-		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
+		starpu_fpga_func_t func = _starpu_task_get_fpga_nth_implementation(cl, nimpl);
 		return func != NULL;
 		return func != NULL;
 	}
 	}
 	case STARPU_MIC_WORKER:
 	case STARPU_MIC_WORKER:
@@ -632,10 +650,12 @@ static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
 			if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
 			if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
 				return 0;
 				return 0;
 			break;
 			break;
+#ifdef STARPU_USE_OPENCL
 		case STARPU_OPENCL_WORKER:
 		case STARPU_OPENCL_WORKER:
 			if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
 			if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
 				return 0;
 				return 0;
 			break;
 			break;
+#endif
 		default:
 		default:
 			STARPU_ABORT();
 			STARPU_ABORT();
 		}
 		}
@@ -759,23 +779,23 @@ static void _starpu_worker_deinit(struct _starpu_worker *workerarg)
 }
 }
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-void _starpu_worker_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync)
+void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync)
 {
 {
 	unsigned devid = worker->devid;
 	unsigned devid = worker->devid;
 	unsigned memnode = worker->memory_node;
 	unsigned memnode = worker->memory_node;
-	_STARPU_TRACE_WORKER_INIT_START(fut_key, worker->workerid, devid, memnode, worker->bindid, sync);
+	_STARPU_TRACE_WORKER_INIT_START(archtype, worker->workerid, devid, memnode, worker->bindid, sync);
 }
 }
 #endif
 #endif
 
 
-void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync STARPU_ATTRIBUTE_UNUSED)
+void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync STARPU_ATTRIBUTE_UNUSED)
 {
 {
-	(void) fut_key;
+	(void) archtype;
 	int devid = worker->devid;
 	int devid = worker->devid;
 	(void) devid;
 	(void) devid;
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 	_STARPU_TRACE_REGISTER_THREAD(worker->bindid);
 	_STARPU_TRACE_REGISTER_THREAD(worker->bindid);
-	_starpu_worker_start(worker, fut_key, sync);
+	_starpu_worker_start(worker, archtype, sync);
 #endif
 #endif
 	_starpu_set_local_worker_key(worker);
 	_starpu_set_local_worker_key(worker);
 
 
@@ -886,30 +906,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 			}
 			}
 #endif
 #endif
 
 
-#if defined(STARPU_USE_FPGA)
-			case STARPU_FPGA_WORKER:
-				driver.id.fpga_id = workerarg->devid;
-				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
-				{
-					workerarg->run_by_starpu = 0;
-					break;
-				}
-				STARPU_PTHREAD_CREATE_ON(
-					workerarg->name,
-					&workerarg->worker_thread,
-					NULL,
-					_starpu_fpga_worker,
-					workerarg,
-					_starpu_simgrid_get_host_by_worker(workerarg));
-#ifdef STARPU_USE_FXT
-				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
-				while (!workerarg->worker_is_running)
-					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
-				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
-#endif
-				break;
-#endif
-
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 			case STARPU_OPENCL_WORKER:
 			case STARPU_OPENCL_WORKER:
 			{
 			{
@@ -937,6 +933,29 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 			}
 			}
 #endif
 #endif
 
 
+#if defined(STARPU_USE_FPGA)
+			case STARPU_FPGA_WORKER:
+				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
+				{
+					workerarg->run_by_starpu = 0;
+					break;
+				}
+				STARPU_PTHREAD_CREATE_ON(
+					workerarg->name,
+					&workerarg->worker_thread,
+					NULL,
+					_starpu_fpga_worker,
+					workerarg,
+					_starpu_simgrid_get_host_by_worker(workerarg));
+#ifdef STARPU_USE_FXT
+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
+				while (!workerarg->worker_is_running)
+					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
+#endif
+				break;
+#endif
+
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
 			case STARPU_MIC_WORKER:
 			case STARPU_MIC_WORKER:
 			{
 			{
@@ -1017,7 +1036,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 	}
 	}
 
 
 #if defined(STARPU_USE_MPI_MASTER_SLAVE) && !defined(STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD)
 #if defined(STARPU_USE_MPI_MASTER_SLAVE) && !defined(STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD)
-        if (pconfig->topology.nmpidevices > 0)
+        if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
         {
         {
                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
                 struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
                 struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
                 struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
@@ -1157,6 +1176,14 @@ int starpu_conf_init(struct starpu_conf *conf)
 		conf->disable_asynchronous_opencl_copy = 0;
 		conf->disable_asynchronous_opencl_copy = 0;
 #endif
 #endif
 
 
+#if defined(STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY)
+	conf->disable_asynchronous_fpga_copy = 1;
+#else
+	conf->disable_asynchronous_fpga_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY");
+	if (conf->disable_asynchronous_fpga_copy == -1)
+		conf->disable_asynchronous_fpga_copy = 0;
+#endif
+
 #if defined(STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY)
 #if defined(STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY)
 	conf->disable_asynchronous_mic_copy = 1;
 	conf->disable_asynchronous_mic_copy = 1;
 #else
 #else
@@ -1173,14 +1200,6 @@ int starpu_conf_init(struct starpu_conf *conf)
 		conf->disable_asynchronous_mpi_ms_copy = 0;
 		conf->disable_asynchronous_mpi_ms_copy = 0;
 #endif
 #endif
 
 
-#if defined(STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY)
-	conf->disable_asynchronous_fpga_copy = 1;
-#else
-	conf->disable_asynchronous_fpga_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY");
-	if (conf->disable_asynchronous_fpga_copy == -1)
-		conf->disable_asynchronous_fpga_copy = 0;
-#endif
-
 	/* 64MiB by default */
 	/* 64MiB by default */
 	conf->trace_buffer_size = ((uint64_t) starpu_get_env_number_default("STARPU_TRACE_BUFFER_SIZE", 64)) << 20;
 	conf->trace_buffer_size = ((uint64_t) starpu_get_env_number_default("STARPU_TRACE_BUFFER_SIZE", 64)) << 20;
 
 
@@ -1231,8 +1250,8 @@ void _starpu_conf_check_environment(struct starpu_conf *conf)
 	if (main_thread_bind)
 	if (main_thread_bind)
 		conf->reserve_ncpus++;
 		conf->reserve_ncpus++;
 	_starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda, conf->precedence_over_environment_variables);
-        _starpu_conf_set_value_against_environment("STARPU_NFPGA", &conf->nfpga, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl, conf->precedence_over_environment_variables);
+        _starpu_conf_set_value_against_environment("STARPU_NFPGA", &conf->nfpga, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate, conf->precedence_over_environment_variables);
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
@@ -1249,9 +1268,9 @@ void _starpu_conf_check_environment(struct starpu_conf *conf)
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy, conf->precedence_over_environment_variables);
+	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY", &conf->disable_asynchronous_fpga_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY", &conf->disable_asynchronous_mic_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY", &conf->disable_asynchronous_mic_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY", &conf->disable_asynchronous_mpi_ms_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY", &conf->disable_asynchronous_mpi_ms_copy, conf->precedence_over_environment_variables);
-	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY", &conf->disable_asynchronous_fpga_copy, conf->precedence_over_environment_variables);
 }
 }
 
 
 struct starpu_tree* starpu_workers_get_tree(void)
 struct starpu_tree* starpu_workers_get_tree(void)
@@ -1426,6 +1445,16 @@ int _starpu_get_catch_signals(void)
 	return _starpu_config.conf.catch_signals;
 	return _starpu_config.conf.catch_signals;
 }
 }
 
 
+void starpu_drivers_preinit(void) {
+	_starpu_cpu_preinit();
+	_starpu_cuda_preinit();
+	_starpu_opencl_preinit();
+	_starpu_fpga_preinit();
+	_starpu_mic_preinit();
+	_starpu_mpi_ms_preinit();
+	_starpu_disk_preinit();
+}
+
 int starpu_init(struct starpu_conf *user_conf)
 int starpu_init(struct starpu_conf *user_conf)
 {
 {
 	return starpu_initialize(user_conf, NULL, NULL);
 	return starpu_initialize(user_conf, NULL, NULL);
@@ -1610,6 +1639,9 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 
 
 	_starpu_load_bus_performance_files();
 	_starpu_load_bus_performance_files();
 
 
+	/* Let drivers register themselves */
+	starpu_drivers_preinit();
+
 	/* Note: nothing before here should be allocating anything, in case we
 	/* Note: nothing before here should be allocating anything, in case we
 	 * actually return ENODEV here */
 	 * actually return ENODEV here */
 
 
@@ -2118,32 +2150,22 @@ unsigned starpu_worker_is_slave_somewhere(int workerid)
 
 
 int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
 int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
 {
 {
-	switch (type)
-	{
-		case STARPU_CPU_WORKER:
-			return _starpu_config.topology.ncpus;
-
-		case STARPU_CUDA_WORKER:
-			return _starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda;
+	unsigned n = 0;
 
 
-		case STARPU_OPENCL_WORKER:
-			return _starpu_config.topology.nopenclgpus;
-
-		case STARPU_MIC_WORKER:
-			return _starpu_config.topology.nmicdevices;
-
-                case STARPU_MPI_MS_WORKER:
-                        return _starpu_config.topology.nmpidevices;
-
-                case STARPU_ANY_WORKER:
-                        return _starpu_config.topology.ncpus+
-				_starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda+
-                                _starpu_config.topology.nopenclgpus+
-                                _starpu_config.topology.nmicdevices+
-                                _starpu_config.topology.nmpidevices;
-		default:
+	if (type != STARPU_ANY_WORKER)
+	{
+		if (type >= STARPU_NARCH)
 			return -EINVAL;
 			return -EINVAL;
+
+		unsigned i;
+		for (i = 0; i < _starpu_config.topology.ndevices[type]; i++)
+			n += _starpu_config.topology.nworker[type][i];
+		return n;
 	}
 	}
+
+	for (type = 0; type < STARPU_NARCH; type++)
+		n += starpu_worker_get_count_by_type(type);
+	return n;
 }
 }
 
 
 unsigned starpu_combined_worker_get_count(void)
 unsigned starpu_combined_worker_get_count(void)
@@ -2153,17 +2175,17 @@ unsigned starpu_combined_worker_get_count(void)
 
 
 unsigned starpu_cpu_worker_get_count(void)
 unsigned starpu_cpu_worker_get_count(void)
 {
 {
-	return _starpu_config.topology.ncpus;
+	return starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
 }
 }
 
 
 unsigned starpu_cuda_worker_get_count(void)
 unsigned starpu_cuda_worker_get_count(void)
 {
 {
-	return _starpu_config.topology.ncudagpus * _starpu_config.topology.nworkerpercuda;
+	return starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
 }
 }
 
 
 unsigned starpu_opencl_worker_get_count(void)
 unsigned starpu_opencl_worker_get_count(void)
 {
 {
-	return _starpu_config.topology.nopenclgpus;
+	return starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER);
 }
 }
 
 
 int starpu_asynchronous_copy_disabled(void)
 int starpu_asynchronous_copy_disabled(void)
@@ -2176,14 +2198,14 @@ int starpu_asynchronous_cuda_copy_disabled(void)
 	return _starpu_config.conf.disable_asynchronous_cuda_copy;
 	return _starpu_config.conf.disable_asynchronous_cuda_copy;
 }
 }
 
 
-int starpu_asynchronous_fpga_copy_disabled(void)
+int starpu_asynchronous_opencl_copy_disabled(void)
 {
 {
-	return _starpu_config.conf.disable_asynchronous_fpga_copy;
+	return _starpu_config.conf.disable_asynchronous_opencl_copy;
 }
 }
 
 
-int starpu_asynchronous_opencl_copy_disabled(void)
+int starpu_asynchronous_fpga_copy_disabled(void)
 {
 {
-	return _starpu_config.conf.disable_asynchronous_opencl_copy;
+	return _starpu_config.conf.disable_asynchronous_fpga_copy;
 }
 }
 
 
 int starpu_asynchronous_mic_copy_disabled(void)
 int starpu_asynchronous_mic_copy_disabled(void)
@@ -2198,17 +2220,12 @@ int starpu_asynchronous_mpi_ms_copy_disabled(void)
 
 
 unsigned starpu_mic_worker_get_count(void)
 unsigned starpu_mic_worker_get_count(void)
 {
 {
-	int i = 0, count = 0;
-
-	for (i = 0; i < STARPU_MAXMICDEVS; i++)
-		count += _starpu_config.topology.nmiccores[i];
-
-	return count;
+	return starpu_worker_get_count_by_type(STARPU_MIC_WORKER);
 }
 }
 
 
 unsigned starpu_mpi_ms_worker_get_count(void)
 unsigned starpu_mpi_ms_worker_get_count(void)
 {
 {
-        return _starpu_config.topology.nmpidevices;
+	return starpu_worker_get_count_by_type(STARPU_MPI_MS_WORKER);
 }
 }
 
 
 /* When analyzing performance, it is useful to see what is the processing unit
 /* When analyzing performance, it is useful to see what is the processing unit
@@ -2615,30 +2632,18 @@ unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
 
 
 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 {
 {
-	switch (type) {
-		case STARPU_CPU_WORKER: return "STARPU_CPU_WORKER";
-		case STARPU_CUDA_WORKER: return "STARPU_CUDA_WORKER";
-		case STARPU_OPENCL_WORKER: return "STARPU_OPENCL_WORKER";
-		case STARPU_FPGA_WORKER: return "STARPU_FPGA_WORKER";
-		case STARPU_MIC_WORKER: return "STARPU_MIC_WORKER";
-		case STARPU_MPI_MS_WORKER: return "STARPU_MPI_MS_WORKER";
-		case STARPU_ANY_WORKER: return "STARPU_ANY_WORKER";
-		default: return "STARPU_unknown_WORKER";
-	}
+	const char *ret = starpu_driver_info[type].name_upper;
+	if (!ret)
+		ret = "unknown";
+	return ret;
 }
 }
 
 
-const char *starpu_worker_get_type_as_short_string(enum starpu_worker_archtype type)
+const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type)
 {
 {
-	switch (type) {
-		case STARPU_CPU_WORKER: return "CPU";
-		case STARPU_CUDA_WORKER: return "CUDA";
-		case STARPU_OPENCL_WORKER: return "OPENCL";
-		case STARPU_FPGA_WORKER: return "FPGA";
-		case STARPU_MIC_WORKER: return "MIC";
-		case STARPU_MPI_MS_WORKER: return "MPI_MS";
-		case STARPU_ANY_WORKER: return "ANY";
-		default: return "STARPU_unknown_WORKER";
-	}
+	const char *ret = starpu_driver_info[type].name_var;
+	if (!ret)
+		ret = "UNKNOWN";
+	return ret;
 }
 }
 
 
 void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx)
 void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx)
@@ -2846,22 +2851,9 @@ void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid))
 }
 }
 #endif
 #endif
 
 
-enum starpu_node_kind _starpu_worker_get_node_kind(enum starpu_worker_archtype type)
+enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type)
 {
 {
-	switch(type)
-	{
-		case STARPU_CPU_WORKER:
-			return STARPU_CPU_RAM;
-		case STARPU_CUDA_WORKER:
-			return STARPU_CUDA_RAM;
-		case STARPU_OPENCL_WORKER:
-			return STARPU_OPENCL_RAM;
-			break;
-		case STARPU_MIC_WORKER:
-			return STARPU_MIC_RAM;
-		case STARPU_MPI_MS_WORKER:
-			return STARPU_MPI_MS_RAM;
-		default:
-			STARPU_ABORT();
-	}
+	enum starpu_node_kind kind = starpu_driver_info[type].memory_kind;
+	STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type);
+	return kind;
 }
 }

+ 52 - 68
src/core/workers.h

@@ -59,8 +59,6 @@
 
 
 #include <datawizard/datawizard.h>
 #include <datawizard/datawizard.h>
 
 
-#include <starpu_parameters.h>
-
 #define STARPU_MAX_PIPELINE 4
 #define STARPU_MAX_PIPELINE 4
 
 
 enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
 enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
@@ -271,66 +269,33 @@ struct _starpu_machine_topology
 	/** custom hwloc tree*/
 	/** custom hwloc tree*/
 	struct starpu_tree *tree;
 	struct starpu_tree *tree;
 
 
-	/** Total number of CPU cores, as detected by the topology code. May
-	 * be different from the actual number of CPU workers.
-	 */
-	unsigned nhwcpus;
-
 	/** Total number of PUs (i.e. threads), as detected by the topology code. May
 	/** Total number of PUs (i.e. threads), as detected by the topology code. May
-	 * be different from the actual number of PU workers.
+	 * be different from the actual number of CPU workers.
 	 */
 	 */
 	unsigned nhwpus;
 	unsigned nhwpus;
 
 
-	/** Total number of CUDA devices, as detected. May be different
-	 * from the actual number of CUDA workers.
+	/** Total number of devices, as detected. May be different from the
+	 * actual number of devices run by StarPU.
 	 */
 	 */
-	unsigned nhwcudagpus;
-
-	/** Total number of OpenCL devices, as detected. May be
-	 * different from the actual number of OpenCL workers.
+	unsigned nhwdevices[STARPU_NARCH];
+	/** Total number of worker for each device, as detected. May be different from the
+	 * actual number of workers run by StarPU.
 	 */
 	 */
-	unsigned nhwopenclgpus;
+	unsigned nhwworker[STARPU_NARCH][STARPU_NMAXDEVS];
 
 
-        /* Total number of FPGA devices, as detected. May be different
-	 * from the actual number of FPGA workers.
+	/** Actual number of devices used by StarPU.
 	 */
 	 */
-	unsigned nhwfpgafpgas;
+	unsigned ndevices[STARPU_NARCH];
 
 
-	/** Total number of MPI nodes, as detected. May be different
-	 * from the actual number of node workers.
+	/** Number of worker per device
 	 */
 	 */
-	unsigned nhwmpi;
+	unsigned nworker[STARPU_NARCH][STARPU_NMAXDEVS];
 
 
-	/** Actual number of CPU workers used by StarPU. */
-	unsigned ncpus;
-
-	/** Actual number of CUDA GPUs used by StarPU. */
-	unsigned ncudagpus;
-	unsigned nworkerpercuda;
+	/** Whether we should have one thread per stream */
 	int cuda_th_per_stream;
 	int cuda_th_per_stream;
+	/** Whether we should have one thread per device */
 	int cuda_th_per_dev;
 	int cuda_th_per_dev;
 
 
-	/** Actual number of OpenCL workers used by StarPU. */
-	unsigned nopenclgpus;
-
-	/** Actual number of MPI workers used by StarPU. */
-	unsigned nmpidevices;
-        unsigned nhwmpidevices;
-
-	unsigned nhwmpicores[STARPU_MAXMPIDEVS]; /**< Each MPI node has its set of cores. */
-	unsigned nmpicores[STARPU_MAXMPIDEVS];
-
-        /* Actual number of Fpga workers used by StarPU. */
-	unsigned nfpgafpgas;
-
-	/** Topology of MP nodes (MIC) as well as necessary
-	 * objects to communicate with them. */
-	unsigned nhwmicdevices;
-	unsigned nmicdevices;
-
-	unsigned nhwmiccores[STARPU_MAXMICDEVS]; /**< Each MIC node has its set of cores. */
-	unsigned nmiccores[STARPU_MAXMICDEVS];
-
 	/** Indicates the successive logical PU identifier that should be used
 	/** Indicates the successive logical PU identifier that should be used
 	 * to bind the workers. It is either filled according to the
 	 * to bind the workers. It is either filled according to the
 	 * user's explicit parameters (from starpu_conf) or according
 	 * user's explicit parameters (from starpu_conf) or according
@@ -356,7 +321,13 @@ struct _starpu_machine_topology
 	 */
 	 */
 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
 
 
-        unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
+	/** Indicates the successive FPGA identifier that should be
+	 * used by the FPGA driver.  It is either filled according
+	 * to the user's explicit parameters (from starpu_conf) or
+	 * according to the STARPU_WORKERS_FPGAID env. variable.
+	 * Otherwise, they are taken in ID order.
+	 */
+	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
 
 
 	/*** Indicates the successive MIC devices that should be used
 	/*** Indicates the successive MIC devices that should be used
 	 * by the MIC driver.  It is either filled according to the
 	 * by the MIC driver.  It is either filled according to the
@@ -390,27 +361,17 @@ struct _starpu_machine_config
 	/** Which GPU(s) do we use for OpenCL ? */
 	/** Which GPU(s) do we use for OpenCL ? */
 	int current_opencl_gpuid;
 	int current_opencl_gpuid;
 
 
+        /* Which FPGA(s) do we use for FPGA? */
+	int current_fpga_deviceid;
+
 	/** Which MIC do we use? */
 	/** Which MIC do we use? */
 	int current_mic_deviceid;
 	int current_mic_deviceid;
 
 
 	/** Which MPI do we use? */
 	/** Which MPI do we use? */
 	int current_mpi_deviceid;
 	int current_mpi_deviceid;
 
 
-        /* Which FPGA(s) do we use for FPGA? */
-	int current_fpga_deviceid;
-
-	/** Memory node for cpus, if only one */
-	int cpus_nodeid;
-	/** Memory node for CUDA, if only one */
-	int cuda_nodeid;
-	/** Memory node for OpenCL, if only one */
-	int opencl_nodeid;
-	/** Memory node for MIC, if only one */
-	int mic_nodeid;
-	/** Memory node for MPI, if only one */
-	int mpi_nodeid;
-        /* Memory node for FPGA, if only one */
-	int fpga_nodeid;
+	/** Memory node for different worker types, if only one */
+	int arch_nodeid [STARPU_NARCH];
 
 
 	/** Separate out previous variables from per-worker data. */
 	/** Separate out previous variables from per-worker data. */
 	char padding1[STARPU_CACHELINE_SIZE];
 	char padding1[STARPU_CACHELINE_SIZE];
@@ -465,6 +426,31 @@ struct _starpu_machine_config
 	int perf_counter_pause_depth;
 	int perf_counter_pause_depth;
 };
 };
 
 
+/** Provides information for a device driver */
+struct starpu_driver_info {
+	const char *name_upper;	/**< Name of worker type in upper case */
+	const char *name_var;	/**< Name of worker type for environment variables */
+	const char *name_lower;	/**< Name of worker type in lower case */
+	enum starpu_node_kind memory_kind;	/**< Kind of memory in device */
+	double alpha;	/**< Typical relative speed compared to a CPU core */
+};
+
+/** Device driver information, indexed by enum starpu_worker_archtype */
+extern struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
+
+void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
+
+/** Provides information for a memory node driver */
+struct starpu_memory_driver_info {
+	const char *name_upper;	/**< Name of memory in upper case */
+	enum starpu_worker_archtype worker_archtype;	/**< Kind of device */
+};
+
+/** Memory driver information, indexed by enum starpu_node_kind */
+extern struct starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1];
+
+void starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct starpu_memory_driver_info *info);
+
 extern int _starpu_worker_parallel_blocks;
 extern int _starpu_worker_parallel_blocks;
 
 
 extern struct _starpu_machine_config _starpu_config STARPU_ATTRIBUTE_INTERNAL;
 extern struct _starpu_machine_config _starpu_config STARPU_ATTRIBUTE_INTERNAL;
@@ -522,9 +508,9 @@ unsigned _starpu_worker_can_block(unsigned memnode, struct _starpu_worker *worke
 void _starpu_block_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
 void _starpu_block_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
 
 
 /** This function initializes the current driver for the given worker */
 /** This function initializes the current driver for the given worker */
-void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync);
+void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync);
 /** This function initializes the current thread for the given worker */
 /** This function initializes the current thread for the given worker */
-void _starpu_worker_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync);
+void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync);
 
 
 static inline unsigned _starpu_worker_get_count(void)
 static inline unsigned _starpu_worker_get_count(void)
 {
 {
@@ -673,8 +659,6 @@ static inline unsigned __starpu_worker_get_id_check(const char *f, int l)
 }
 }
 #define _starpu_worker_get_id_check(f,l) __starpu_worker_get_id_check(f,l)
 #define _starpu_worker_get_id_check(f,l) __starpu_worker_get_id_check(f,l)
 
 
-enum starpu_node_kind _starpu_worker_get_node_kind(enum starpu_worker_archtype type);
-
 void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx);
 void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx);
 
 
 struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid);
 struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid);

+ 5 - 12
src/datawizard/coherency.c

@@ -109,7 +109,6 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 	int i_ram = -1;
 	int i_ram = -1;
 	int i_gpu = -1;
 	int i_gpu = -1;
 	int i_disk = -1;
 	int i_disk = -1;
-	int i_fpga = -1;
 
 
 	/* Revert to dumb strategy: take RAM unless only a GPU has it */
 	/* Revert to dumb strategy: take RAM unless only a GPU has it */
 	for (i = 0; i < nnodes; i++)
 	for (i = 0; i < nnodes; i++)
@@ -140,30 +139,24 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 			 * 	Unless peer transfer is supported (and it would then have been selected above).
 			 * 	Unless peer transfer is supported (and it would then have been selected above).
 			 * 	Other should be ok */
 			 * 	Other should be ok */
 
 
-			if (starpu_node_get_kind(i) == STARPU_CUDA_RAM ||
-			    starpu_node_get_kind(i) == STARPU_OPENCL_RAM ||
-			    starpu_node_get_kind(i) == STARPU_MIC_RAM)
-				i_gpu = i;
-
 			if (starpu_node_get_kind(i) == STARPU_CPU_RAM ||
 			if (starpu_node_get_kind(i) == STARPU_CPU_RAM ||
 			    starpu_node_get_kind(i) == STARPU_MPI_MS_RAM)
 			    starpu_node_get_kind(i) == STARPU_MPI_MS_RAM)
 				i_ram = i;
 				i_ram = i;
-			if (starpu_node_get_kind(i) == STARPU_DISK_RAM)
+			else if (starpu_node_get_kind(i) == STARPU_DISK_RAM)
 				i_disk = i;
 				i_disk = i;
-			if (starpu_node_get_kind(i) == STARPU_FPGA_RAM)
-				i_fpga = i;
+			else
+				i_gpu = i;
 		}
 		}
 	}
 	}
 
 
 	/* we have to use cpu_ram in first */
 	/* we have to use cpu_ram in first */
 	if (i_ram != -1)
 	if (i_ram != -1)
 		src_node = i_ram;
 		src_node = i_ram;
-	/* no luck we have to use the disk memory */
 	else if (i_gpu != -1)
 	else if (i_gpu != -1)
+	/* otherwise a gpu */
 		src_node = i_gpu;
 		src_node = i_gpu;
-	else if (i_fpga != -1)
-		src_node = i_fpga;
 	else
 	else
+	/* no luck we have to use the disk memory */
 		src_node = i_disk;
 		src_node = i_disk;
 
 
 	STARPU_ASSERT(src_node != -1);
 	STARPU_ASSERT(src_node != -1);

+ 3 - 10
src/datawizard/memory_nodes.c

@@ -181,14 +181,7 @@ int starpu_memory_node_get_devid(unsigned node)
 }
 }
 
 
 enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) {
 enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) {
-	switch (node_kind) {
-		// case STARPU_UNUSED:
-		case STARPU_CPU_RAM: return STARPU_CPU_WORKER;
-		case STARPU_CUDA_RAM: return STARPU_CUDA_WORKER;
-		case STARPU_OPENCL_RAM: return STARPU_OPENCL_WORKER;
-		// case STARPU_DISK_RAM:
-		case STARPU_MIC_RAM: return STARPU_MIC_WORKER;
-		case STARPU_MPI_MS_RAM: return STARPU_MPI_MS_WORKER;
-		default: STARPU_ASSERT_MSG(0, "ambiguous memory node kind %d", node_kind);
-	}
+	enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype;
+	STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind);
+	return archtype;
 }
 }

+ 3 - 21
src/datawizard/node_ops.c

@@ -28,25 +28,7 @@
 
 
 const char* _starpu_node_get_prefix(enum starpu_node_kind kind)
 const char* _starpu_node_get_prefix(enum starpu_node_kind kind)
 {
 {
-	switch (kind)
-	{
-		case STARPU_CPU_RAM:
-			return "NUMA";
-		case STARPU_CUDA_RAM:
-			return "CUDA";
-		case STARPU_OPENCL_RAM:
-			return "OpenCL";
-		case STARPU_DISK_RAM:
-			return "Disk";
-		case STARPU_MIC_RAM:
-			return "MIC";
-		case STARPU_FPGA_RAM:
-			return "FPGA";
-		case STARPU_MPI_MS_RAM:
-			return "MPI_MS";
-		case STARPU_UNUSED:
-		default:
-			STARPU_ASSERT(0);
-			return "unknown";
-	}
+	const char *ret = starpu_memory_driver_info[kind].name_upper;
+	STARPU_ASSERT(ret);
+	return ret;
 }
 }

+ 4 - 4
src/datawizard/node_ops.h

@@ -46,10 +46,10 @@ typedef int (*copy3d_data_t)(uintptr_t src_ptr, size_t src_offset, unsigned src_
 
 
 struct _starpu_node_ops
 struct _starpu_node_ops
 {
 {
-	copy_interface_func_t copy_interface_to[STARPU_MPI_MS_RAM+1];
-	copy_data_t copy_data_to[STARPU_MPI_MS_RAM+1];
-	copy2d_data_t copy2d_data_to[STARPU_MPI_MS_RAM+1];
-	copy3d_data_t copy3d_data_to[STARPU_MPI_MS_RAM+1];
+	copy_interface_func_t copy_interface_to[STARPU_MAX_RAM+1];
+	copy_data_t copy_data_to[STARPU_MAX_RAM+1];
+	copy2d_data_t copy2d_data_to[STARPU_MAX_RAM+1];
+	copy3d_data_t copy3d_data_to[STARPU_MAX_RAM+1];
 	void (*wait_request_completion)(struct _starpu_async_channel *async_channel);
 	void (*wait_request_completion)(struct _starpu_async_channel *async_channel);
 	unsigned (*test_request_completion)(struct _starpu_async_channel *async_channel);
 	unsigned (*test_request_completion)(struct _starpu_async_channel *async_channel);
 	int (*is_direct_access_supported)(unsigned node, unsigned handling_node);
 	int (*is_direct_access_supported)(unsigned node, unsigned handling_node);

+ 19 - 95
src/debug/traces/starpu_fxt.c

@@ -39,30 +39,18 @@
 #include <starpu_hash.h>
 #include <starpu_hash.h>
 
 
 #define CPUS_WORKER_COLORS_NB	8
 #define CPUS_WORKER_COLORS_NB	8
-#define CUDA_WORKER_COLORS_NB	9
-#define OPENCL_WORKER_COLORS_NB 9
-#define MIC_WORKER_COLORS_NB	9
-#define MPI_MS_WORKER_COLORS_NB	9
-#define OTHER_WORKER_COLORS_NB	4
+#define ACCEL_WORKER_COLORS_NB	9
 
 
 /* How many times longer an idle period has to be before the smoothing
 /* How many times longer an idle period has to be before the smoothing
  * heuristics avoids averaging codelet gflops */
  * heuristics avoids averaging codelet gflops */
 #define IDLE_FACTOR 2
 #define IDLE_FACTOR 2
 
 
 static char *cpus_worker_colors[CPUS_WORKER_COLORS_NB] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4",  "/greens9/9", "/greens9/3",  "/greens9/2",  "/greens9/1"  };
 static char *cpus_worker_colors[CPUS_WORKER_COLORS_NB] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4",  "/greens9/9", "/greens9/3",  "/greens9/2",  "/greens9/1"  };
-static char *cuda_worker_colors[CUDA_WORKER_COLORS_NB] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
-static char *opencl_worker_colors[OPENCL_WORKER_COLORS_NB] = {"/blues9/9", "/blues9/6", "/blues9/3", "/blues9/1", "/blues9/8", "/blues9/7", "/blues9/4", "/blues9/2",  "/blues9/1"};
-static char *mic_worker_colors[MIC_WORKER_COLORS_NB] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
-static char *mpi_ms_worker_colors[MPI_MS_WORKER_COLORS_NB] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
-static char *other_worker_colors[OTHER_WORKER_COLORS_NB] = {"/greys9/9", "/greys9/8", "/greys9/7", "/greys9/6"};
+static char *accel_worker_colors[ACCEL_WORKER_COLORS_NB] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
 static char *worker_colors[STARPU_NMAXWORKERS];
 static char *worker_colors[STARPU_NMAXWORKERS];
 
 
-static unsigned opencl_index = 0;
-static unsigned cuda_index = 0;
 static unsigned cpus_index = 0;
 static unsigned cpus_index = 0;
-static unsigned mic_index = 0;
-static unsigned mpi_ms_index = 0;
-static unsigned other_index = 0;
+static unsigned accel_index = 0;
 static uint64_t* number_events = NULL;
 static uint64_t* number_events = NULL;
 
 
 static unsigned long fut_keymask;
 static unsigned long fut_keymask;
@@ -395,14 +383,6 @@ out:
 	free(data);
 	free(data);
 }
 }
 
 
-static void set_next_other_worker_color(int workerid)
-{
-	if (workerid >= STARPU_NMAXWORKERS)
-		return;
-	worker_colors[workerid] = other_worker_colors[other_index++];
-	if (other_index == OTHER_WORKER_COLORS_NB) other_index = 0;
-}
-
 static void set_next_cpu_worker_color(int workerid)
 static void set_next_cpu_worker_color(int workerid)
 {
 {
 	if (workerid >= STARPU_NMAXWORKERS)
 	if (workerid >= STARPU_NMAXWORKERS)
@@ -411,36 +391,12 @@ static void set_next_cpu_worker_color(int workerid)
 	if (cpus_index == CPUS_WORKER_COLORS_NB) cpus_index = 0;
 	if (cpus_index == CPUS_WORKER_COLORS_NB) cpus_index = 0;
 }
 }
 
 
-static void set_next_cuda_worker_color(int workerid)
-{
-	if (workerid >= STARPU_NMAXWORKERS)
-		return;
-	worker_colors[workerid] = cuda_worker_colors[cuda_index++];
-	if (cuda_index == CUDA_WORKER_COLORS_NB) cuda_index = 0;
-}
-
-static void set_next_opencl_worker_color(int workerid)
-{
-	if (workerid >= STARPU_NMAXWORKERS)
-		return;
-	worker_colors[workerid] = opencl_worker_colors[opencl_index++];
-	if (opencl_index == OPENCL_WORKER_COLORS_NB) opencl_index = 0;
-}
-
-static void set_next_mic_worker_color(int workerid)
-{
-	if (workerid >= STARPU_NMAXWORKERS)
-		return;
-	worker_colors[workerid] = mic_worker_colors[mic_index++];
-	if (mic_index == MIC_WORKER_COLORS_NB) mic_index = 0;
-}
-
-static void set_next_mpi_ms_worker_color(int workerid)
+static void set_next_accel_worker_color(int workerid)
 {
 {
 	if (workerid >= STARPU_NMAXWORKERS)
 	if (workerid >= STARPU_NMAXWORKERS)
 		return;
 		return;
-	worker_colors[workerid] = mpi_ms_worker_colors[mpi_ms_index++];
-	if (mpi_ms_index == MPI_MS_WORKER_COLORS_NB) mpi_ms_index = 0;
+	worker_colors[workerid] = accel_worker_colors[accel_index++];
+	if (accel_index == ACCEL_WORKER_COLORS_NB) accel_index = 0;
 }
 }
 
 
 static const char *get_worker_color(int workerid)
 static const char *get_worker_color(int workerid)
@@ -1231,56 +1187,24 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
 
 	new_thread = register_worker_id(prefixTOnodeid(prefix), threadid, workerid, set);
 	new_thread = register_worker_id(prefixTOnodeid(prefix), threadid, workerid, set);
 
 
-	char *kindstr = "";
+	const char *kindstr;
 	struct starpu_perfmodel_arch arch;
 	struct starpu_perfmodel_arch arch;
 	arch.ndevices = 1;
 	arch.ndevices = 1;
 	_STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device));
 	_STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device));
 
 
-	switch (ev->param[0])
-	{
-		case _STARPU_FUT_APPS_KEY:
-			set_next_other_worker_color(workerid);
-			kindstr = "APPS";
-			break;
-		case _STARPU_FUT_CPU_KEY:
-			set_next_cpu_worker_color(workerid);
-			kindstr = "CPU";
-			arch.devices[0].type = STARPU_CPU_WORKER;
-			arch.devices[0].devid = 0;
-			arch.devices[0].ncores = 1;
-			break;
-		case _STARPU_FUT_CUDA_KEY:
-			set_next_cuda_worker_color(workerid);
-			kindstr = "CUDA";
-			arch.devices[0].type = STARPU_CUDA_WORKER;
-			arch.devices[0].devid = devid;
-			arch.devices[0].ncores = 1;
-			break;
-		case _STARPU_FUT_OPENCL_KEY:
-			set_next_opencl_worker_color(workerid);
-			kindstr = "OPENCL";
-			arch.devices[0].type = STARPU_OPENCL_WORKER;
-			arch.devices[0].devid = devid;
-			arch.devices[0].ncores = 1;
-			break;
-		case _STARPU_FUT_MIC_KEY:
-			set_next_mic_worker_color(workerid);
-			kindstr = "mic";
-			arch.devices[0].type = STARPU_MIC_WORKER;
-			arch.devices[0].devid = devid;
-			arch.devices[0].ncores = 1;
-			break;
-		case _STARPU_FUT_MPI_KEY:
-			set_next_mpi_ms_worker_color(workerid);
-			kindstr = "mpi_ms";
-			arch.devices[0].type = STARPU_MPI_MS_WORKER;
-			arch.devices[0].devid = devid;
-			arch.devices[0].ncores = 1;
-			break;
+	enum starpu_worker_archtype archtype = _STARPU_FUT_KEY_WORKER(ev->param[0]);
+	STARPU_ASSERT(archtype >= 0 && archtype < STARPU_NARCH);
+
+	kindstr = starpu_worker_get_type_as_string(archtype);
+	arch.devices[0].type = archtype;
+	arch.devices[0].devid = 0;
+	arch.devices[0].ncores = 1;
+
+	if (archtype == STARPU_CPU_WORKER)
+		set_next_cpu_worker_color(workerid);
+	else
+		set_next_accel_worker_color(workerid);
 
 
-		default:
-			STARPU_ABORT();
-	}
 	double now = get_event_time_stamp(ev, options);
 	double now = get_event_time_stamp(ev, options);
 
 
 	if (out_paje_file)
 	if (out_paje_file)

+ 28 - 9
src/drivers/cpu/driver_cpu.c

@@ -60,6 +60,25 @@
 #include <windows.h>
 #include <windows.h>
 #endif
 #endif
 
 
+static struct starpu_driver_info driver_info = {
+	.name_upper = "CPU",
+	.name_var = "CPU",
+	.name_lower = "cpu",
+	.memory_kind = STARPU_CPU_RAM,
+	.alpha = 0.5f,
+};
+
+static struct starpu_memory_driver_info memory_driver_info = {
+	.name_upper = "NUMA",
+	.worker_archtype = STARPU_CPU_WORKER,
+};
+
+void _starpu_cpu_preinit(void)
+{
+	starpu_driver_info_register(STARPU_CPU_WORKER, &driver_info);
+	starpu_memory_driver_info_register(STARPU_CPU_RAM, &memory_driver_info);
+}
+
 
 
 #ifdef STARPU_USE_CPU
 #ifdef STARPU_USE_CPU
 /* Actually launch the job on a cpu worker.
 /* Actually launch the job on a cpu worker.
@@ -190,7 +209,7 @@ int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker)
 {
 {
 	int devid = cpu_worker->devid;
 	int devid = cpu_worker->devid;
 
 
-	_starpu_driver_start(cpu_worker, _STARPU_FUT_CPU_KEY, 1);
+	_starpu_driver_start(cpu_worker, STARPU_CPU_WORKER, 1);
 	snprintf(cpu_worker->name, sizeof(cpu_worker->name), "CPU %d", devid);
 	snprintf(cpu_worker->name, sizeof(cpu_worker->name), "CPU %d", devid);
 	snprintf(cpu_worker->short_name, sizeof(cpu_worker->short_name), "CPU %d", devid);
 	snprintf(cpu_worker->short_name, sizeof(cpu_worker->short_name), "CPU %d", devid);
 	starpu_pthread_setname(cpu_worker->short_name);
 	starpu_pthread_setname(cpu_worker->short_name);
@@ -361,7 +380,7 @@ int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker)
 	 * job. */
 	 * job. */
 
 
 	/* can a cpu perform that task ? */
 	/* can a cpu perform that task ? */
-	if (!_STARPU_CPU_MAY_PERFORM(j))
+	if (!_STARPU_MAY_PERFORM(j, CPU))
 	{
 	{
 		/* put it and the end of the queue ... XXX */
 		/* put it and the end of the queue ... XXX */
 		_starpu_push_task_to_workers(task);
 		_starpu_push_task_to_workers(task);
@@ -417,7 +436,7 @@ int _starpu_cpu_driver_deinit(struct _starpu_worker *cpu_worker)
 	_starpu_free_all_automatically_allocated_buffers(memnode);
 	_starpu_free_all_automatically_allocated_buffers(memnode);
 
 
 	cpu_worker->worker_is_initialized = 0;
 	cpu_worker->worker_is_initialized = 0;
-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CPU_KEY);
+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_CPU_WORKER);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -534,6 +553,9 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 #else
 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
 #endif
 #endif
+#ifdef STARPU_USE_FPGA
+	//.copy_interface_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_interface_from_cpu_to_fpga,
+#endif
 	.copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_cpu_to_disk,
 	.copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_cpu_to_disk,
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
 	.copy_interface_to[STARPU_MIC_RAM] = _starpu_mic_copy_interface_from_cpu_to_mic,
 	.copy_interface_to[STARPU_MIC_RAM] = _starpu_mic_copy_interface_from_cpu_to_mic,
@@ -545,9 +567,6 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 #else
 	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 #endif
 #endif
-#ifdef STARPU_USE_FPGA
-	//.copy_interface_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_interface_from_cpu_to_fpga,
-#endif
 
 
 	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data,
@@ -561,6 +580,9 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 #else
 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
 #endif
 #endif
+#ifdef STARPU_USE_FPGA
+	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_cpu_to_fpga,
+#endif
 	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_cpu_to_disk,
 	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_cpu_to_disk,
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
 	.copy_data_to[STARPU_MIC_RAM] = _starpu_mic_copy_data_from_cpu_to_mic,
 	.copy_data_to[STARPU_MIC_RAM] = _starpu_mic_copy_data_from_cpu_to_mic,
@@ -572,9 +594,6 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 #else
 	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 #endif
 #endif
-#ifdef STARPU_USE_FPGA
-	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_cpu_to_fpga,
-#endif
 
 
 	.copy2d_data_to[STARPU_UNUSED] = NULL,
 	.copy2d_data_to[STARPU_UNUSED] = NULL,
 	.copy2d_data_to[STARPU_CPU_RAM] = NULL,
 	.copy2d_data_to[STARPU_CPU_RAM] = NULL,

+ 2 - 0
src/drivers/cpu/driver_cpu.h

@@ -22,6 +22,8 @@
 #include <common/config.h>
 #include <common/config.h>
 #include <datawizard/node_ops.h>
 #include <datawizard/node_ops.h>
 
 
+void _starpu_cpu_preinit(void);
+
 extern struct _starpu_driver_ops _starpu_driver_cpu_ops;
 extern struct _starpu_driver_ops _starpu_driver_cpu_ops;
 extern struct _starpu_node_ops _starpu_driver_cpu_node_ops;
 extern struct _starpu_node_ops _starpu_driver_cpu_node_ops;
 
 

+ 8 - 48
src/drivers/cuda/driver_cuda.c

@@ -110,7 +110,7 @@ _starpu_cuda_discover_devices (struct _starpu_machine_config *config)
 	/* Discover the number of CUDA devices. Fill the result in CONFIG. */
 	/* Discover the number of CUDA devices. Fill the result in CONFIG. */
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-	config->topology.nhwcudagpus = _starpu_simgrid_get_nbhosts("CUDA");
+	config->topology.nhwdevices[STARPU_CUDA_WORKER] = _starpu_simgrid_get_nbhosts("CUDA");
 #else
 #else
 	int cnt;
 	int cnt;
 	cudaError_t cures;
 	cudaError_t cures;
@@ -118,7 +118,7 @@ _starpu_cuda_discover_devices (struct _starpu_machine_config *config)
 	cures = cudaGetDeviceCount (&cnt);
 	cures = cudaGetDeviceCount (&cnt);
 	if (STARPU_UNLIKELY(cures != cudaSuccess))
 	if (STARPU_UNLIKELY(cures != cudaSuccess))
 		cnt = 0;
 		cnt = 0;
-	config->topology.nhwcudagpus = cnt;
+	config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt;
 #ifdef HAVE_LIBNVIDIA_ML
 #ifdef HAVE_LIBNVIDIA_ML
 	nvmlInit();
 	nvmlInit();
 #endif
 #endif
@@ -684,12 +684,12 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 	int lastdevid = -1;
 	int lastdevid = -1;
 	unsigned i;
 	unsigned i;
 
 
-	_starpu_driver_start(worker0, _STARPU_FUT_CUDA_KEY, 0);
+	_starpu_driver_start(worker0, STARPU_CUDA_WORKER, 0);
 	_starpu_set_local_worker_set_key(worker_set);
 	_starpu_set_local_worker_set_key(worker_set);
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 	for (i = 1; i < worker_set->nworkers; i++)
 	for (i = 1; i < worker_set->nworkers; i++)
-		_starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_CUDA_KEY, 0);
+		_starpu_worker_start(&worker_set->workers[i], STARPU_CUDA_WORKER, 0);
 #endif
 #endif
 
 
 	for (i = 0; i < worker_set->nworkers; i++)
 	for (i = 0; i < worker_set->nworkers; i++)
@@ -710,7 +710,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 		init_device_context(devid, memnode);
 		init_device_context(devid, memnode);
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
-		if (worker->config->topology.nworkerpercuda > 1 && props[devid].concurrentKernels == 0)
+		if (worker->config->topology.nworker[STARPU_CUDA_WORKER][devid] > 1 && props[devid].concurrentKernels == 0)
 			_STARPU_DISP("Warning: STARPU_NWORKER_PER_CUDA is %u, but CUDA device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid);
 			_STARPU_DISP("Warning: STARPU_NWORKER_PER_CUDA is %u, but CUDA device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid);
 #endif /* !STARPU_SIMGRID */
 #endif /* !STARPU_SIMGRID */
 	}
 	}
@@ -723,7 +723,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 		struct _starpu_worker *worker = &worker_set->workers[i];
 		struct _starpu_worker *worker = &worker_set->workers[i];
 		unsigned devid = worker->devid;
 		unsigned devid = worker->devid;
 		unsigned workerid = worker->workerid;
 		unsigned workerid = worker->workerid;
-		unsigned subdev = i % _starpu_get_machine_config()->topology.nworkerpercuda;
+		unsigned subdev = i % _starpu_get_machine_config()->topology.nworker[STARPU_CUDA_WORKER][devid];
 
 
 		float size = (float) global_mem[devid] / (1<<30);
 		float size = (float) global_mem[devid] / (1<<30);
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
@@ -968,7 +968,7 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 		j = _starpu_get_job_associated_to_task(task);
 		j = _starpu_get_job_associated_to_task(task);
 
 
 		/* can CUDA do that task ? */
 		/* can CUDA do that task ? */
-		if (!_STARPU_CUDA_MAY_PERFORM(j))
+		if (!_STARPU_MAY_PERFORM(j, CUDA))
 		{
 		{
 			/* this is neither a cuda or a cublas task */
 			/* this is neither a cuda or a cublas task */
 			_starpu_worker_refuse_task(worker, task);
 			_starpu_worker_refuse_task(worker, task);
@@ -1067,7 +1067,7 @@ int _starpu_cuda_driver_deinit(struct _starpu_worker_set *worker_set)
 	}
 	}
 
 
 	worker_set->workers[0].worker_is_initialized = 0;
 	worker_set->workers[0].worker_is_initialized = 0;
-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CUDA_KEY);
+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -1832,37 +1832,17 @@ struct _starpu_driver_ops _starpu_driver_cuda_ops =
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 struct _starpu_node_ops _starpu_driver_cuda_node_ops =
 struct _starpu_node_ops _starpu_driver_cuda_node_ops =
 {
 {
-	.copy_interface_to[STARPU_UNUSED] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = NULL,
 	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
 	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = NULL,
 	.copy_data_to[STARPU_CUDA_RAM] = NULL,
 	.copy_data_to[STARPU_CUDA_RAM] = NULL,
-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_data_to[STARPU_DISK_RAM] = NULL,
-	.copy_data_to[STARPU_MIC_RAM] = NULL,
-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy2d_data_to[STARPU_UNUSED] = NULL,
 	.copy2d_data_to[STARPU_CPU_RAM] = NULL,
 	.copy2d_data_to[STARPU_CPU_RAM] = NULL,
 	.copy2d_data_to[STARPU_CUDA_RAM] = NULL,
 	.copy2d_data_to[STARPU_CUDA_RAM] = NULL,
-	.copy2d_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy2d_data_to[STARPU_DISK_RAM] = NULL,
-	.copy2d_data_to[STARPU_MIC_RAM] = NULL,
-	.copy2d_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy3d_data_to[STARPU_UNUSED] = NULL,
 	.copy3d_data_to[STARPU_CPU_RAM] = NULL,
 	.copy3d_data_to[STARPU_CPU_RAM] = NULL,
 	.copy3d_data_to[STARPU_CUDA_RAM] = NULL,
 	.copy3d_data_to[STARPU_CUDA_RAM] = NULL,
-	.copy3d_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy3d_data_to[STARPU_DISK_RAM] = NULL,
-	.copy3d_data_to[STARPU_MIC_RAM] = NULL,
-	.copy3d_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
 	.wait_request_completion = NULL,
 	.wait_request_completion = NULL,
 	.test_request_completion = NULL,
 	.test_request_completion = NULL,
@@ -1874,31 +1854,15 @@ struct _starpu_node_ops _starpu_driver_cuda_node_ops =
 #else
 #else
 struct _starpu_node_ops _starpu_driver_cuda_node_ops =
 struct _starpu_node_ops _starpu_driver_cuda_node_ops =
 {
 {
-	.copy_interface_to[STARPU_UNUSED] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cpu,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cpu,
 	.copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda,
 	.copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda,
-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu,
 	.copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda,
 	.copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda,
-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_data_to[STARPU_DISK_RAM] = NULL,
-	.copy_data_to[STARPU_MIC_RAM] = NULL,
-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy2d_data_to[STARPU_UNUSED] = NULL,
 	.copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu,
 	.copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu,
 	.copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda,
 	.copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda,
-	.copy2d_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy2d_data_to[STARPU_DISK_RAM] = NULL,
-	.copy2d_data_to[STARPU_MIC_RAM] = NULL,
-	.copy2d_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy3d_data_to[STARPU_UNUSED] = NULL,
 #if 0
 #if 0
 	.copy3d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy3d_data_from_cuda_to_cpu,
 	.copy3d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy3d_data_from_cuda_to_cpu,
 	.copy3d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy3d_data_from_cuda_to_cuda,
 	.copy3d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy3d_data_from_cuda_to_cuda,
@@ -1906,10 +1870,6 @@ struct _starpu_node_ops _starpu_driver_cuda_node_ops =
 	.copy3d_data_to[STARPU_CPU_RAM] = NULL,
 	.copy3d_data_to[STARPU_CPU_RAM] = NULL,
 	.copy3d_data_to[STARPU_CUDA_RAM] = NULL,
 	.copy3d_data_to[STARPU_CUDA_RAM] = NULL,
 #endif
 #endif
-	.copy3d_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy3d_data_to[STARPU_DISK_RAM] = NULL,
-	.copy3d_data_to[STARPU_MIC_RAM] = NULL,
-	.copy3d_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
 	.wait_request_completion = _starpu_cuda_wait_request_completion,
 	.wait_request_completion = _starpu_cuda_wait_request_completion,
 	.test_request_completion = _starpu_cuda_test_request_completion,
 	.test_request_completion = _starpu_cuda_test_request_completion,

+ 2 - 0
src/drivers/cuda/driver_cuda.h

@@ -22,6 +22,8 @@
 
 
 #include <common/config.h>
 #include <common/config.h>
 
 
+void _starpu_cuda_preinit(void);
+
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 #include <cuda.h>
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 #include <cuda_runtime_api.h>

+ 37 - 0
src/drivers/cuda/driver_cuda_init.c

@@ -0,0 +1,37 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <core/workers.h>
+#include <drivers/cuda/driver_cuda.h>
+
+static struct starpu_driver_info driver_info = {
+	.name_upper = "CUDA",
+	.name_var = "CUDA",
+	.name_lower = "cuda",
+	.memory_kind = STARPU_CUDA_RAM,
+	.alpha = 13.33f,
+};
+
+static struct starpu_memory_driver_info memory_driver_info = {
+	.name_upper = "CUDA",
+	.worker_archtype = STARPU_CUDA_WORKER,
+};
+
+void _starpu_cuda_preinit(void)
+{
+	starpu_driver_info_register(STARPU_CUDA_WORKER, &driver_info);
+	starpu_memory_driver_info_register(STARPU_CUDA_RAM, &memory_driver_info);
+}

+ 3 - 1
src/drivers/cuda/starpu_cublas.c

@@ -100,9 +100,11 @@ void starpu_cublas_shutdown(void)
 void starpu_cublas_set_stream(void)
 void starpu_cublas_set_stream(void)
 {
 {
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
+	unsigned workerid = starpu_worker_get_id_check();
+	int devid = starpu_worker_get_devid(workerid);
 	if (!_starpu_get_machine_config()->topology.cuda_th_per_dev ||
 	if (!_starpu_get_machine_config()->topology.cuda_th_per_dev ||
 		(!_starpu_get_machine_config()->topology.cuda_th_per_stream &&
 		(!_starpu_get_machine_config()->topology.cuda_th_per_stream &&
-		 _starpu_get_machine_config()->topology.nworkerpercuda > 1))
+		 _starpu_get_machine_config()->topology.nworker[STARPU_CUDA_WORKER][devid] > 1))
 		cublasSetKernelStream(starpu_cuda_get_local_stream());
 		cublasSetKernelStream(starpu_cuda_get_local_stream());
 #endif
 #endif
 }
 }

+ 10 - 10
src/drivers/disk/driver_disk.c

@@ -23,6 +23,16 @@
 #include <datawizard/coherency.h>
 #include <datawizard/coherency.h>
 #include <datawizard/memory_nodes.h>
 #include <datawizard/memory_nodes.h>
 
 
+static struct starpu_memory_driver_info memory_driver_info = {
+	.name_upper = "Disk",
+	.worker_archtype = (enum starpu_worker_archtype) -1,
+};
+
+void _starpu_disk_preinit(void)
+{
+	starpu_memory_driver_info_register(STARPU_DISK_RAM, &memory_driver_info);
+}
+
 int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel)
 int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel)
 {
 {
 	STARPU_ASSERT(starpu_node_get_kind(src_node) == STARPU_CPU_RAM);
 	STARPU_ASSERT(starpu_node_get_kind(src_node) == STARPU_CPU_RAM);
@@ -251,21 +261,11 @@ void _starpu_disk_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, i
 
 
 struct _starpu_node_ops _starpu_driver_disk_node_ops =
 struct _starpu_node_ops _starpu_driver_disk_node_ops =
 {
 {
-	.copy_interface_to[STARPU_UNUSED] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_disk_copy_interface_from_disk_to_cpu,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_disk_copy_interface_from_disk_to_cpu,
-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_disk_to_disk,
 	.copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_disk_to_disk,
-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_disk_copy_data_from_disk_to_cpu,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_disk_copy_data_from_disk_to_cpu,
-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_disk_to_disk,
 	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_disk_to_disk,
-	.copy_data_to[STARPU_MIC_RAM] = NULL,
-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
 	/* TODO: copy2D/3D? */
 	/* TODO: copy2D/3D? */
 
 

+ 2 - 0
src/drivers/disk/driver_disk.h

@@ -22,6 +22,8 @@
 
 
 #include <datawizard/node_ops.h>
 #include <datawizard/node_ops.h>
 
 
+void _starpu_disk_preinit(void);
+
 int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel);
 int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel);
 
 
 int _starpu_disk_copy_disk_to_src(void * src, size_t src_offset, unsigned src_node, void * dst, unsigned dst_node, size_t size, void * async_channel);
 int _starpu_disk_copy_disk_to_src(void * src, size_t src_offset, unsigned src_node, void * dst, unsigned dst_node, size_t size, void * async_channel);

+ 5 - 20
src/drivers/max/driver_fpga.c

@@ -80,7 +80,7 @@ void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
 	n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
 	n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
 	if (n != -1)
 	if (n != -1)
 	{
 	{
-		config->topology.nhwfpgafpgas = nfpgafpgas = n;
+		config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
 		return;
 		return;
 	}
 	}
 
 
@@ -134,7 +134,7 @@ void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
 
 
         //LMemInterface addLMemInterface()
         //LMemInterface addLMemInterface()
         //// pour récupérer l'accès à la LMem
         //// pour récupérer l'accès à la LMem
-	config->topology.nhwfpgafpgas = nfpgafpgas = n;
+	config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
 }
 }
 
 
 unsigned _starpu_fpga_get_device_count(void)
 unsigned _starpu_fpga_get_device_count(void)
@@ -208,7 +208,7 @@ int _starpu_fpga_driver_init(struct _starpu_worker *worker)
 {
 {
 	int devid = worker->devid;
 	int devid = worker->devid;
 	//fpga_msg("successful till here");
 	//fpga_msg("successful till here");
-	_starpu_driver_start(worker, _STARPU_FUT_CPU_KEY, 1);
+	_starpu_driver_start(worker, STARPU_FPGA_WORKER, 1);
 	/* FIXME: when we have NUMA support, properly turn node number into NUMA node number */
 	/* FIXME: when we have NUMA support, properly turn node number into NUMA node number */
 	// TODO: drop test when we allocated a memory node for fpga
 	// TODO: drop test when we allocated a memory node for fpga
 	if (worker->memory_node != STARPU_MAIN_RAM)
 	if (worker->memory_node != STARPU_MAIN_RAM)
@@ -259,16 +259,11 @@ static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker
 	if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
 	if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
 	{
 	{
 		_starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl);
 		_starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl);
-		//char *kernel_type = _starpu_task_get_fpga_kernel_type_nth_implementation(cl, j->nimpl);
-		//printf("chanel reserved: %d \n",chnl);
 
 
 		STARPU_ASSERT_MSG(func, "when STARPU_FPGA is defined in 'where', fpga_func or fpga_funcs has to be defined");
 		STARPU_ASSERT_MSG(func, "when STARPU_FPGA is defined in 'where', fpga_func or fpga_funcs has to be defined");
 		if (_starpu_get_disable_kernels() <= 0)
 		if (_starpu_get_disable_kernels() <= 0)
 		{
 		{
 			_STARPU_TRACE_START_EXECUTING();
 			_STARPU_TRACE_START_EXECUTING();
-			//int chnl = fpga_reserve_chanel_of_kernel_type(kernel_type);
-			//_starpu_fpga_transfer_data(_STARPU_TASK_GET_INTERFACES(task), j, chnl);
-			//fpga_release_chanel(chnl);
 			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 			_STARPU_TRACE_END_EXECUTING();
 			_STARPU_TRACE_END_EXECUTING();
 		}
 		}
@@ -308,7 +303,7 @@ int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
 	j = _starpu_get_job_associated_to_task(task);
 	j = _starpu_get_job_associated_to_task(task);
 
 
 	/* can a cpu perform that task ? */
 	/* can a cpu perform that task ? */
-	if (!_STARPU_FPGA_MAY_PERFORM(j))
+	if (!_STARPU_MAY_PERFORM(j, FPGA))
 	{
 	{
 		/* put it and the end of the queue ... XXX */
 		/* put it and the end of the queue ... XXX */
 		_starpu_push_task_to_workers(task);
 		_starpu_push_task_to_workers(task);
@@ -399,7 +394,7 @@ int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker)
 	_starpu_free_all_automatically_allocated_buffers(memnode);
 	_starpu_free_all_automatically_allocated_buffers(memnode);
 
 
 	fpga_worker->worker_is_initialized = 0;
 	fpga_worker->worker_is_initialized = 0;
-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CPU_KEY);
+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_FPGA_WORKER);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -642,21 +637,11 @@ struct _starpu_driver_ops _starpu_driver_fpga_ops =
 // TODO: transfers
 // TODO: transfers
 struct _starpu_node_ops _starpu_driver_fpga_node_ops =
 struct _starpu_node_ops _starpu_driver_fpga_node_ops =
 {
 {
-	.copy_data_to[STARPU_UNUSED] = NULL,
 	//.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
 	//.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
 	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
 	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_data_to[STARPU_DISK_RAM] = NULL,
-	.copy_data_to[STARPU_MIC_RAM] = NULL,
-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy_interface_to[STARPU_UNUSED] = NULL,
 	//.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
 	//.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
 	.copy_interface_to[STARPU_FPGA_RAM] = NULL,
 	.copy_interface_to[STARPU_FPGA_RAM] = NULL,
-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 
 
         .wait_request_completion = NULL,
         .wait_request_completion = NULL,
 	.test_request_completion = NULL,
 	.test_request_completion = NULL,

+ 2 - 0
src/drivers/max/driver_fpga.h

@@ -30,6 +30,8 @@
 #include <core/perfmodel/perfmodel.h>
 #include <core/perfmodel/perfmodel.h>
 #include <common/fxt.h>
 #include <common/fxt.h>
 
 
+void _starpu_fpga_preinit(void);
+
 #ifdef STARPU_USE_FPGA
 #ifdef STARPU_USE_FPGA
 typedef unsigned * fpga_mem;
 typedef unsigned * fpga_mem;
 
 

+ 37 - 0
src/drivers/max/driver_fpga_init.c

@@ -0,0 +1,37 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <core/workers.h>
+#include <drivers/max/driver_fpga.h>
+
+static struct starpu_driver_info driver_info = {
+	.name_upper = "FPGA",
+	.name_var = "FPGA",
+	.name_lower = "fpga",
+	.memory_kind = STARPU_FPGA_RAM,
+	.alpha = 0.5,
+};
+
+static struct starpu_memory_driver_info memory_driver_info = {
+	.name_upper = "FPGA",
+	.worker_archtype = STARPU_FPGA_WORKER,
+};
+
+void _starpu_fpga_preinit(void)
+{
+	starpu_driver_info_register(STARPU_FPGA_WORKER, &driver_info);
+	starpu_memory_driver_info_register(STARPU_FPGA_RAM, &memory_driver_info);
+}

+ 19 - 15
src/starpu_parameters.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2013       Thibaut Lambert
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,19 +14,24 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#ifndef _STARPU_PARAMETERS_H
-#define _STARPU_PARAMETERS_H
+#include <core/workers.h>
+#include <drivers/mic/driver_mic_source.h>
 
 
-/** @file */
+static struct starpu_driver_info driver_info = {
+	.name_upper = "MIC",
+	.name_var = "MIC",
+	.name_lower = "mic",
+	.memory_kind = STARPU_MIC_RAM,
+	.alpha = 0.5f,
+};
 
 
-/* Parameters which are not worth being added to ./configure options, but
- * still interesting to easily change */
+static struct starpu_memory_driver_info memory_driver_info = {
+	.name_upper = "MIC",
+	.worker_archtype = STARPU_MIC_WORKER,
+};
 
 
-/* Assumed relative performance ratios */
-/* TODO: benchmark a bit instead */
-#define _STARPU_CPU_ALPHA	1.0f
-#define _STARPU_CUDA_ALPHA	13.33f
-#define _STARPU_OPENCL_ALPHA	12.22f
-#define _STARPU_MIC_ALPHA	0.5f
-#define _STARPU_MPI_MS_ALPHA	1.0f
-#endif /* _STARPU_PARAMETERS_H */
+void _starpu_mic_preinit(void)
+{
+	starpu_driver_info_register(STARPU_MIC_WORKER, &driver_info);
+	starpu_memory_driver_info_register(STARPU_MIC_RAM, &memory_driver_info);
+}

+ 4 - 14
src/drivers/mic/driver_mic_source.c

@@ -241,7 +241,7 @@ unsigned starpu_mic_device_get_count(void)
     struct _starpu_machine_config *config = _starpu_get_machine_config ();
     struct _starpu_machine_config *config = _starpu_get_machine_config ();
     struct _starpu_machine_topology *topology = &config->topology;
     struct _starpu_machine_topology *topology = &config->topology;
 
 
-    return topology->nmicdevices;
+    return topology->ndevices[STARPU_MIC_WORKER];
 }
 }
 
 
 starpu_mic_kernel_t _starpu_mic_src_get_kernel_from_codelet(struct starpu_codelet *cl, unsigned nimpl)
 starpu_mic_kernel_t _starpu_mic_src_get_kernel_from_codelet(struct starpu_codelet *cl, unsigned nimpl)
@@ -521,16 +521,16 @@ void *_starpu_mic_src_worker(void *arg)
 
 
 	/* unsigned memnode = baseworker->memory_node; */
 	/* unsigned memnode = baseworker->memory_node; */
 
 
-	_starpu_driver_start(baseworker, _STARPU_FUT_MIC_KEY, 0);
+	_starpu_driver_start(baseworker, STARPU_MIC_WORKER, 0);
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 	for (i = 1; i < worker_set->nworkers; i++)
 	for (i = 1; i < worker_set->nworkers; i++)
-		_starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MIC_KEY, 0);
+		_starpu_worker_start(&worker_set->workers[i], STARPU_FUT_WORKER, 0);
 #endif
 #endif
 
 
 	// Current task for a thread managing a worker set has no sense.
 	// Current task for a thread managing a worker set has no sense.
 	_starpu_set_current_task(NULL);
 	_starpu_set_current_task(NULL);
 
 
-	for (i = 0; i < config->topology.nmiccores[devid]; i++)
+	for (i = 0; i < config->topology.nworker[STARPU_MIC_WORKER][devid]; i++)
 	{
 	{
 		struct _starpu_worker *worker = &config->workers[baseworkerid+i];
 		struct _starpu_worker *worker = &config->workers[baseworkerid+i];
 		snprintf(worker->name, sizeof(worker->name), "MIC %u core %u", devid, i);
 		snprintf(worker->name, sizeof(worker->name), "MIC %u core %u", devid, i);
@@ -686,21 +686,11 @@ void _starpu_mic_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, in
 /* TODO: MIC -> MIC */
 /* TODO: MIC -> MIC */
 struct _starpu_node_ops _starpu_driver_mic_node_ops =
 struct _starpu_node_ops _starpu_driver_mic_node_ops =
 {
 {
-	.copy_interface_to[STARPU_UNUSED] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_mic_copy_interface_from_mic_to_cpu,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_mic_copy_interface_from_mic_to_cpu,
-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
 	.copy_interface_to[STARPU_MIC_RAM] = NULL,
 	.copy_interface_to[STARPU_MIC_RAM] = NULL,
-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_mic_copy_data_from_mic_to_cpu,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_mic_copy_data_from_mic_to_cpu,
-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_data_to[STARPU_DISK_RAM] = NULL,
 	.copy_data_to[STARPU_MIC_RAM] = NULL,
 	.copy_data_to[STARPU_MIC_RAM] = NULL,
-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
 	/* TODO: copy2D/3D? */
 	/* TODO: copy2D/3D? */
 
 

+ 2 - 0
src/drivers/mic/driver_mic_source.h

@@ -23,6 +23,8 @@
 #include <starpu_mic.h>
 #include <starpu_mic.h>
 #include <common/config.h>
 #include <common/config.h>
 
 
+void _starpu_mic_preinit(void);
+
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
 
 
 #include <source/COIProcess_source.h>
 #include <source/COIProcess_source.h>

+ 1 - 1
src/drivers/mpi/driver_mpi_common.c

@@ -127,7 +127,7 @@ void _starpu_mpi_common_mp_initialize_src_sink(struct _starpu_mp_node *node)
 {
 {
         struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology;
         struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology;
 
 
-        node->nb_cores = topology->nhwcpus;
+        node->nb_cores = topology->nhwworker[STARPU_CPU_WORKER][0];
 }
 }
 
 
 int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node)
 int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node)

+ 37 - 0
src/drivers/mpi/driver_mpi_init.c

@@ -0,0 +1,37 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <core/workers.h>
+#include <drivers/mpi/driver_mpi_source.h>
+
+static struct starpu_driver_info driver_info = {
+	.name_upper = "MPI_MS",
+	.name_var = "MPI_MS",
+	.name_lower = "mpi_ms",
+	.memory_kind = STARPU_MPI_MS_RAM,
+	.alpha = 1.0f,
+};
+
+static struct starpu_memory_driver_info memory_driver_info = {
+	.name_upper = "MPI_MS",
+	.worker_archtype = STARPU_MPI_MS_WORKER,
+};
+
+void _starpu_mpi_ms_preinit(void)
+{
+	starpu_driver_info_register(STARPU_MPI_MS_WORKER, &driver_info);
+	starpu_memory_driver_info_register(STARPU_MPI_MS_RAM, &memory_driver_info);
+}

+ 3 - 13
src/drivers/mpi/driver_mpi_source.c

@@ -310,17 +310,17 @@ void *_starpu_mpi_src_worker(void *arg)
 
 
                 /* unsigned memnode = baseworker->memory_node; */
                 /* unsigned memnode = baseworker->memory_node; */
 
 
-                _starpu_driver_start(baseworker, _STARPU_FUT_MPI_KEY, 0);
+                _starpu_driver_start(baseworker, STARPU_CPU_WORKER, 0);
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
                 for (i = 1; i < worker_set->nworkers; i++)
                 for (i = 1; i < worker_set->nworkers; i++)
-                        _starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MPI_KEY, 0);
+                        _starpu_worker_start(&worker_set->workers[i], STARPU_MPI_WORKER, 0);
 #endif
 #endif
 
 
                 // Current task for a thread managing a worker set has no sense.
                 // Current task for a thread managing a worker set has no sense.
                 _starpu_set_current_task(NULL);
                 _starpu_set_current_task(NULL);
 
 
-                for (i = 0; i < config->topology.nmpicores[devid]; i++)
+                for (i = 0; i < config->topology.nworker[STARPU_MPI_MS_WORKER][devid]; i++)
                 {
                 {
                         struct _starpu_worker *worker = &config->workers[baseworkerid+i];
                         struct _starpu_worker *worker = &config->workers[baseworkerid+i];
                         snprintf(worker->name, sizeof(worker->name), "MPI_MS %u core %u", devid, i);
                         snprintf(worker->name, sizeof(worker->name), "MPI_MS %u core %u", devid, i);
@@ -546,20 +546,10 @@ void _starpu_mpi_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, in
 
 
 struct _starpu_node_ops _starpu_driver_mpi_node_ops =
 struct _starpu_node_ops _starpu_driver_mpi_node_ops =
 {
 {
-	.copy_interface_to[STARPU_UNUSED] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_mpi_copy_interface_from_mpi_to_cpu,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_mpi_copy_interface_from_mpi_to_cpu,
-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
-	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
 	.copy_interface_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_interface_from_mpi_to_mpi,
 	.copy_interface_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_interface_from_mpi_to_mpi,
 
 
-	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_mpi_copy_data_from_mpi_to_cpu,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_mpi_copy_data_from_mpi_to_cpu,
-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
-	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_data_to[STARPU_DISK_RAM] = NULL,
-	.copy_data_to[STARPU_MIC_RAM] = NULL,
 	.copy_data_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_data_from_mpi_to_mpi,
 	.copy_data_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_data_from_mpi_to_mpi,
 
 
 	/* TODO: copy2D/3D? */
 	/* TODO: copy2D/3D? */

+ 2 - 1
src/drivers/mpi/driver_mpi_source.h

@@ -23,8 +23,9 @@
 #include <starpu_mpi_ms.h>
 #include <starpu_mpi_ms.h>
 #include <datawizard/node_ops.h>
 #include <datawizard/node_ops.h>
 
 
-#ifdef STARPU_USE_MPI_MASTER_SLAVE
+void _starpu_mpi_ms_preinit(void);
 
 
+#ifdef STARPU_USE_MPI_MASTER_SLAVE
 extern struct _starpu_node_ops _starpu_driver_mpi_node_ops;
 extern struct _starpu_node_ops _starpu_driver_mpi_node_ops;
 
 
 /** Array of structures containing all the informations useful to send
 /** Array of structures containing all the informations useful to send

+ 4 - 24
src/drivers/opencl/driver_opencl.c

@@ -71,7 +71,7 @@ _starpu_opencl_discover_devices(struct _starpu_machine_config *config)
 	/* As OpenCL must have been initialized before calling this function,
 	/* As OpenCL must have been initialized before calling this function,
 	 * `nb_device' is ensured to be correctly set. */
 	 * `nb_device' is ensured to be correctly set. */
 	STARPU_ASSERT(init_done == 1);
 	STARPU_ASSERT(init_done == 1);
-	config->topology.nhwopenclgpus = nb_devices;
+	config->topology.nhwdevices[STARPU_OPENCL_WORKER] = nb_devices;
 }
 }
 
 
 static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid)
 static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid)
@@ -623,7 +623,7 @@ int _starpu_opencl_driver_init(struct _starpu_worker *worker)
 {
 {
 	int devid = worker->devid;
 	int devid = worker->devid;
 
 
-	_starpu_driver_start(worker, _STARPU_FUT_OPENCL_KEY, 0);
+	_starpu_driver_start(worker, STARPU_OPENCL_WORKER, 0);
 
 
 	_starpu_opencl_init_context(devid);
 	_starpu_opencl_init_context(devid);
 
 
@@ -817,7 +817,7 @@ int _starpu_opencl_driver_run_once(struct _starpu_worker *worker)
 		worker->current_task = task;
 		worker->current_task = task;
 
 
 	/* can OpenCL do that task ? */
 	/* can OpenCL do that task ? */
-	if (!_STARPU_OPENCL_MAY_PERFORM(j))
+	if (!_STARPU_MAY_PERFORM(j, OPENCL))
 	{
 	{
 		/* this is not a OpenCL task */
 		/* this is not a OpenCL task */
 		_starpu_worker_refuse_task(worker, task);
 		_starpu_worker_refuse_task(worker, task);
@@ -853,7 +853,7 @@ int _starpu_opencl_driver_deinit(struct _starpu_worker *worker)
         _starpu_opencl_deinit_context(devid);
         _starpu_opencl_deinit_context(devid);
 
 
 	worker->worker_is_initialized = 0;
 	worker->worker_is_initialized = 0;
-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_OPENCL_KEY);
+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_OPENCL_WORKER);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -1356,21 +1356,11 @@ int _starpu_opencl_is_direct_access_supported(unsigned node, unsigned handling_n
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 struct _starpu_node_ops _starpu_driver_opencl_node_ops =
 struct _starpu_node_ops _starpu_driver_opencl_node_ops =
 {
 {
-	.copy_interface_to[STARPU_UNUSED] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = NULL,
-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = NULL,
-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
-	.copy_data_to[STARPU_DISK_RAM] = NULL,
-	.copy_data_to[STARPU_MIC_RAM] = NULL,
-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
 	.wait_request_completion = NULL,
 	.wait_request_completion = NULL,
 	.test_request_completion = NULL,
 	.test_request_completion = NULL,
@@ -1382,21 +1372,11 @@ struct _starpu_node_ops _starpu_driver_opencl_node_ops =
 #else
 #else
 struct _starpu_node_ops _starpu_driver_opencl_node_ops =
 struct _starpu_node_ops _starpu_driver_opencl_node_ops =
 {
 {
-	.copy_interface_to[STARPU_UNUSED] = NULL,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_opencl_copy_interface_from_opencl_to_cpu,
 	.copy_interface_to[STARPU_CPU_RAM] = _starpu_opencl_copy_interface_from_opencl_to_cpu,
-	.copy_interface_to[STARPU_CUDA_RAM] = NULL,
 	.copy_interface_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_interface_from_opencl_to_opencl,
 	.copy_interface_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_interface_from_opencl_to_opencl,
-	.copy_interface_to[STARPU_DISK_RAM] = NULL,
-	.copy_interface_to[STARPU_MIC_RAM] = NULL,
-	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 
 
-	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_opencl_copy_data_from_opencl_to_cpu,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_opencl_copy_data_from_opencl_to_cpu,
-	.copy_data_to[STARPU_CUDA_RAM] = NULL,
 	.copy_data_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_opencl_to_opencl,
 	.copy_data_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_opencl_to_opencl,
-	.copy_data_to[STARPU_DISK_RAM] = NULL,
-	.copy_data_to[STARPU_MIC_RAM] = NULL,
-	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 
 	/* TODO: copy2D/3D? */
 	/* TODO: copy2D/3D? */
 
 

+ 2 - 0
src/drivers/opencl/driver_opencl.h

@@ -36,6 +36,8 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <datawizard/node_ops.h>
 #include <datawizard/node_ops.h>
 
 
+void _starpu_opencl_preinit(void);
+
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 struct _starpu_machine_config;
 struct _starpu_machine_config;
 void _starpu_opencl_discover_devices(struct _starpu_machine_config *config);
 void _starpu_opencl_discover_devices(struct _starpu_machine_config *config);

+ 37 - 0
src/drivers/opencl/driver_opencl_init.c

@@ -0,0 +1,37 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <core/workers.h>
+#include <drivers/opencl/driver_opencl.h>
+
+static struct starpu_driver_info driver_info = {
+	.name_upper = "OpenCL",
+	.name_var = "OPENCL",
+	.name_lower = "opencl",
+	.memory_kind = STARPU_OPENCL_RAM,
+	.alpha = 12.22f,
+};
+
+static struct starpu_memory_driver_info memory_driver_info = {
+	.name_upper = "OpenCL",
+	.worker_archtype = STARPU_OPENCL_WORKER,
+};
+
+void _starpu_opencl_preinit(void)
+{
+	starpu_driver_info_register(STARPU_OPENCL_WORKER, &driver_info);
+	starpu_memory_driver_info_register(STARPU_OPENCL_RAM, &memory_driver_info);
+}

+ 3 - 6
src/profiling/bound.c

@@ -231,12 +231,9 @@ static double** initialize_arch_duration(int maxdevid, unsigned* maxncore_table)
 static void initialize_duration(struct bound_task *task)
 static void initialize_duration(struct bound_task *task)
 {
 {
 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
-	task->duration[STARPU_CPU_WORKER] = initialize_arch_duration(1,&conf->topology.nhwcpus);
-	task->duration[STARPU_CUDA_WORKER] = initialize_arch_duration(conf->topology.nhwcudagpus,NULL);
-	task->duration[STARPU_OPENCL_WORKER] = initialize_arch_duration(conf->topology.nhwopenclgpus,NULL);
-	task->duration[STARPU_FPGA_WORKER] = initialize_arch_duration(conf->topology.nhwfpgafpgas,NULL);
-	task->duration[STARPU_MIC_WORKER] = initialize_arch_duration(conf->topology.nhwmicdevices,conf->topology.nmiccores);
-	task->duration[STARPU_MPI_MS_WORKER] = initialize_arch_duration(conf->topology.nhwmpidevices,conf->topology.nmpicores);
+	enum starpu_worker_archtype type;
+	for (type = 0; type < STARPU_NARCH; type++)
+		task->duration[type] = initialize_arch_duration(conf->topology.nhwdevices[type], conf->topology.nworker[type]);
 }
 }
 
 
 static struct starpu_perfmodel_device device =
 static struct starpu_perfmodel_device device =

+ 1 - 1
src/profiling/profiling.c

@@ -577,7 +577,7 @@ int starpu_bus_get_ngpus(int busid)
 	int ngpus = bus_ngpus[busid];
 	int ngpus = bus_ngpus[busid];
 	if (!ngpus)
 	if (!ngpus)
 		/* Unknown number of GPUs, assume it's shared by all GPUs */
 		/* Unknown number of GPUs, assume it's shared by all GPUs */
-		ngpus = topology->ncudagpus+topology->nopenclgpus;
+		ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER];
 	return ngpus;
 	return ngpus;
 }
 }
 
 

+ 4 - 4
src/sched_policies/heteroprio.c

@@ -37,7 +37,7 @@
 #define DBL_MAX __DBL_MAX__
 #define DBL_MAX __DBL_MAX__
 #endif
 #endif
 
 
-#define STARPU_NB_TYPES (STARPU_MAX_WORKER+1)
+#define STARPU_NB_TYPES STARPU_NARCH
 
 
 /* A bucket corresponds to a Pair of priorities
 /* A bucket corresponds to a Pair of priorities
  * When a task is pushed with a priority X, it will be stored
  * When a task is pushed with a priority X, it will be stored
@@ -112,7 +112,7 @@ struct _starpu_heteroprio_data
 
 
 static int starpu_heteroprio_types_to_arch(enum starpu_worker_archtype arch)
 static int starpu_heteroprio_types_to_arch(enum starpu_worker_archtype arch)
 {
 {
-	if (arch > STARPU_MAX_WORKER)
+	if (arch >= STARPU_NARCH)
 		return 0;
 		return 0;
 	return STARPU_WORKER_TO_MASK(arch);
 	return STARPU_WORKER_TO_MASK(arch);
 }
 }
@@ -174,7 +174,7 @@ static inline void default_init_sched(unsigned sched_ctx_id)
 	enum starpu_worker_archtype type;
 	enum starpu_worker_archtype type;
 
 
 	// By default each type of devices uses 1 bucket and no slow factor
 	// By default each type of devices uses 1 bucket and no slow factor
-	for (type = 0; type <= STARPU_MAX_WORKER; type++)
+	for (type = 0; type < STARPU_NARCH; type++)
 		if (starpu_worker_get_count_by_type(type) > 0)
 		if (starpu_worker_get_count_by_type(type) > 0)
 			starpu_heteroprio_set_nb_prios(sched_ctx_id, type, max_prio-min_prio+1);
 			starpu_heteroprio_set_nb_prios(sched_ctx_id, type, max_prio-min_prio+1);
 
 
@@ -183,7 +183,7 @@ static inline void default_init_sched(unsigned sched_ctx_id)
 	for(prio=min_prio ; prio<=max_prio ; prio++)
 	for(prio=min_prio ; prio<=max_prio ; prio++)
 	{
 	{
 		// By default each type of devices uses 1 bucket and no slow factor
 		// By default each type of devices uses 1 bucket and no slow factor
-		for (type = 0; type <= STARPU_MAX_WORKER; type++)
+		for (type = 0; type < STARPU_NARCH; type++)
 			if (starpu_worker_get_count_by_type(type) > 0)
 			if (starpu_worker_get_count_by_type(type) > 0)
 				starpu_heteroprio_set_mapping(sched_ctx_id, type, prio, prio);
 				starpu_heteroprio_set_mapping(sched_ctx_id, type, prio, prio);
 	}
 	}

+ 3 - 3
src/sched_policies/modular_ez.c

@@ -112,8 +112,8 @@ void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id,
 		nummaxids = starpu_worker_get_count() + starpu_combined_worker_get_count();
 		nummaxids = starpu_worker_get_count() + starpu_combined_worker_get_count();
 		if (starpu_memory_nodes_get_count() > nummaxids)
 		if (starpu_memory_nodes_get_count() > nummaxids)
 			nummaxids = starpu_memory_nodes_get_count();
 			nummaxids = starpu_memory_nodes_get_count();
-		if (STARPU_ANY_WORKER > nummaxids)
-			nummaxids = STARPU_ANY_WORKER;
+		if (STARPU_NARCH > nummaxids)
+			nummaxids = STARPU_NARCH;
 
 
 		if (sched == 0)
 		if (sched == 0)
 			decide_flags = flags & STARPU_SCHED_SIMPLE_DECIDE_MASK;
 			decide_flags = flags & STARPU_SCHED_SIMPLE_DECIDE_MASK;
@@ -154,7 +154,7 @@ void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id,
 			/* Count available architecture types */
 			/* Count available architecture types */
 			enum starpu_worker_archtype type;
 			enum starpu_worker_archtype type;
 			nbelow = 0;
 			nbelow = 0;
-			for (type = STARPU_CPU_WORKER; type < STARPU_ANY_WORKER; type++)
+			for (type = 0; type < STARPU_NARCH; type++)
 			{
 			{
 				if (starpu_worker_get_count_by_type(type))
 				if (starpu_worker_get_count_by_type(type))
 				{
 				{

+ 0 - 1
src/sched_policies/parallel_heft.c

@@ -22,7 +22,6 @@
 #include <limits.h>
 #include <limits.h>
 #include <core/workers.h>
 #include <core/workers.h>
 #include <core/perfmodel/perfmodel.h>
 #include <core/perfmodel/perfmodel.h>
-#include <starpu_parameters.h>
 #include <core/detect_combined_workers.h>
 #include <core/detect_combined_workers.h>
 #include <core/sched_policy.h>
 #include <core/sched_policy.h>
 #include <core/task.h>
 #include <core/task.h>

+ 2 - 0
src/util/fstarpu.c

@@ -81,6 +81,7 @@ static const intptr_t fstarpu_cuda_worker = STARPU_CUDA_WORKER;
 static const intptr_t fstarpu_opencl_worker = STARPU_OPENCL_WORKER;
 static const intptr_t fstarpu_opencl_worker = STARPU_OPENCL_WORKER;
 static const intptr_t fstarpu_mic_worker = STARPU_MIC_WORKER;
 static const intptr_t fstarpu_mic_worker = STARPU_MIC_WORKER;
 static const intptr_t fstarpu_any_worker = STARPU_ANY_WORKER;
 static const intptr_t fstarpu_any_worker = STARPU_ANY_WORKER;
+static const intptr_t fstarpu_narch = STARPU_NARCH;
 
 
 static const intptr_t fstarpu_nmaxbufs = STARPU_NMAXBUFS;
 static const intptr_t fstarpu_nmaxbufs = STARPU_NMAXBUFS;
 
 
@@ -174,6 +175,7 @@ intptr_t fstarpu_get_constant(char *s)
 	else if (!strcmp(s, "FSTARPU_OPENCL_WORKER"))	{ return fstarpu_opencl_worker; }
 	else if (!strcmp(s, "FSTARPU_OPENCL_WORKER"))	{ return fstarpu_opencl_worker; }
 	else if (!strcmp(s, "FSTARPU_MIC_WORKER"))	{ return fstarpu_mic_worker; }
 	else if (!strcmp(s, "FSTARPU_MIC_WORKER"))	{ return fstarpu_mic_worker; }
 	else if (!strcmp(s, "FSTARPU_ANY_WORKER"))	{ return fstarpu_any_worker; }
 	else if (!strcmp(s, "FSTARPU_ANY_WORKER"))	{ return fstarpu_any_worker; }
+	else if (!strcmp(s, "FSTARPU_NARCH"))	{ return fstarpu_narch; }
 
 
 	else if (!strcmp(s, "FSTARPU_NMAXBUFS"))	{ return fstarpu_nmaxbufs; }
 	else if (!strcmp(s, "FSTARPU_NMAXBUFS"))	{ return fstarpu_nmaxbufs; }
 
 

+ 1 - 1
src/util/openmp_runtime_support_omp_api.c

@@ -65,7 +65,7 @@ int starpu_omp_get_max_threads()
 
 
 int starpu_omp_get_num_procs (void)
 int starpu_omp_get_num_procs (void)
 {
 {
-	/* starpu_cpu_worker_get_count defined as topology.ncpus */
+	/* starpu_cpu_worker_get_count defined as topology.nworkers[STARPU_CPU_WORKER] */
 	return starpu_cpu_worker_get_count();
 	return starpu_cpu_worker_get_count();
 }
 }
 
 

+ 2 - 0
tests/Makefile.am

@@ -17,6 +17,8 @@ include $(top_srcdir)/starpu.mk
 
 
 AM_CFLAGS += -Wno-unused
 AM_CFLAGS += -Wno-unused
 AM_CXXFLAGS += -Wno-unused
 AM_CXXFLAGS += -Wno-unused
+AM_FFLAGS += -Wno-unused
+AM_FCFLAGS += -Wno-unused
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS)

+ 5 - 1
tests/datawizard/in_place_partition.c

@@ -30,7 +30,11 @@ int main(int argc, char **argv)
 	int ret;
 	int ret;
 	unsigned n, i, size;
 	unsigned n, i, size;
 
 
-	ret = starpu_initialize(NULL, &argc, &argv);
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.nfpga = 0;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 

+ 5 - 1
tests/datawizard/wt_broadcast.c

@@ -90,7 +90,11 @@ int main(void)
 {
 {
 	int ret;
 	int ret;
 
 
-	ret = starpu_init(NULL);
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.nfpga = 0;
+
+	ret = starpu_init(&conf);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 

+ 5 - 1
tests/microbenchs/prefetch_data_on_node.c

@@ -102,7 +102,11 @@ int main(int argc, char **argv)
 {
 {
 	int ret;
 	int ret;
 
 
-	ret = starpu_initialize(NULL, &argc, &argv);
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.nfpga = 0;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 

+ 1 - 13
tools/dev/checker/rename_internal.sed

@@ -26,9 +26,6 @@ s/\b_starpu_get_memory_node_description\b/_starpu_memory_node_get_description/g
 
 
 s/\bheft_policy\b/_starpu_sched_heft_policy/g
 s/\bheft_policy\b/_starpu_sched_heft_policy/g
 s/\bstruct starpu_priority_taskq_s\b/struct _starpu_priority_taskq/g
 s/\bstruct starpu_priority_taskq_s\b/struct _starpu_priority_taskq/g
-s/\bSTARPU_FUT_APPS_KEY\b/_STARPU_FUT_APPS_KEY/g
-s/\bSTARPU_FUT_CPU_KEY\b/_STARPU_FUT_CPU_KEY/g
-s/\bSTARPU_FUT_CUDA_KEY\b/_STARPU_FUT_CUDA_KEY/g
 s/\bSTARPU_FUT_DATA_COPY\b/_STARPU_FUT_DATA_COPY/g
 s/\bSTARPU_FUT_DATA_COPY\b/_STARPU_FUT_DATA_COPY/g
 s/\bSTARPU_FUT_DO_PROBE3STR\b/_STARPU_FUT_DO_PROBE3STR/g
 s/\bSTARPU_FUT_DO_PROBE3STR\b/_STARPU_FUT_DO_PROBE3STR/g
 s/\bSTARPU_FUT_DO_PROBE4STR\b/_STARPU_FUT_DO_PROBE4STR/g
 s/\bSTARPU_FUT_DO_PROBE4STR\b/_STARPU_FUT_DO_PROBE4STR/g
@@ -46,7 +43,6 @@ s/\bSTARPU_FUT_END_PUSH_OUTPUT\b/_STARPU_FUT_END_PUSH_OUTPUT/g
 s/\bSTARPU_FUT_JOB_POP\b/_STARPU_FUT_JOB_POP/g
 s/\bSTARPU_FUT_JOB_POP\b/_STARPU_FUT_JOB_POP/g
 s/\bSTARPU_FUT_JOB_PUSH\b/_STARPU_FUT_JOB_PUSH/g
 s/\bSTARPU_FUT_JOB_PUSH\b/_STARPU_FUT_JOB_PUSH/g
 s/\bSTARPU_FUT_NEW_MEM_NODE\b/_STARPU_FUT_NEW_MEM_NODE/g
 s/\bSTARPU_FUT_NEW_MEM_NODE\b/_STARPU_FUT_NEW_MEM_NODE/g
-s/\bSTARPU_FUT_OPENCL_KEY\b/_STARPU_FUT_OPENCL_KEY/g
 s/\bSTARPU_FUT_SET_PROFILING\b/_STARPU_FUT_SET_PROFILING/g
 s/\bSTARPU_FUT_SET_PROFILING\b/_STARPU_FUT_SET_PROFILING/g
 s/\bSTARPU_FUT_START_ALLOC\b/_STARPU_FUT_START_ALLOC/g
 s/\bSTARPU_FUT_START_ALLOC\b/_STARPU_FUT_START_ALLOC/g
 s/\bSTARPU_FUT_START_ALLOC_REUSE\b/_STARPU_FUT_START_ALLOC_REUSE/g
 s/\bSTARPU_FUT_START_ALLOC_REUSE\b/_STARPU_FUT_START_ALLOC_REUSE/g
@@ -123,10 +119,6 @@ s/\bSTARPU_TRACE_WORK_STEALING\b/_STARPU_TRACE_WORK_STEALING/g
 #s/\bSTARPU_DEFAULT_BETA\b/_STARPU_DEFAULT_BETA/g
 #s/\bSTARPU_DEFAULT_BETA\b/_STARPU_DEFAULT_BETA/g
 #s/\bSTARPU_DEFAULT_GAMMA\b/_STARPU_DEFAULT_GAMMA/g
 #s/\bSTARPU_DEFAULT_GAMMA\b/_STARPU_DEFAULT_GAMMA/g
 #s/\bSTARPU_CALIBRATION_MINIMUM\b/_STARPU_CALIBRATION_MINIMUM/g
 #s/\bSTARPU_CALIBRATION_MINIMUM\b/_STARPU_CALIBRATION_MINIMUM/g
-#s/\bSTARPU_CPU_ALPHA\b/_STARPU_CPU_ALPHA/g
-#s/\bSTARPU_CUDA_ALPHA\b/_STARPU_CUDA_ALPHA/g
-#s/\bSTARPU_OPENCL_ALPHA\b/_STARPU_OPENCL_ALPHA/g
-#s/\bSTARPU_GORDON_ALPHA\b/_STARPU_GORDON_ALPHA/g
 #
 #
 #s/\bstarpu_memory_node_to_devid\b/_starpu_memory_node_to_devid/g
 #s/\bstarpu_memory_node_to_devid\b/_starpu_memory_node_to_devid/g
 #s/\bstarpu_memchunk_recently_used\b/_starpu_memchunk_recently_used/g
 #s/\bstarpu_memchunk_recently_used\b/_starpu_memchunk_recently_used/g
@@ -136,11 +128,7 @@ s/\bSTARPU_TRACE_WORK_STEALING\b/_STARPU_TRACE_WORK_STEALING/g
 #s/\bstarpu_data_end_reduction_mode_terminate\b/_starpu_data_end_reduction_mode_terminate/g
 #s/\bstarpu_data_end_reduction_mode_terminate\b/_starpu_data_end_reduction_mode_terminate/g
 #s/\bcreate_request_to_fetch_data\b/_starpu_create_request_to_fetch_data/g
 #s/\bcreate_request_to_fetch_data\b/_starpu_create_request_to_fetch_data/g
 #
 #
-#s/\bSTARPU_CPU_MAY_PERFORM\b/_STARPU_CPU_MAY_PERFORM/g
-#s/\bSTARPU_CUDA_MAY_PERFORM\b/_STARPU_CUDA_MAY_PERFORM/g
-#s/\bSTARPU_SPU_MAY_PERFORM\b/_STARPU_SPU_MAY_PERFORM/g
-#s/\bSTARPU_GORDON_MAY_PERFORM\b/_STARPU_GORDON_MAY_PERFORM/g
-#s/\bSTARPU_OPENCL_MAY_PERFORM\b/_STARPU_OPENCL_MAY_PERFORM/g
+#s/\bSTARPU_MAY_PERFORM\b/_STARPU_MAY_PERFORM/g
 #
 #
 #s/\bSTARPU_TAG_SIZE\b/_STARPU_TAG_SIZE/g
 #s/\bSTARPU_TAG_SIZE\b/_STARPU_TAG_SIZE/g
 #s/\bSTARPU_HTBL_NODE_SIZE\b/_STARPU_HTBL_NODE_SIZE/g
 #s/\bSTARPU_HTBL_NODE_SIZE\b/_STARPU_HTBL_NODE_SIZE/g

+ 1 - 0
tools/starpu_perfmodel_display.c

@@ -156,6 +156,7 @@ int main(int argc, char **argv)
 #endif
 #endif
 
 
 	parse_args(argc, argv);
 	parse_args(argc, argv);
+	starpu_drivers_preinit();
 	starpu_perfmodel_initialize();
 	starpu_perfmodel_initialize();
 
 
         if (plist)
         if (plist)

+ 1 - 0
tools/starpu_perfmodel_plot.c

@@ -555,6 +555,7 @@ int main(int argc, char **argv)
 #endif
 #endif
 
 
 	parse_args(argc, argv, &options, &directory);
 	parse_args(argc, argv, &options, &directory);
+	starpu_drivers_preinit();
 	starpu_perfmodel_initialize();
 	starpu_perfmodel_initialize();
 
 
 	if (options.directory)
 	if (options.directory)