浏览代码

Fix cuda/opencl/fpga/mic/mpims ordering coherency

Samuel Thibault 4 年之前
父节点
当前提交
cd4a651f76

+ 1 - 1
Makefile.am

@@ -82,8 +82,8 @@ versinclude_HEADERS = 				\
 	include/starpu_util.h			\
 	include/starpu_fxt.h			\
 	include/starpu_cuda.h			\
-	include/starpu_fpga.h			\
 	include/starpu_opencl.h			\
+	include/starpu_fpga.h			\
 	include/starpu_openmp.h			\
 	include/starpu_sink.h			\
 	include/starpu_mic.h			\

+ 3 - 3
doc/doxygen/doxygen.cfg

@@ -1613,10 +1613,10 @@ INCLUDE_FILE_PATTERNS  =
 # undefined via #undef or recursively expanded use the := operator
 # instead of the = operator.
 
-PREDEFINED             = STARPU_USE_OPENCL=1 \
-                         STARPU_USE_CUDA=1 \
-                         STARPU_USE_MIC=1 \
+PREDEFINED             = STARPU_USE_CUDA=1 \
+                         STARPU_USE_OPENCL=1 \
                          STARPU_USE_FPGA=1 \
+                         STARPU_USE_MIC=1 \
 			 STARPU_USE_MPI=1 \
 			 STARPU_HAVE_HWLOC=1 \
 			 STARPU_USE_SC_HYPERVISOR=1 \

+ 13 - 5
examples/cpp/add_vectors_interface.cpp

@@ -171,6 +171,7 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 	.ram_to_ram = NULL,
 	.ram_to_cuda = NULL,
 	.ram_to_opencl = NULL,
+	.ram_to_fpga = NULL,
 	.ram_to_mic = NULL,
 
 	.cuda_to_ram = NULL,
@@ -181,6 +182,8 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 	.opencl_to_cuda = NULL,
 	.opencl_to_opencl = NULL,
 
+	.fpga_to_ram = NULL,
+
 	.mic_to_ram = NULL,
 
 	.ram_to_mpi_ms = NULL,
@@ -195,13 +198,16 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 	.opencl_to_ram_async = NULL,
 	.opencl_to_opencl_async = NULL,
 
-	.ram_to_mpi_ms_async = NULL,
-	.mpi_ms_to_ram_async = NULL,
-	.mpi_ms_to_mpi_ms_async = NULL,
+	.ram_to_fpga_async = NULL,
+	.fpga_to_ram_async = NULL,
 
 	.ram_to_mic_async = NULL,
 	.mic_to_ram_async = NULL,
 
+	.ram_to_mpi_ms_async = NULL,
+	.mpi_ms_to_ram_async = NULL,
+	.mpi_ms_to_mpi_ms_async = NULL,
+
 	.any_to_any = vector_interface_copy_any_to_any,
 };
 #else
@@ -213,6 +219,7 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 	NULL,
 	NULL,
 	NULL,
+	NULL,
 
 	NULL,
 	NULL,
@@ -225,8 +232,6 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 	NULL,
 
 	NULL,
-	NULL,
-	NULL,
 
 	NULL,
 	NULL,
@@ -242,10 +247,13 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 
 	NULL,
 	NULL,
+
+	NULL,
 	NULL,
 
 	NULL,
 	NULL,
+	NULL,
 
 	vector_interface_copy_any_to_any,
 };

+ 40 - 40
include/starpu.h

@@ -254,6 +254,40 @@ struct starpu_conf
 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
 
 	/**
+	   If this flag is set, the FPGA workers will be attached to
+	   the FPGA devices specified in the
+	   starpu_conf::workers_fpga_deviceid array. Otherwise, StarPU
+	   assigns the FPGA devices in a round-robin fashion. This
+	   can also be specified with the environment variable \ref
+	   STARPU_WORKERS_FPGAID.
+	   (default = 0)
+	*/
+        unsigned use_explicit_workers_fpga_deviceid;
+
+	/**
+	   If the starpu_conf::use_explicit_workers_fpga_deviceid flag
+	   is set, this array contains the logical identifiers of the
+	   FPGA devices to be used.
+	*/
+	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
+
+#ifdef STARPU_USE_FPGA
+	/**
+           This allows specifying the Maxeler file(s) to be loaded on FPGAs.
+	   This is an array of starpu_max_load, the last of which shall have
+	   file set to NULL. In order to use all available devices,
+	   starpu_max_load::engine_id_pattern can be set to "*", but only the
+           last non-NULL entry can be set so.
+
+	   If this is not set, it is assumed that the basic static SLiC
+           interface is used.
+        */
+	struct starpu_max_load *fpga_load;
+#else
+	void *fpga_load;
+#endif
+
+	/**
 	   If this flag is set, the MIC workers will be attached to
 	   the MIC devices specified in the array
 	   starpu_conf::workers_mic_deviceid. Otherwise, StarPU
@@ -290,40 +324,6 @@ struct starpu_conf
 	unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS];
 
 	/**
-	   If this flag is set, the FPGA workers will be attached to
-	   the FPGA devices specified in the
-	   starpu_conf::workers_fpga_deviceid array. Otherwise, StarPU
-	   affects the FPGA devices in a round-robin fashion. This
-	   can also be specified with the environment variable \ref
-	   STARPU_WORKERS_FPGAID.
-	   (default = 0)
-	*/
-        unsigned use_explicit_workers_fpga_deviceid;
-
-	/**
-	   If the starpu_conf::use_explicit_workers_fpga_deviceid flag
-	   is set, this array contains the logical identifiers of the
-	   FPGA devices to be used.
-	*/
-	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
-
-#ifdef STARPU_USE_FPGA
-	/**
-           This allows to specify the Maxeler file(s) to be loaded on FPGAs.
-	   This is an array of starpu_max_load, the last of which shall have
-	   file set to NULL. In order to use all available devices,
-	   starpu_max_load::engine_id_pattern can be set to "*", but only the
-           last non-NULL entry can be set so.
-
-	   If this is not set, it is assumed that the basic static SLiC
-           interface is used.
-        */
-	struct starpu_max_load *fpga_load;
-#else
-	void *fpga_load;
-#endif
-
-	/**
 	   If this flag is set, StarPU will recalibrate the bus.  If
 	   this value is equal to -1, the default value is used. This
 	   can also be specified with the environment variable \ref
@@ -658,6 +658,12 @@ int starpu_asynchronous_cuda_copy_disabled(void);
 int starpu_asynchronous_opencl_copy_disabled(void);
 
 /**
+   Return 1 if asynchronous data transfers between CPU and FPGA
+   devices are disabled.
+*/
+int starpu_asynchronous_fpga_copy_disabled(void);
+
+/**
    Return 1 if asynchronous data transfers between CPU and MIC devices
    are disabled.
 */
@@ -669,12 +675,6 @@ int starpu_asynchronous_mic_copy_disabled(void);
 */
 int starpu_asynchronous_mpi_ms_copy_disabled(void);
 
-/**
-   Return 1 if asynchronous data transfers between CPU and FPGA
-   devices are disabled.
-*/
-int starpu_asynchronous_fpga_copy_disabled(void);
-
 void starpu_display_stats(void);
 
 void starpu_get_version(int *major, int *minor, int *release);

+ 14 - 14
include/starpu_config.h.in

@@ -65,6 +65,13 @@
 #undef STARPU_USE_OPENCL
 
 /**
+   Defined when StarPU has been installed with FPGA support. It should
+   be used in your code to detect the availability of FPGA.
+   @ingroup API_FPGA_Extensions
+*/
+#undef STARPU_USE_FPGA
+
+/**
    Defined when StarPU has been installed with MIC support. It should
    be used in your code to detect the availability of MIC.
    @ingroup API_MIC_Extensions
@@ -80,13 +87,6 @@
 #undef STARPU_USE_MPI_MASTER_SLAVE
 
 /**
-   Defined when StarPU has been installed with FPGA support. It should
-   be used in your code to detect the availability of FPGA.
-   @ingroup API_FPGA_Extensions
-*/
-#undef STARPU_USE_FPGA
-
-/**
    Defined when StarPU has been installed with OpenMP Runtime support.
    It should be used in your code to detect the availability of the
    runtime support for OpenMP.
@@ -224,13 +224,6 @@
 #undef STARPU_MAXCUDADEVS
 
 /**
-   Define the maximum number of FPGA devices that are supported by
-   StarPU.
-   @ingroup API_FPGA_Extensions
- */
-#undef STARPU_MAXFPGADEVS
-
-/**
    Define the maximum number of OpenCL devices that are supported by
    StarPU.
    @ingroup API_OpenCL_Extensions
@@ -238,6 +231,13 @@
 #undef STARPU_MAXOPENCLDEVS
 
 /**
+   Define the maximum number of FPGA devices that are supported by
+   StarPU.
+   @ingroup API_FPGA_Extensions
+ */
+#undef STARPU_MAXFPGADEVS
+
+/**
    Define the maximum number of MIC devices that are supported by
    StarPU.
    @ingroup API_MIC_Extensions

+ 40 - 40
include/starpu_data_interfaces.h

@@ -124,16 +124,16 @@ struct starpu_data_copy_methods
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node CPU node to the \p dst_interface interface on the \p
-	   dst_node MIC node. Return 0 on success.
+	   dst_node FPGA node. Return 0 on success.
 	*/
-	int (*ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node CPU node to the \p dst_interface interface on the \p
-	   dst_node FPGA node. Return 0 on success.
+	   dst_node MIC node. Return 0 on success.
 	*/
-	int (*ram_to_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the
@@ -179,17 +179,17 @@ struct starpu_data_copy_methods
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node MIC node to the \p dst_interface interface on the \p
+	   \p src_node FPGA node to the \p dst_interface interface on the \p
 	   dst_node CPU node. Return 0 on success.
 	*/
-	int (*mic_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
+	int (*fpga_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node FPGA node to the \p dst_interface interface on the \p
+	   \p src_node MIC node to the \p dst_interface interface on the \p
 	   dst_node CPU node. Return 0 on success.
 	*/
-	int (*fpga_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
+	int (*mic_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the
@@ -288,32 +288,22 @@ struct starpu_data_copy_methods
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node CPU node to the \p dst_interface interface on the \p
-	   dst_node MPI Slave node, with the given even. Must return 0 if the
-	   transfer was actually completed completely synchronously, or
-	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
-	   should be awaited for by the core.
-	*/
-	int (*ram_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
-
-	/**
-	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node MPI Slave node to the \p dst_interface interface on
-	   the \p dst_node CPU node, with the given event. Must return 0 if
-	   the transfer was actually completed completely synchronously, or
-	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
-	   should be awaited for by the core.
+	   dst_node FPGA node. Must return 0 if the transfer was actually
+	   completed completely synchronously, or <c>-EAGAIN</c> if at least
+	   some transfers are still ongoing and should be awaited for by the
+	   core.
 	*/
-	int (*mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
+	int (*ram_to_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node MPI Slave node to the \p dst_interface interface on
-	   the \p dst_node MPI Slave node, using the given stream. Must
-	   return 0 if the transfer was actually completed completely
-	   synchronously, or <c>-EAGAIN</c> if at least some transfers are
-	   still ongoing and should be awaited for by the core.
+	   \p src_node FPGA node to the \p dst_interface interface on the \p
+	   dst_node CPU node. Must return 0 if the transfer was actually
+	   completed completely synchronously, or <c>-EAGAIN</c> if at least
+	   some transfers are still ongoing and should be awaited for by the
+	   core.
 	*/
-	int (*mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
+	int (*fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the
@@ -338,22 +328,32 @@ struct starpu_data_copy_methods
 	/**
 	   Define how to copy data from the \p src_interface interface on the
 	   \p src_node CPU node to the \p dst_interface interface on the \p
-	   dst_node FPGA node. Must return 0 if the transfer was actually
-	   completed completely synchronously, or <c>-EAGAIN</c> if at least
-	   some transfers are still ongoing and should be awaited for by the
-	   core.
+	   dst_node MPI Slave node, with the given event. Must return 0 if the
+	   transfer was actually completed completely synchronously, or
+	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
+	   should be awaited for by the core.
 	*/
-	int (*ram_to_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the
-	   \p src_node FPGA node to the \p dst_interface interface on the \p
-	   dst_node CPU node. Must return 0 if the transfer was actually
-	   completed completely synchronously, or <c>-EAGAIN</c> if at least
-	   some transfers are still ongoing and should be awaited for by the
-	   core.
+	   \p src_node MPI Slave node to the \p dst_interface interface on
+	   the \p dst_node CPU node, with the given event. Must return 0 if
+	   the transfer was actually completed completely synchronously, or
+	   <c>-EAGAIN</c> if at least some transfers are still ongoing and
+	   should be awaited for by the core.
 	*/
-	int (*fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
+	int (*mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
+
+	/**
+	   Define how to copy data from the \p src_interface interface on the
+	   \p src_node MPI Slave node to the \p dst_interface interface on
+	   the \p dst_node MPI Slave node, using the given stream. Must
+	   return 0 if the transfer was actually completed completely
+	   synchronously, or <c>-EAGAIN</c> if at least some transfers are
+	   still ongoing and should be awaited for by the core.
+	*/
+	int (*mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event);
 
 	/**
 	   Define how to copy data from the \p src_interface interface on the

+ 1 - 1
include/starpu_driver.h

@@ -53,12 +53,12 @@ struct starpu_driver
 	{
 		unsigned cpu_id;
 		unsigned cuda_id;
-		unsigned fpga_id;
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
 		cl_device_id opencl_id;
 #else
 		unsigned opencl_id;
 #endif
+		unsigned fpga_id;
 	} id;
 };
 

+ 35 - 40
include/starpu_task.h

@@ -68,16 +68,16 @@ extern "C"
 /**
    To be used when setting the field starpu_codelet::where (or
    starpu_task::where) to specify the codelet (or the task) may be
-   executed on a MAX FPGA.
+   executed on an OpenCL processing unit.
 */
-#define STARPU_FPGA	STARPU_WORKER_TO_MASK(STARPU_FPGA_WORKER)
+#define STARPU_OPENCL	STARPU_WORKER_TO_MASK(STARPU_OPENCL_WORKER)
 
 /**
    To be used when setting the field starpu_codelet::where (or
    starpu_task::where) to specify the codelet (or the task) may be
-   executed on a OpenCL processing unit.
+   executed on a MAX FPGA.
 */
-#define STARPU_OPENCL	STARPU_WORKER_TO_MASK(STARPU_OPENCL_WORKER)
+#define STARPU_FPGA	STARPU_WORKER_TO_MASK(STARPU_FPGA_WORKER)
 
 /**
    To be used when setting the field starpu_codelet::where (or
@@ -180,14 +180,14 @@ typedef void (*starpu_cpu_func_t)(void **, void*);
 typedef void (*starpu_cuda_func_t)(void **, void*);
 
 /**
-   FPGA implementation of a codelet.
+   OpenCL implementation of a codelet.
 */
-typedef void (*starpu_fpga_func_t)(void **, void*);
+typedef void (*starpu_opencl_func_t)(void **, void*);
 
 /**
-   OpenCL implementation of a codelet.
+   FPGA implementation of a codelet.
 */
-typedef void (*starpu_opencl_func_t)(void **, void*);
+typedef void (*starpu_fpga_func_t)(void **, void*);
 
 /**
    MIC implementation of a codelet.
@@ -229,21 +229,21 @@ typedef starpu_mpi_ms_kernel_t (*starpu_mpi_ms_func_t)(void);
 
 /**
    @deprecated
-   Setting the field starpu_codelet::fpga_func with this macro
+   Setting the field starpu_codelet::opencl_func with this macro
    indicates the codelet will have several implementations. The use of
    this macro is deprecated. One should always only define the field
-   starpu_codelet::fpga_funcs.
+   starpu_codelet::opencl_funcs.
 */
-#define STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS   ((starpu_fpga_func_t) -1)
+#define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t) -1)
 
 /**
    @deprecated
-   Setting the field starpu_codelet::opencl_func with this macro
+   Setting the field starpu_codelet::fpga_func with this macro
    indicates the codelet will have several implementations. The use of
    this macro is deprecated. One should always only define the field
-   starpu_codelet::opencl_funcs.
+   starpu_codelet::fpga_funcs.
 */
-#define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t) -1)
+#define STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS   ((starpu_fpga_func_t) -1)
 
 /**
    Value to set in starpu_codelet::nbuffers to specify that the
@@ -352,16 +352,16 @@ struct starpu_codelet
         /**
 	   @deprecated
 	   Optional field which has been made deprecated. One should
-	   use instead the starpu_codelet::fpga_funcs field.
+	   use instead the starpu_codelet::opencl_funcs field.
 	*/
-	starpu_fpga_func_t fpga_func STARPU_DEPRECATED;
+	starpu_opencl_func_t opencl_func STARPU_DEPRECATED;
 
         /**
 	   @deprecated
 	   Optional field which has been made deprecated. One should
-	   use instead the starpu_codelet::opencl_funcs field.
+	   use instead the starpu_codelet::fpga_funcs field.
 	*/
-	starpu_opencl_func_t opencl_func STARPU_DEPRECATED;
+	starpu_fpga_func_t fpga_func STARPU_DEPRECATED;
 
 	/**
 	   Optional array of function pointers to the CPU
@@ -396,23 +396,6 @@ struct starpu_codelet
 	starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS];
 
 	/**
-           Optional array of function pointers to the FPGA
-           implementations of the codelet. The functions prototype
-           must be:
-           \code{.c}
-           void fpga_func(void *buffers[], void *cl_arg)
-           \endcode
-           The first argument being the array of data managed by the
-           data management library, and the second argument is a
-           pointer to the argument passed from the field
-           starpu_task::cl_arg. If the field starpu_codelet::where is
-           set, then the field starpu_codelet::fpga_funcs is ignored if
-           ::STARPU_FPGA does not appear in the field
-           starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
-        */
-	starpu_fpga_func_t fpga_funcs[STARPU_MAXIMPLEMENTATIONS];
-
-	/**
 	   Optional array of flags for CUDA execution. They specify
 	   some semantic details about CUDA kernel execution, such as
 	   asynchronous execution.
@@ -441,6 +424,23 @@ struct starpu_codelet
 	char opencl_flags[STARPU_MAXIMPLEMENTATIONS];
 
 	/**
+           Optional array of function pointers to the FPGA
+           implementations of the codelet. The functions prototype
+           must be:
+           \code{.c}
+           void fpga_func(void *buffers[], void *cl_arg)
+           \endcode
+           The first argument being the array of data managed by the
+           data management library, and the second argument is a
+           pointer to the argument passed from the field
+           starpu_task::cl_arg. If the field starpu_codelet::where is
+           set, then the field starpu_codelet::fpga_funcs is ignored if
+           ::STARPU_FPGA does not appear in the field
+           starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
+        */
+	starpu_fpga_func_t fpga_funcs[STARPU_MAXIMPLEMENTATIONS];
+
+	/**
 	   Optional array of function pointers to a function which
 	   returns the MIC implementation of the codelet. The
 	   functions prototype must be:
@@ -482,11 +482,6 @@ struct starpu_codelet
 	const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
 
 	/**
-	   fpga kernel type
-        */
-	char *fpga_kernel_type[STARPU_MAXIMPLEMENTATIONS];
-
-	/**
 	   Specify the number of arguments taken by the codelet. These
 	   arguments are managed by the DSM and are accessed from the
 	   <c>void *buffers[]</c> array. The constant argument passed

+ 5 - 5
src/Makefile.am

@@ -138,9 +138,9 @@ noinst_HEADERS = 						\
 	drivers/mp_common/sink_common.h				\
 	drivers/cpu/driver_cpu.h				\
 	drivers/cuda/driver_cuda.h				\
-	drivers/max/driver_fpga.h				\
 	drivers/opencl/driver_opencl.h				\
 	drivers/opencl/driver_opencl_utils.h			\
+	drivers/max/driver_fpga.h				\
 	debug/starpu_debug_helpers.h				\
 	drivers/mic/driver_mic_common.h				\
 	drivers/mic/driver_mic_source.h				\
@@ -322,10 +322,6 @@ if STARPU_HAVE_LEVELDB
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_leveldb.cpp
 endif
 
-if STARPU_USE_FPGA
-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/max/driver_fpga.c
-endif
-
 if STARPU_HAVE_HDF5
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_hdf5.c
 endif
@@ -352,6 +348,10 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.
 endif
 endif
 
+if STARPU_USE_FPGA
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/max/driver_fpga.c
+endif
+
 if STARPU_LINUX_SYS
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += core/disk_ops/disk_unistd_o_direct.c
 endif

+ 1 - 1
src/common/fxt.h

@@ -40,8 +40,8 @@
 #define _STARPU_FUT_APPS_KEY	0x100
 #define _STARPU_FUT_CPU_KEY	0x101
 #define _STARPU_FUT_CUDA_KEY	0x102
-#define _STARPU_FUT_FPGA_KEY	0x109
 #define _STARPU_FUT_OPENCL_KEY	0x103
+#define _STARPU_FUT_FPGA_KEY	0x109
 #define _STARPU_FUT_MIC_KEY	0x104
 #define _STARPU_FUT_MPI_KEY	0x106
 

+ 4 - 4
src/core/perfmodel/perfmodel.h

@@ -87,10 +87,6 @@ void _starpu_load_bus_performance_files(void);
 void _starpu_set_calibrate_flag(unsigned val);
 unsigned _starpu_get_calibrate_flag(void);
 
-#if defined(STARPU_USE_FPGA)
-unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid);
-#endif
-
 #if defined(STARPU_USE_CUDA)
 unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid);
 #endif
@@ -98,6 +94,10 @@ unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid);
 unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid);
 #endif
 
+#if defined(STARPU_USE_FPGA)
+unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid);
+#endif
+
 void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name);
 
 void _starpu_write_double(FILE *f, const char *format, double val);

+ 13 - 13
src/core/perfmodel/perfmodel_bus.c

@@ -119,6 +119,12 @@ static char cudadev_direct[STARPU_MAXNODES][STARPU_MAXNODES];
 static uint64_t opencl_size[STARPU_MAXCUDADEVS];
 #endif
 
+#ifdef STARPU_USE_OPENCL
+/* preference order of cores (logical indexes) */
+static unsigned opencl_affinity_matrix[STARPU_MAXOPENCLDEVS][STARPU_MAXNUMANODES];
+static struct dev_timing opencldev_timing_per_numa[STARPU_MAXOPENCLDEVS*STARPU_MAXNUMANODES];
+#endif
+
 #ifdef STARPU_USE_FPGA
 /* preference order of cores (logical indexes) */
 static unsigned fpga_affinity_matrix[STARPU_MAXFPGADEVS][STARPU_MAXCPUS];
@@ -129,12 +135,6 @@ static double fpgadev_latency_dtoh[STARPU_MAXNODES] = {0.0};
 static struct dev_timing fpgadev_timing_per_cpu[STARPU_MAXNODES*STARPU_MAXCPUS];
 #endif
 
-#ifdef STARPU_USE_OPENCL
-/* preference order of cores (logical indexes) */
-static unsigned opencl_affinity_matrix[STARPU_MAXOPENCLDEVS][STARPU_MAXNUMANODES];
-static struct dev_timing opencldev_timing_per_numa[STARPU_MAXOPENCLDEVS*STARPU_MAXNUMANODES];
-#endif
-
 #ifdef STARPU_USE_MIC
 static double mic_time_host_to_device[STARPU_MAXNODES] = {0.0};
 static double mic_time_device_to_host[STARPU_MAXNODES] = {0.0};
@@ -1122,13 +1122,6 @@ unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid)
 }
 #endif /* STARPU_USE_CUDA */
 
-#ifdef STARPU_USE_FPGA
-unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid)
-{
-        return fpga_affinity_matrix[fpgaid];
-}
-#endif /* STARPU_USE_FPGA */
-
 #ifdef STARPU_USE_OPENCL
 unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid)
 {
@@ -1136,6 +1129,13 @@ unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid)
 }
 #endif /* STARPU_USE_OPENCL */
 
+#ifdef STARPU_USE_FPGA
+unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid)
+{
+        return fpga_affinity_matrix[fpgaid];
+}
+#endif /* STARPU_USE_FPGA */
+
 void starpu_bus_print_affinity(FILE *f)
 {
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)

+ 2 - 2
src/core/sched_policy.c

@@ -633,10 +633,10 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 					starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);
 				else if (task->where == STARPU_CUDA && config->cuda_nodeid >= 0)
 					starpu_prefetch_task_input_on_node(task, config->cuda_nodeid);
-				else if (task->cl->where == STARPU_FPGA && config->fpga_nodeid >= 0)
-					starpu_prefetch_task_input_on_node(task, config->fpga_nodeid);
 				else if (task->where == STARPU_OPENCL && config->opencl_nodeid >= 0)
 					starpu_prefetch_task_input_on_node(task, config->opencl_nodeid);
+				else if (task->cl->where == STARPU_FPGA && config->fpga_nodeid >= 0)
+					starpu_prefetch_task_input_on_node(task, config->fpga_nodeid);
 				else if (task->where == STARPU_MIC && config->mic_nodeid >= 0)
 					starpu_prefetch_task_input_on_node(task, config->mic_nodeid);
 			}

+ 29 - 29
src/core/task.c

@@ -631,35 +631,6 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 		where |= STARPU_CPU;
 	}
 
-       /* FPGA */
-	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS && cl->fpga_funcs[0])
-	{
-		_STARPU_DISP("[warning] [struct starpu_codelet] both fpga_func and fpga_funcs are set. Ignoring fpga_func.\n");
-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
-	}
-	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS)
-	{
-		cl->fpga_funcs[0] = cl->fpga_func;
-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
-	}
-	some_impl = 0;
-	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
-		if (cl->fpga_funcs[i])
-		{
-			some_impl = 1;
-			break;
-		}
-	if (some_impl && cl->fpga_func == 0)
-	{
-		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
-	}
-	if (some_impl && is_where_unset)
-	{
-		where |= STARPU_FPGA;
-	}
-
-
-
 	/* CUDA */
 	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0])
 	{
@@ -714,6 +685,35 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 		where |= STARPU_OPENCL;
 	}
 
+       /* FPGA */
+	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS && cl->fpga_funcs[0])
+	{
+		_STARPU_DISP("[warning] [struct starpu_codelet] both fpga_func and fpga_funcs are set. Ignoring fpga_func.\n");
+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
+	}
+	if (cl->fpga_func && cl->fpga_func != STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS)
+	{
+		cl->fpga_funcs[0] = cl->fpga_func;
+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
+	}
+	some_impl = 0;
+	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
+		if (cl->fpga_funcs[i])
+		{
+			some_impl = 1;
+			break;
+		}
+	if (some_impl && cl->fpga_func == 0)
+	{
+		cl->fpga_func = STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS;
+	}
+	if (some_impl && is_where_unset)
+	{
+		where |= STARPU_FPGA;
+	}
+
+
+
 	some_impl = 0;
 	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
 		if (cl->mic_funcs[i])

+ 4 - 9
src/core/task.h

@@ -97,14 +97,14 @@ static inline starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct
 	return cl->cuda_funcs[nimpl];
 }
 
-static inline starpu_fpga_func_t _starpu_task_get_fpga_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
+static inline starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
 {
-	return cl->fpga_funcs[nimpl];
+	return cl->opencl_funcs[nimpl];
 }
 
-static inline starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
+static inline starpu_fpga_func_t _starpu_task_get_fpga_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
 {
-	return cl->opencl_funcs[nimpl];
+	return cl->fpga_funcs[nimpl];
 }
 
 static inline starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
@@ -122,11 +122,6 @@ static inline const char *_starpu_task_get_cpu_name_nth_implementation(struct st
 	return cl->cpu_funcs_name[nimpl];
 }
 
-static inline char *_starpu_task_get_fpga_kernel_type_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
-{
-	return cl->fpga_kernel_type[nimpl];
-}
-
 #define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if (task->dyn_handles) task->dyn_interfaces[i] = interface; else task->interfaces[i] = interface;} while(0)
 #define _STARPU_TASK_GET_INTERFACES(task) ((task->dyn_handles) ? task->dyn_interfaces : task->interfaces)
 

+ 39 - 38
src/core/topology.c

@@ -84,7 +84,7 @@ static int _starpu_get_logical_numa_node_worker(unsigned workerid);
 #define STARPU_NUMA_UNINITIALIZED (-2)
 #define STARPU_NUMA_MAIN_RAM (-1)
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE) || defined(STARPU_USE_FPGA)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 
 struct handle_entry
 {
@@ -464,7 +464,7 @@ struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  * Discover the topology of the machine
  */
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE) || defined(STARPU_USE_FPGA)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
 						int *current, int *workers_gpuid,
 						const char *varname, unsigned nhwgpus,
@@ -572,30 +572,6 @@ static inline int _starpu_get_next_cuda_gpuid(struct _starpu_machine_config *con
 }
 #endif
 
-#if defined(STARPU_USE_FPGA)
-static void _starpu_initialize_workers_fpga_deviceid(struct _starpu_machine_config *config)
-{
-	struct _starpu_machine_topology *topology = &config->topology;
-	struct starpu_conf *uconf = &config->conf;
-
-        _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_fpga_deviceid == 0
-					    ? NULL
-					    : (int *)uconf->workers_fpga_deviceid,
-					    &(config->current_fpga_deviceid),
-					    (int *)topology->workers_fpga_deviceid,
-					    "STARPU_WORKERS_FPGAID",
-					    topology->nhwfpgafpgas,
-					    STARPU_FPGA_WORKER);
-}
-
-static inline int _starpu_get_next_fpga_deviceid (struct _starpu_machine_config *config)
-{
-	unsigned i = ((config->current_fpga_deviceid++) % config->topology.nfpgafpgas);
-
-	return (int)config->topology.workers_fpga_deviceid[i];
-}
-#endif
-
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config*config)
 {
@@ -677,6 +653,30 @@ static inline int _starpu_get_next_opencl_gpuid(struct _starpu_machine_config *c
 }
 #endif
 
+#if defined(STARPU_USE_FPGA)
+static void _starpu_initialize_workers_fpga_deviceid(struct _starpu_machine_config *config)
+{
+	struct _starpu_machine_topology *topology = &config->topology;
+	struct starpu_conf *uconf = &config->conf;
+
+        _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_fpga_deviceid == 0
+					    ? NULL
+					    : (int *)uconf->workers_fpga_deviceid,
+					    &(config->current_fpga_deviceid),
+					    (int *)topology->workers_fpga_deviceid,
+					    "STARPU_WORKERS_FPGAID",
+					    topology->nhwfpgafpgas,
+					    STARPU_FPGA_WORKER);
+}
+
+static inline int _starpu_get_next_fpga_deviceid (struct _starpu_machine_config *config)
+{
+	unsigned i = ((config->current_fpga_deviceid++) % config->topology.nfpgafpgas);
+
+	return (int)config->topology.workers_fpga_deviceid[i];
+}
+#endif
+
 #if 0
 #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
 static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
@@ -2657,17 +2657,18 @@ static void _starpu_init_workers_binding_and_memory(struct _starpu_machine_confi
 	int cuda_globalbindid = -1;
 #endif
 
+#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
+	unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
+	unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
+	unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
+#endif
+
 #if defined(STARPU_USE_FPGA)
 	unsigned fpga_init[STARPU_MAXFPGADEVS] = { };
 	unsigned fpga_memory_nodes[STARPU_MAXFPGADEVS];
 	unsigned fpga_bindid[STARPU_MAXFPGADEVS];
 #endif
 
-#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
-	unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
-	unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
-	unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
-#endif
 #ifdef STARPU_USE_MIC
 	unsigned mic_init[STARPU_MAXMICDEVS] = { };
 	unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
@@ -3144,8 +3145,8 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 	config->cpus_nodeid = -1;
 	config->cuda_nodeid = -1;
 	config->opencl_nodeid = -1;
-	config->mic_nodeid = -1;
 	config->fpga_nodeid = -1;
+	config->mic_nodeid = -1;
 	config->mpi_nodeid = -1;
 	for (i = 0; i < starpu_worker_get_count(); i++)
 	{
@@ -3165,18 +3166,18 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 					config->cuda_nodeid = -2;
 				break;
 
-                        case STARPU_FPGA_WORKER:
-				if (config->fpga_nodeid == -1)
-					config->fpga_nodeid = starpu_worker_get_memory_node(i);
-				else if (config->fpga_nodeid != (int) starpu_worker_get_memory_node(i))
-					config->fpga_nodeid = -2;
-				break;
 			case STARPU_OPENCL_WORKER:
 				if (config->opencl_nodeid == -1)
 					config->opencl_nodeid = starpu_worker_get_memory_node(i);
 				else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
 					config->opencl_nodeid = -2;
 				break;
+                        case STARPU_FPGA_WORKER:
+				if (config->fpga_nodeid == -1)
+					config->fpga_nodeid = starpu_worker_get_memory_node(i);
+				else if (config->fpga_nodeid != (int) starpu_worker_get_memory_node(i))
+					config->fpga_nodeid = -2;
+				break;
 			case STARPU_MIC_WORKER:
 				if (config->mic_nodeid == -1)
 					config->mic_nodeid = starpu_worker_get_memory_node(i);

+ 42 - 42
src/core/workers.c

@@ -402,14 +402,14 @@ static inline int _starpu_can_use_nth_implementation(enum starpu_worker_archtype
 		starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
 		return func != NULL;
 	}
-        case STARPU_FPGA_WORKER:
+	case STARPU_OPENCL_WORKER:
 	{
-		starpu_fpga_func_t func = _starpu_task_get_fpga_nth_implementation(cl, nimpl);
+		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
 		return func != NULL;
 	}
-	case STARPU_OPENCL_WORKER:
+        case STARPU_FPGA_WORKER:
 	{
-		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
+		starpu_fpga_func_t func = _starpu_task_get_fpga_nth_implementation(cl, nimpl);
 		return func != NULL;
 	}
 	case STARPU_MIC_WORKER:
@@ -886,30 +886,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 			}
 #endif
 
-#if defined(STARPU_USE_FPGA)
-			case STARPU_FPGA_WORKER:
-				driver.id.fpga_id = workerarg->devid;
-				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
-				{
-					workerarg->run_by_starpu = 0;
-					break;
-				}
-				STARPU_PTHREAD_CREATE_ON(
-					workerarg->name,
-					&workerarg->worker_thread,
-					NULL,
-					_starpu_fpga_worker,
-					workerarg,
-					_starpu_simgrid_get_host_by_worker(workerarg));
-#ifdef STARPU_USE_FXT
-				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
-				while (!workerarg->worker_is_running)
-					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
-				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
-#endif
-				break;
-#endif
-
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 			case STARPU_OPENCL_WORKER:
 			{
@@ -937,6 +913,30 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 			}
 #endif
 
+#if defined(STARPU_USE_FPGA)
+			case STARPU_FPGA_WORKER:
+				driver.id.fpga_id = workerarg->devid;
+				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
+				{
+					workerarg->run_by_starpu = 0;
+					break;
+				}
+				STARPU_PTHREAD_CREATE_ON(
+					workerarg->name,
+					&workerarg->worker_thread,
+					NULL,
+					_starpu_fpga_worker,
+					workerarg,
+					_starpu_simgrid_get_host_by_worker(workerarg));
+#ifdef STARPU_USE_FXT
+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
+				while (!workerarg->worker_is_running)
+					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
+#endif
+				break;
+#endif
+
 #ifdef STARPU_USE_MIC
 			case STARPU_MIC_WORKER:
 			{
@@ -1157,6 +1157,14 @@ int starpu_conf_init(struct starpu_conf *conf)
 		conf->disable_asynchronous_opencl_copy = 0;
 #endif
 
+#if defined(STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY)
+	conf->disable_asynchronous_fpga_copy = 1;
+#else
+	conf->disable_asynchronous_fpga_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY");
+	if (conf->disable_asynchronous_fpga_copy == -1)
+		conf->disable_asynchronous_fpga_copy = 0;
+#endif
+
 #if defined(STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY)
 	conf->disable_asynchronous_mic_copy = 1;
 #else
@@ -1173,14 +1181,6 @@ int starpu_conf_init(struct starpu_conf *conf)
 		conf->disable_asynchronous_mpi_ms_copy = 0;
 #endif
 
-#if defined(STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY)
-	conf->disable_asynchronous_fpga_copy = 1;
-#else
-	conf->disable_asynchronous_fpga_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY");
-	if (conf->disable_asynchronous_fpga_copy == -1)
-		conf->disable_asynchronous_fpga_copy = 0;
-#endif
-
 	/* 64MiB by default */
 	conf->trace_buffer_size = ((uint64_t) starpu_get_env_number_default("STARPU_TRACE_BUFFER_SIZE", 64)) << 20;
 
@@ -1231,8 +1231,8 @@ void _starpu_conf_check_environment(struct starpu_conf *conf)
 	if (main_thread_bind)
 		conf->reserve_ncpus++;
 	_starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda, conf->precedence_over_environment_variables);
-        _starpu_conf_set_value_against_environment("STARPU_NFPGA", &conf->nfpga, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl, conf->precedence_over_environment_variables);
+        _starpu_conf_set_value_against_environment("STARPU_NFPGA", &conf->nfpga, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate, conf->precedence_over_environment_variables);
 #ifdef STARPU_SIMGRID
@@ -1249,9 +1249,9 @@ void _starpu_conf_check_environment(struct starpu_conf *conf)
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy, conf->precedence_over_environment_variables);
+	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY", &conf->disable_asynchronous_fpga_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY", &conf->disable_asynchronous_mic_copy, conf->precedence_over_environment_variables);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY", &conf->disable_asynchronous_mpi_ms_copy, conf->precedence_over_environment_variables);
-	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_FPGA_COPY", &conf->disable_asynchronous_fpga_copy, conf->precedence_over_environment_variables);
 }
 
 struct starpu_tree* starpu_workers_get_tree(void)
@@ -2176,14 +2176,14 @@ int starpu_asynchronous_cuda_copy_disabled(void)
 	return _starpu_config.conf.disable_asynchronous_cuda_copy;
 }
 
-int starpu_asynchronous_fpga_copy_disabled(void)
+int starpu_asynchronous_opencl_copy_disabled(void)
 {
-	return _starpu_config.conf.disable_asynchronous_fpga_copy;
+	return _starpu_config.conf.disable_asynchronous_opencl_copy;
 }
 
-int starpu_asynchronous_opencl_copy_disabled(void)
+int starpu_asynchronous_fpga_copy_disabled(void)
 {
-	return _starpu_config.conf.disable_asynchronous_opencl_copy;
+	return _starpu_config.conf.disable_asynchronous_fpga_copy;
 }
 
 int starpu_asynchronous_mic_copy_disabled(void)

+ 15 - 9
src/core/workers.h

@@ -313,6 +313,9 @@ struct _starpu_machine_topology
 	/** Actual number of OpenCL workers used by StarPU. */
 	unsigned nopenclgpus;
 
+        /** Actual number of FPGA workers used by StarPU. */
+	unsigned nfpgafpgas;
+
 	/** Actual number of MPI workers used by StarPU. */
 	unsigned nmpidevices;
         unsigned nhwmpidevices;
@@ -320,9 +323,6 @@ struct _starpu_machine_topology
 	unsigned nhwmpicores[STARPU_MAXMPIDEVS]; /**< Each MPI node has its set of cores. */
 	unsigned nmpicores[STARPU_MAXMPIDEVS];
 
-        /* Actual number of Fpga workers used by StarPU. */
-	unsigned nfpgafpgas;
-
 	/** Topology of MP nodes (MIC) as well as necessary
 	 * objects to communicate with them. */
 	unsigned nhwmicdevices;
@@ -356,7 +356,13 @@ struct _starpu_machine_topology
 	 */
 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
 
-        unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
+	/** Indicates the successive FPGA identifier that should be
+	 * used by the FPGA driver.  It is either filled according
+	 * to the user's explicit parameters (from starpu_conf) or
+	 * according to the STARPU_WORKERS_FPGAID env. variable.
+	 * Otherwise, they are taken in ID order.
+	 */
+	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
 
 	/*** Indicates the successive MIC devices that should be used
 	 * by the MIC driver.  It is either filled according to the
@@ -390,27 +396,27 @@ struct _starpu_machine_config
 	/** Which GPU(s) do we use for OpenCL ? */
 	int current_opencl_gpuid;
 
+        /** Which FPGA(s) do we use? */
+	int current_fpga_deviceid;
+
 	/** Which MIC do we use? */
 	int current_mic_deviceid;
 
 	/** Which MPI do we use? */
 	int current_mpi_deviceid;
 
-        /* Which FPGA(s) do we use for FPGA? */
-	int current_fpga_deviceid;
-
 	/** Memory node for cpus, if only one */
 	int cpus_nodeid;
 	/** Memory node for CUDA, if only one */
 	int cuda_nodeid;
 	/** Memory node for OpenCL, if only one */
 	int opencl_nodeid;
+        /** Memory node for FPGA, if only one */
+	int fpga_nodeid;
 	/** Memory node for MIC, if only one */
 	int mic_nodeid;
 	/** Memory node for MPI, if only one */
 	int mpi_nodeid;
-        /* Memory node for FPGA, if only one */
-	int fpga_nodeid;
 
 	/** Separate out previous variables from per-worker data. */
 	char padding1[STARPU_CACHELINE_SIZE];

+ 1 - 5
src/datawizard/coherency.c

@@ -109,7 +109,6 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 	int i_ram = -1;
 	int i_gpu = -1;
 	int i_disk = -1;
-	int i_fpga = -1;
 
 	/* Revert to dumb strategy: take RAM unless only a GPU has it */
 	for (i = 0; i < nnodes; i++)
@@ -142,6 +141,7 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 
 			if (starpu_node_get_kind(i) == STARPU_CUDA_RAM ||
 			    starpu_node_get_kind(i) == STARPU_OPENCL_RAM ||
+			    starpu_node_get_kind(i) == STARPU_FPGA_RAM ||
 			    starpu_node_get_kind(i) == STARPU_MIC_RAM)
 				i_gpu = i;
 
@@ -150,8 +150,6 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 				i_ram = i;
 			if (starpu_node_get_kind(i) == STARPU_DISK_RAM)
 				i_disk = i;
-			if (starpu_node_get_kind(i) == STARPU_FPGA_RAM)
-				i_fpga = i;
 		}
 	}
 
@@ -161,8 +159,6 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 	/* no luck we have to use the disk memory */
 	else if (i_gpu != -1)
 		src_node = i_gpu;
-	else if (i_fpga != -1)
-		src_node = i_fpga;
 	else
 		src_node = i_disk;
 

+ 2 - 2
src/datawizard/node_ops.c

@@ -36,12 +36,12 @@ const char* _starpu_node_get_prefix(enum starpu_node_kind kind)
 			return "CUDA";
 		case STARPU_OPENCL_RAM:
 			return "OpenCL";
+		case STARPU_FPGA_RAM:
+			return "FPGA";
 		case STARPU_DISK_RAM:
 			return "Disk";
 		case STARPU_MIC_RAM:
 			return "MIC";
-		case STARPU_FPGA_RAM:
-			return "FPGA";
 		case STARPU_MPI_MS_RAM:
 			return "MPI_MS";
 		case STARPU_UNUSED:

+ 6 - 6
src/drivers/cpu/driver_cpu.c

@@ -534,6 +534,9 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
 #endif
+#ifdef STARPU_USE_FPGA
+	//.copy_interface_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_interface_from_cpu_to_fpga,
+#endif
 	.copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_cpu_to_disk,
 #ifdef STARPU_USE_MIC
 	.copy_interface_to[STARPU_MIC_RAM] = _starpu_mic_copy_interface_from_cpu_to_mic,
@@ -545,9 +548,6 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 #endif
-#ifdef STARPU_USE_FPGA
-	//.copy_interface_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_interface_from_cpu_to_fpga,
-#endif
 
 	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data,
@@ -561,6 +561,9 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
 #endif
+#ifdef STARPU_USE_FPGA
+	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_cpu_to_fpga,
+#endif
 	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_cpu_to_disk,
 #ifdef STARPU_USE_MIC
 	.copy_data_to[STARPU_MIC_RAM] = _starpu_mic_copy_data_from_cpu_to_mic,
@@ -572,9 +575,6 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 #endif
-#ifdef STARPU_USE_FPGA
-	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_cpu_to_fpga,
-#endif
 
 	.copy2d_data_to[STARPU_UNUSED] = NULL,
 	.copy2d_data_to[STARPU_CPU_RAM] = NULL,

+ 0 - 5
src/drivers/max/driver_fpga.c

@@ -259,16 +259,11 @@ static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker
 	if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
 	{
 		_starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl);
-		//char *kernel_type = _starpu_task_get_fpga_kernel_type_nth_implementation(cl, j->nimpl);
-		//printf("chanel reserved: %d \n",chnl);
 
 		STARPU_ASSERT_MSG(func, "when STARPU_FPGA is defined in 'where', fpga_func or fpga_funcs has to be defined");
 		if (_starpu_get_disable_kernels() <= 0)
 		{
 			_STARPU_TRACE_START_EXECUTING();
-			//int chnl = fpga_reserve_chanel_of_kernel_type(kernel_type);
-			//_starpu_fpga_transfer_data(_STARPU_TASK_GET_INTERFACES(task), j, chnl);
-			//fpga_release_chanel(chnl);
 			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 			_STARPU_TRACE_END_EXECUTING();
 		}