Procházet zdrojové kódy

Merge branch 'fpga' of gitlab.inria.fr:starpu/starpu into fpga

Samuel Thibault před 4 roky
rodič
revize
3f76466f0e

+ 7 - 1
.gitignore

@@ -240,7 +240,13 @@ starpu.log
 /tools/starpu_machine_display.1
 /tools/starpu_perfmodel_display.1
 /tools/starpu_perfmodel_plot.1
-/test/*.log
+/tests/*.log
+/tests/fpga/*.max
+/tests/fpga/*.h
+/tests/fpga/max_fpga_advanced_static
+/tests/fpga/max_fpga_basic_static
+/tests/fpga/max_fpga_dynamic
+/tests/fpga/max_fpga_mux
 /examples/*.log
 /tests/main/declare_deps_after_submission
 /tests/main/declare_deps_after_submission_synchronous

+ 2 - 0
doc/doxygen/chapters/440_fpga_support.doxy

@@ -268,6 +268,8 @@ In the <c>main</c> function, there are four important steps:
 The rest of the application (data registration, task submission, etc.)
 is as usual with StarPU.
 
+Complete examples are available in <c>tests/fpga/*.c</c>.
+
 \subsection FPGADataTransfers Data Transfers in StarPU/FPGA Applications
 
 The communication between the host and the DFE is done through the

+ 2 - 2
examples/Makefile.am

@@ -18,8 +18,8 @@
 #
 include $(top_srcdir)/starpu.mk
 
-AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
-AM_CXXFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CXXFLAGS) -Wno-unused
+AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_FPGA_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
+AM_CXXFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_FPGA_CPPFLAGS) $(GLOBAL_AM_CXXFLAGS) -Wno-unused
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(MAGMA_LIBS) $(HWLOC_LIBS) $(FXT_LIBS)

+ 1 - 1
examples/stencil/Makefile.am

@@ -15,7 +15,7 @@
 #
 include $(top_srcdir)/starpu.mk
 
-AM_CFLAGS = $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
+AM_CFLAGS = $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_FPGA_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) $(FXT_LIBS)

+ 16 - 0
include/starpu.h

@@ -307,6 +307,22 @@ struct starpu_conf
 	*/
 	unsigned workers_fpga_deviceid[STARPU_NMAXWORKERS];
 
+#ifdef STARPU_USE_FPGA
+	/**
+	   This specifies the Maxeler file(s) to be loaded on FPGAs.
+	   This is an array of starpu_max_load, the last of which shall have
+	   file set to NULL. In order to use all available devices,
+	   starpu_max_load::engine_id_pattern can be set to "*", but only the
+	   last non-NULL entry can be set so.
+
+	   If this is not set, it is assumed that the basic static SLiC
+	   interface is used.
+	*/
+	struct starpu_max_load *fpga_load;
+#else
+	void *fpga_files;
+#endif
+
 	/**
 	   If this flag is set, StarPU will recalibrate the bus.  If
 	   this value is equal to -1, the default value is used. This

+ 18 - 0
include/starpu_fpga.h

@@ -21,6 +21,7 @@
 
 #if defined STARPU_USE_FPGA
 //#include <fpga.h>
+#include <MaxSLiCInterface.h>
 
 #ifdef __cplusplus
 extern "C"
@@ -40,6 +41,18 @@ struct starpu_fpga_data_trans
         float time;
 };
 
+/**
+   This specifies a Maxeler file to be loaded on some engines.
+ */
+struct starpu_max_load {
+	max_file_t *file;		/**< Provide the file to be loaded; NULL terminates an array of starpu_max_load */
+	const char *engine_id_pattern;	/**< Provide the engine(s) on which to be loaded, following
+					     the Maxeler engine naming, i.e. typically
+					     "local:0", "local:1", etc.
+					     In an array of struct starpu_max_load, only the last
+					     non-terminating entry can have the "*" specification.  */
+};
+
 void starpu_fpga_report_configuration(void);
 
 /**
@@ -52,6 +65,11 @@ int starpu_fpga_set_device(unsigned devid);
  */
 int starpu_fpga_is_silent();
 
+/**
+   Maxeler engine of the current worker
+ */
+max_engine_t *starpu_fpga_get_local_engine(void);
+
 /** @} */
 
 #ifdef __cplusplus

+ 1 - 1
mpi/examples/Makefile.am

@@ -107,7 +107,7 @@ NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -
 endif
 endif
 
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
+AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_FPGA_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la -lm $(FXT_LIBS) $(MAGMA_LIBS)
 LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)

+ 1 - 1
mpi/tests/Makefile.am

@@ -83,7 +83,7 @@ NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -
 endif
 endif
 
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
+AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_FPGA_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(FXT_LIBS) $(MAGMA_LIBS)
 LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)

+ 2 - 1
src/core/topology.c

@@ -1950,7 +1950,8 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 #endif
 			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
 				+ cuda_busy_cpus
-				+ topology->nopenclgpus;
+				+ topology->nopenclgpus
+				+ topology->nfpgafpgas;
 
 			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
 			if (avail_cpus < 0)

+ 120 - 49
src/drivers/max/driver_fpga.c

@@ -44,8 +44,10 @@
 
 //#define STARPU_MAXFPGADEVS 4
 /* the number of FPGA devices */
-static unsigned  nfpgafpgas = -1;
-static size_t global_mem[STARPU_MAXFPGADEVS] = { 128ULL*1024*1024*1024 };
+static unsigned  nfpgafpgas;
+static size_t global_mem[STARPU_MAXFPGADEVS];
+static max_engine_t *engines[STARPU_MAXFPGADEVS];
+static fpga_mem current_address[STARPU_MAXFPGADEVS];
 
 static void _starpu_fpga_limit_global_mem(unsigned );
 static size_t _starpu_fpga_get_global_mem_size(unsigned devid);
@@ -55,39 +57,84 @@ void fpga_msg(char *msg)
 	printf(FPGA_OK "%s\n" NORMAL, msg);
 }
 
-void _starpu_init_fpga()
+max_engine_t *starpu_fpga_get_local_engine(void)
 {
-	nfpgafpgas = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
-	if(nfpgafpgas == -1)
-		nfpgafpgas =1;
-	STARPU_ASSERT( nfpgafpgas <= STARPU_MAXFPGADEVS);
+	int worker = starpu_worker_get_id_check();
+	int devid = starpu_worker_get_devid(worker);	/* the device id is the index into engines[] */
 
-        //LMemInterface addLMemInterface()
-        //// pour récupérer l'accès à la LMem
+	return engines[devid];	/* stays NULL unless init_device_context() loaded a file for this device */
 }
 
-#if 0
-int fpga_allocate_memory(fpga_mem *ptr, size_t size)
+void _starpu_init_fpga()	/* nothing to do here: the device count is now computed in _starpu_fpga_discover_devices() */
 {
-	//This allocates BYTES
-	char *msg1="You asked to allocate ";
-	//printf(KCYN "%s%d*%d\n" KBLU, msg1,size,sizeof(unsigned));
-	printf(FPGA_OK "%s%lu bytes\n" NORMAL, msg1,size);
-
-	*ptr =(fpga_mem) malloc(size);
-
-        if (*ptr == NULL)
-        	return 0;
-	else
-		return 1;
 }
-#endif
 
 void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
 {
 	//TODO: This is statically assigned, in the next round of integration
 	// I will have to read from the struct fpga in fpga
-	config->topology.nhwfpgafpgas = nfpgafpgas;
+	struct starpu_max_load *load = _starpu_config.conf.fpga_load;
+	const char *sim_socket = max_config_get_string(MAX_CONFIG_USE_SIMULATION);
+	int n;
+
+	n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
+	if (n != -1)
+	{
+		config->topology.nhwfpgafpgas = nfpgafpgas = n;
+		return;
+	}
+
+	if (!load)
+	{
+		/* Nothing specified, single-FPGA execution with basic static
+		 * interface, file will be auto-loaded by SLiC. */
+		n = 1;
+	}
+	else
+	{
+		struct starpu_max_load *cur, *star = NULL;
+		size_t nstar = 0;
+
+		/* First check if we have a star (the array ends at file == NULL), we will want to subtract non-star loads from it */
+		for (cur = load; cur->file; cur++)
+			if (!strcmp(cur->engine_id_pattern, "*"))
+			{
+				STARPU_ASSERT_MSG(!cur[1].file, "in starpu_max_load array, * pattern must be last");
+				star = cur;
+
+				if (sim_socket)
+					/* not specified, assume 1 */
+					nstar = 1;
+				else
+					nstar = max_count_engines_free(cur->file, cur->engine_id_pattern);
+				break;
+			}
+
+		n = 0;
+		/* Now check the non-star loads, each against its own file and pattern */
+		for (cur = load; cur != star && cur->file; cur++)
+		{
+			size_t size;
+
+			size = max_count_engines_free(cur->file, cur->engine_id_pattern);
+			STARPU_ASSERT_MSG(size > 0, "cannot load starpu_max_load element %u on %s", (unsigned) (cur - load), cur->engine_id_pattern);
+			/* One FPGA more to be used */
+			n++;
+
+			if (nstar)
+			{
+				size = max_count_engines_free(cur->file, "*");
+				if (size > 1)
+					/* One of the star devices will be used to load this file */
+					nstar--;
+			}
+		}
+		n += nstar;
+	}
+
+	//LMemInterface addLMemInterface()
+	//// to get access to the LMem
+	config->topology.nhwfpgafpgas = nfpgafpgas = n;
 }
 
 unsigned _starpu_fpga_get_device_count(void)
@@ -112,22 +159,47 @@ static size_t _starpu_fpga_get_global_mem_size(unsigned devid)
 
 static void init_fpga_worker_context(unsigned workerid)
 {
-	//starpu_fpgaStreamCreate(&streams[devid][i]);
 }
 
 static void init_device_context(unsigned devid)
 {
-	unsigned i;
-	//TODO: starpu_fpga_set_device
-	starpu_fpga_set_device(devid);
-
-	//TODO: Do we need the streams? I think no
-	//cures = starpu_fpgaStreamCreate(&in_transfer_streams[devid]);
-	//cures = starpu_fpgaStreamCreate(&out_transfer_streams[devid]);
-	for (i = 0; i < nfpgafpgas; i++)
-	{
-		//starpu_fpgaStreamCreate(&in_peer_transfer_streams[i][devid]);
-		//starpu_fpgaStreamCreate(&out_peer_transfer_streams[devid][i]);
+	struct starpu_max_load *load = _starpu_config.conf.fpga_load;
+
+	/* 0 would be seen as NULL, i.e. allocation failed... */
+	// FIXME: Maxeler FPGAs want 192-byte alignment
+	current_address[devid] = (fpga_mem) (8192*192);
+	global_mem[devid] = 128ULL*1024*1024*1024;
+
+	_starpu_fpga_limit_global_mem(devid);
+
+	if (!load) {
+		/* Nothing specified, single-FPGA execution with basic static
+		 * interface, file will be auto-loaded by SLiC. */
+		return;
+	} else {
+		unsigned n;
+
+		/* Which load we shall use: the devid-th entry, or the trailing "*" entry if reached first */
+		for (n = 0; load->file; load++, n++)
+		{
+			if (!strcmp(load->engine_id_pattern, "*"))
+				break;
+			if (n == devid)
+				break;
+		}
+
+		STARPU_ASSERT(load->file);
+
+		if (!strcmp(load->engine_id_pattern, "*"))
+		{
+			char s[32];
+			snprintf(s, sizeof(s), "local:%u", (unsigned) devid);
+			/* FIXME: this assumes that the loads are in-order.
+			 * Ideally we'd detect which ones had an explicit load */
+			engines[devid] = max_load(load->file, s);
+		}
+		else
+			engines[devid] = max_load(load->file, load->engine_id_pattern);
 	}
 }
 
@@ -141,6 +213,9 @@ int _starpu_fpga_driver_init(struct _starpu_worker *worker)
 	if (worker->memory_node != STARPU_MAIN_RAM)
 		_starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_fpga_get_global_mem_size(worker->devid));
 
+	// TODO: multiple fpga in same thread
+	init_device_context(devid);
+
 	snprintf(worker->name, sizeof(worker->name), "FPGA %d", devid);
 	snprintf(worker->short_name, sizeof(worker->short_name), "FPGA %d", devid);
 	starpu_pthread_setname(worker->short_name);
@@ -351,20 +426,16 @@ uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags
 {
 	(void) flags;
 	unsigned devid = starpu_memory_node_get_devid(dst_node);
-	STARPU_ASSERT(devid == 0); // For now
 
-	/* 0 would be seen as NULL, i.e. allocation failed... */
-	// FIXME: Maxeler FPGAs want 192-byte alignment
-	static fpga_mem current_address = 8192*192;
 	fpga_mem addr, next_addr;
- 	addr = current_address;
-	next_addr = current_address + size;
-	if (next_addr >= global_mem[0])
+ 	addr = current_address[devid];
+	next_addr = current_address[devid] + size;
+	if (next_addr >= (fpga_mem) global_mem[devid])
 	{
-		printf("Memory overflow\n");
+		printf("Memory overflow on %d\n", devid);
 		return 0;
 	}
-	current_address = next_addr;
+	current_address[devid] = next_addr;
 	printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size);
         return (uintptr_t) addr;
 }
@@ -567,19 +638,19 @@ struct _starpu_driver_ops _starpu_driver_fpga_ops =
 	.deinit = _starpu_fpga_driver_deinit
 };
 
-// TODO: structure node_ops, comme dans driver_cuda.c, avec starpu_fpga_allocate_memory, etc.
+// TODO: transfers
 struct _starpu_node_ops _starpu_driver_fpga_node_ops =
 {
 	.copy_data_to[STARPU_UNUSED] = NULL,
-	.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
-	.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
+	//.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
+	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_data_to[STARPU_DISK_RAM] = NULL,
 	.copy_data_to[STARPU_MIC_RAM] = NULL,
 	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 	.copy_interface_to[STARPU_UNUSED] = NULL,
-	.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
+	//.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
 	.copy_interface_to[STARPU_FPGA_RAM] = NULL,
 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_interface_to[STARPU_DISK_RAM] = NULL,

+ 1 - 1
starpu-1.0.pc.in

@@ -25,7 +25,7 @@ includedir=@includedir@
 Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ @STARPU_FPGA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
 Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_SC_HYPERVISOR@ @STARPU_EXPORTED_LIBS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.1.pc.in

@@ -22,7 +22,7 @@ includedir=@includedir@
 Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ @STARPU_FPGA_CPPFLAGS@
 Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_SC_HYPERVISOR@ @STARPU_EXPORTED_LIBS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.2.pc.in

@@ -22,7 +22,7 @@ includedir=@includedir@
 Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ @STARPU_FPGA_CPPFLAGS@
 Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_SC_HYPERVISOR@ @STARPU_EXPORTED_LIBS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpu-1.3.pc.in

@@ -22,7 +22,7 @@ includedir=@includedir@
 Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ @OPENMP_CFLAGS@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ @OPENMP_CFLAGS@ @STARPU_FPGA_CPPFLAGS@
 Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_SC_HYPERVISOR@ @STARPU_EXPORTED_LIBS@ @OPENMP_CFLAGS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 1 - 1
starpufft/src/Makefile.am

@@ -17,7 +17,7 @@
 include $(top_srcdir)/starpu-notests.mk
 
 AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS) $(FFTWF_CFLAGS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/starpufft/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/starpufft/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS) $(STARPU_FPGA_CPPFLAGS)
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(FFTW_LIBS) $(FFTWF_LIBS) $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_CUFFT_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)
 
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la

+ 1 - 1
starpufft/tests/Makefile.am

@@ -45,7 +45,7 @@ endif
 
 endif
 
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
+AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_FPGA_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/starpufft/include -I$(top_srcdir)/starpufft/src
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpufft-@STARPU_EFFECTIVE_VERSION@.la $(FXT_LIBS) $(MAGMA_LIBS)
 LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)

+ 28 - 7
tests/Makefile.am

@@ -15,8 +15,8 @@
 #
 include $(top_srcdir)/starpu.mk
 
-AM_CFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_FPGA_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
-AM_CXXFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(GLOBAL_AM_CXXFLAGS) -Wno-unused
+AM_CFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_FPGA_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_FPGA_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
+AM_CXXFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_FPGA_CPPFLAGS) $(GLOBAL_AM_CXXFLAGS) -Wno-unused
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@
 LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) $(FXT_LIBS)
@@ -384,7 +384,10 @@ myPROGRAMS +=				\
 
 if STARPU_USE_FPGA
 myPROGRAMS +=				\
-        fpga/max_fpga
+        fpga/max_fpga_basic_static	\
+        fpga/max_fpga_advanced_static	\
+        fpga/max_fpga_dynamic		\
+        fpga/max_fpga_mux
 endif
 endif
 
@@ -1043,11 +1046,29 @@ perfmodels_regression_based_energy_SOURCES=\
 perfmodels_regression_based_gpu_SOURCES=\
 	perfmodels/regression_based_gpu.c
 
-fpga_max_fpga_SOURCES=\
-	fpga/max_fpga.c
-fpga_max_fpga_LDADD = $(LDADD) \
+fpga_max_fpga_basic_static_SOURCES=\
+	fpga/max_fpga_basic_static.c
+fpga_max_fpga_basic_static_LDADD = $(LDADD) \
 	fpga/slic_MyTasks.o
-fpga/max_fpga.o: fpga/MyTasks.max
+fpga/max_fpga_basic_static.o: fpga/MyTasks.max
+
+fpga_max_fpga_advanced_static_SOURCES=\
+	fpga/max_fpga_advanced_static.c
+fpga_max_fpga_advanced_static_LDADD = $(LDADD) \
+	fpga/slic_MyTasks.o
+fpga/max_fpga_advanced_static.o: fpga/MyTasks.max
+
+fpga_max_fpga_dynamic_SOURCES=\
+	fpga/max_fpga_dynamic.c
+fpga_max_fpga_dynamic_LDADD = $(LDADD) \
+	fpga/slic_MyTasks.o
+fpga/max_fpga_dynamic.o: fpga/MyTasks.max
+
+fpga_max_fpga_mux_SOURCES=\
+	fpga/max_fpga_mux.c
+fpga_max_fpga_mux_LDADD = $(LDADD) \
+	fpga/slic_MyTasksMux.o
+fpga/max_fpga_mux.o: fpga/MyTasksMux.max
 
 if STARPU_USE_OPENCL
 perfmodels_regression_based_memset_SOURCES+=\

+ 14 - 8
tests/fpga/max_fpga_advanced_static.c

@@ -20,12 +20,9 @@
 #include "../helper.h"
 
 #include "MyTasks.h"
-#include "MaxSLiCInterface.h"
+#include <MaxSLiCInterface.h>
 #define SIZE (192/sizeof(int32_t))
 
-static max_file_t *maxfile;
-static max_engine_t *engine;
-
 void fpga_impl(void *buffers[], void *cl_arg)
 {
 	(void)cl_arg;
@@ -46,6 +43,8 @@ void fpga_impl(void *buffers[], void *cl_arg)
 	size_t ptrAT3 = ptrCT2;
 	size_t ptrBT3 = ptrCT2;
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("Loading DFE memory.\n");
 
 	/* C = A+B */
@@ -100,6 +99,8 @@ void fpga_impl1(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC);
 	/* C = A+B */
 	MyTasks_interfaceT1_actions_t act = {
@@ -131,6 +132,8 @@ void fpga_impl2(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC);
 	/* C = A*B */
 	MyTasks_interfaceT2_actions_t act = {
@@ -161,6 +164,8 @@ void fpga_impl3(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC);
 	/* C = A+B */
 	MyTasks_interfaceT3_actions_t act = {
@@ -192,12 +197,15 @@ int main(int argc, char **argv)
 	int ret;
 	int size=1234;
 
-	maxfile = MyTasks_init();
-	engine = max_load(maxfile, "*");
+	struct starpu_max_load load[2];
+	load[0].file = MyTasks_init();
+	load[0].engine_id_pattern = "*";
+	load[1].file = NULL;
 
 	starpu_conf_init(&conf);
 	conf.sched_policy_name = "eager";
 	conf.calibrate = 0;
+	conf.fpga_load = load;
 
 	ret = starpu_initialize(&conf, &argc, &argv);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
@@ -262,7 +270,5 @@ int main(int argc, char **argv)
 	if (ret == EXIT_SUCCESS)
 		printf("OK!\n");
 
-	max_unload(engine);
-
 	return ret;
 }

+ 1 - 1
tests/fpga/max_fpga_basic_static.c

@@ -20,7 +20,7 @@
 #include "../helper.h"
 
 #include "MyTasks.h"
-#include "MaxSLiCInterface.h"
+#include <MaxSLiCInterface.h>
 #define SIZE (192/sizeof(int32_t))
 
 void fpga_impl(void *buffers[], void *cl_arg)

+ 13 - 5
tests/fpga/max_fpga_dynamic.c

@@ -20,11 +20,10 @@
 #include "../helper.h"
 
 #include "MyTasks.h"
-#include "MaxSLiCInterface.h"
+#include <MaxSLiCInterface.h>
 #define SIZE (192/sizeof(int32_t))
 
 static max_file_t *maxfile;
-static max_engine_t *engine;
 
 void fpga_impl1(void *buffers[], void *cl_arg)
 {
@@ -36,6 +35,8 @@ void fpga_impl1(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC);
 	/* C = A+B */
 
@@ -81,6 +82,8 @@ void fpga_impl2(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC);
 	/* C = A*B */
 
@@ -125,6 +128,8 @@ void fpga_impl3(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC);
 	/* C = A+B */
 
@@ -171,11 +176,16 @@ int main(int argc, char **argv)
 	int size=1234;
 
 	maxfile = MyTasks_init();
-	engine = max_load(maxfile, "*");
+
+	struct starpu_max_load load[2];
+	load[0].file = maxfile;
+	load[0].engine_id_pattern = "*";
+	load[1].file = NULL;
 
 	starpu_conf_init(&conf);
 	conf.sched_policy_name = "eager";
 	conf.calibrate = 0;
+	conf.fpga_load = load;
 
 	ret = starpu_initialize(&conf, &argc, &argv);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
@@ -235,7 +245,5 @@ int main(int argc, char **argv)
 	if (ret == EXIT_SUCCESS)
 		printf("OK!\n");
 
-	max_unload(engine);
-
 	return ret;
 }

+ 13 - 5
tests/fpga/max_fpga_mux.c

@@ -20,11 +20,10 @@
 #include "../helper.h"
 
 #include "MyTasksMux.h"
-#include "MaxSLiCInterface.h"
+#include <MaxSLiCInterface.h>
 #define SIZE (192/sizeof(int32_t))
 
 static max_file_t *maxfile;
-static max_engine_t *engine;
 
 /*
  * Dynamically configure multiplexer and streaming from CPU or from LMem (ignoring the other)
@@ -87,6 +86,8 @@ void fpga_impl1(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T1 with %p %p %p\n", ptrAT1, ptrBT1, ptrCT1);
 	/* C = A+B */
 
@@ -135,6 +136,8 @@ void fpga_impl2(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T2 with %p %p %p\n", ptrAT2, ptrBT2, ptrCT2);
 	/* C = A*B */
 
@@ -182,6 +185,8 @@ void fpga_impl3(void *buffers[], void *cl_arg)
 
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	max_engine_t *engine = starpu_fpga_get_local_engine();;
+
 	printf("T3 with %p %p %p\n", ptrAT3, ptrBT3, ptrCT3);
 	/* C = A+B */
 
@@ -228,11 +233,16 @@ int main(int argc, char **argv)
 	int size=1234;
 
 	maxfile = MyTasksMux_init();
-	engine = max_load(maxfile, "*");
+
+	struct starpu_max_load load[2];
+	load[0].file = maxfile;
+	load[0].engine_id_pattern = "*";
+	load[1].file = NULL;
 
 	starpu_conf_init(&conf);
 	conf.sched_policy_name = "eager";
 	conf.calibrate = 0;
+	conf.fpga_load = load;
 
 	ret = starpu_initialize(&conf, &argc, &argv);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
@@ -292,7 +302,5 @@ int main(int argc, char **argv)
 	if (ret == EXIT_SUCCESS)
 		printf("OK!\n");
 
-	max_unload(engine);
-
 	return ret;
 }