Nathalie Furmento 5 anni fa
parent
commit
16ce791ff6

+ 0 - 48
doc/doxygen/440_fpga_support.doxy

@@ -1,48 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2019                          CNRS
- * Copyright (C) 2019                          Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \page FPGASupport FPGA Support
-
-\section FPGA FPGA 
-
-The use of specialized hardware such as accelerators or coprocessors offers an
-interesting approach to overcome the physical limits encountered by processor
-architects. As a result, many machines are now equipped with one or several
-accelerators (e.g. a GPU), in addition to the usual processor(s). While a lot of
-effort has been devoted to offloading computation onto such accelerators, very
-little attention has been paid to portability concerns on the one hand, and to the
-possibility of having heterogeneous accelerators and processors interact on the other hand.
-
- In addition, StarPU comes with programming language support, in the form of an OpenCL front-end (\ref FPGAextensions).
-
-\section PortingApplicationsToFPGA Porting Applications To FPGA
-
-The way to port an application to FPGA is to set the field
-starpu_codelet::fpga_funcs, to provide StarPU with the function
-for FPGA implementation, so for instance:
-
-\verbatim
-struct starpu_codelet cl =
-{
-    .fpga_funcs = {myfunc},
-    .nbuffers = 1,
-}
-\endverbatim
-
-
-
-*/

+ 1 - 2
doc/doxygen/chapters/440_fpga_support.doxy

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2019                          CNRS
- * Copyright (C) 2019                          Inria
+ * Copyright (C) 2019-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
doc/doxygen/chapters/api/fpga_extensions.doxy

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2019                                     CNRS
- * Copyright (C) 2019                                     Inria
+ * Copyright (C) 2019-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 2 - 3
examples/basic_examples/mult-fpga.c

@@ -1,8 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011, 2013, 2015  Université de Bordeaux
- * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2010       Mehdi Juhoor <mjuhoor@gmail.com>
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 0 - 2
include/starpu_data_interfaces.h

@@ -325,11 +325,9 @@ struct starpu_data_copy_methods
 	   core.
 	*/
 
-
 	int (*ram_to_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 	int (*fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
 
-
 	int (*mic_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
 
 	/**

+ 1 - 1
include/starpu_driver.h

@@ -54,7 +54,7 @@ struct starpu_driver
 		unsigned cpu_id;
 		unsigned cuda_id;
 #if defined(STARPU_USE_FPGA)
-	  unsigned fpga_id;
+		unsigned fpga_id;
 #endif
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
 		cl_device_id opencl_id;

+ 1 - 2
include/starpu_fpga.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2019  CNRS
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 3 - 10
include/starpu_task.h

@@ -66,8 +66,7 @@ extern "C"
    starpu_task::where) to specify the codelet (or the task) may be
    executed on a MAX FPGA.
 */
-
-#define STARPU_FPGA	((1ULL)<<9)
+#define STARPU_FPGA	((1ULL)<<4)
 
 /**
    To be used when setting the field starpu_codelet::where (or
@@ -178,7 +177,6 @@ typedef void (*starpu_cuda_func_t)(void **, void*);
 /**
    FPGA implementation of a codelet.
 */
-
 typedef void (*starpu_fpga_func_t)(void **, void*);
 
 /**
@@ -231,7 +229,6 @@ typedef starpu_mpi_ms_kernel_t (*starpu_mpi_ms_func_t)(void);
    this macro is deprecated. One should always only define the field
    starpu_codelet::fpga_funcs.
 */
-
 #define STARPU_MULTIPLE_FPGA_IMPLEMENTATIONS   ((starpu_fpga_func_t) -1)
 
 /**
@@ -352,7 +349,6 @@ struct starpu_codelet
 	   Optional field which has been made deprecated. One should
 	   use instead the starpu_codelet::fpga_funcs field.
 	*/
-
 	starpu_fpga_func_t fpga_func STARPU_DEPRECATED;
 
         /**
@@ -360,7 +356,6 @@ struct starpu_codelet
 	   Optional field which has been made deprecated. One should
 	   use instead the starpu_codelet::opencl_funcs field.
 	*/
-
 	starpu_opencl_func_t opencl_func STARPU_DEPRECATED;
 
 	/**
@@ -410,7 +405,6 @@ struct starpu_codelet
            ::STARPU_FPGA does not appear in the field
            starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
         */
-
 	starpu_fpga_func_t fpga_funcs[STARPU_MAXIMPLEMENTATIONS];
 
 	/**
@@ -482,10 +476,9 @@ struct starpu_codelet
 	*/
 	const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
 
-	/** 
-	 fpga kernel type
+	/**
+	   fpga kernel type
         */
-
 	char *fpga_kernel_type[STARPU_MAXIMPLEMENTATIONS];
 
 	/**

+ 7 - 10
src/core/topology.c

@@ -539,8 +539,7 @@ static inline int _starpu_get_next_cuda_gpuid(struct _starpu_machine_config *con
 #endif
 
 #if defined(STARPU_USE_FPGA)
-static void
-_starpu_initialize_workers_fpga_fpgaid (struct _starpu_machine_config *config)
+static void _starpu_initialize_workers_fpga_fpgaid (struct _starpu_machine_config *config)
 {
 	struct _starpu_machine_topology *topology = &config->topology;
 	struct starpu_conf *uconf = &config->conf;
@@ -556,11 +555,9 @@ _starpu_initialize_workers_fpga_fpgaid (struct _starpu_machine_config *config)
 		STARPU_FPGA_WORKER);
 }
 
-static inline int
-_starpu_get_next_fpga_fpgaid (struct _starpu_machine_config *config)
+static inline int _starpu_get_next_fpga_fpgaid (struct _starpu_machine_config *config)
 {
-	unsigned i =
-		((config->current_fpga_fpgaid++) % config->topology.nfpgafpgas);
+	unsigned i = ((config->current_fpga_fpgaid++) % config->topology.nfpgafpgas);
 
 	return (int)config->topology.workers_fpga_fpgaid[i];
 }
@@ -1180,7 +1177,7 @@ unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config)
 		_starpu_init_cuda();
 #endif
 
-#if defined(STARPU_USE_FPGA) 
+#if defined(STARPU_USE_FPGA)
 	_starpu_init_fpga();
 #endif
 	_starpu_init_topology(config);
@@ -1791,7 +1788,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 #endif
 
 
-#if defined(STARPU_USE_FPGA) 
+#if defined(STARPU_USE_FPGA)
 	int nfpga = config->conf.nfpga;
 	if (nfpga != 0)
 	{
@@ -2807,7 +2804,7 @@ static void _starpu_init_workers_binding_and_memory(struct _starpu_machine_confi
 					fpga_init[devid] = 1;
 					workerarg->bindid = fpga_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred);
 
-					memory_node = fpga_memory_nodes[devid] = _starpu_memory_node_register(STARPU_FPGA_RAM, devid, &_starpu_driver_fpga_node_ops); 
+					memory_node = fpga_memory_nodes[devid] = _starpu_memory_node_register(STARPU_FPGA_RAM, devid, &_starpu_driver_fpga_node_ops);
 					_starpu_register_bus(STARPU_MAIN_RAM, memory_node);
 					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
 
@@ -3099,7 +3096,7 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 				else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
 					config->cpus_nodeid = -2;
 				break;
-                        
+
 			case STARPU_CUDA_WORKER:
 				if (config->cuda_nodeid == -1)
 					config->cuda_nodeid = starpu_worker_get_memory_node(i);

+ 2 - 2
src/datawizard/coherency.c

@@ -38,7 +38,7 @@ static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned
 int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 {
         //fpga_msg("The new troublesome point is here");
-	
+
         int src_node = -1;
 	unsigned i;
 
@@ -152,7 +152,7 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 				i_ram = i;
 			if (starpu_node_get_kind(i) == STARPU_DISK_RAM)
 				i_disk = i;
-                        if (starpu_node_get_kind(i) == STARPU_FPGA_RAM)			
+                        if (starpu_node_get_kind(i) == STARPU_FPGA_RAM)
 				i_fpga = i;
 		}
 	}

BIN
src/drivers/max/.old.tar.gz


+ 61 - 52
src/drivers/max/driver_fpga.c

@@ -1,9 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2015, 2020  Université de Bordeaux
- * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
- * Copyright (C) 2011  Télécom-SudParis
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2010       Mehdi Juhoor <mjuhoor@gmail.com>
+ * Copyright (C) 2011       Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -47,14 +46,15 @@
 
 //#define STARPU_MAXFPGADEVS 4
 /* the number of FPGA devices */
-static unsigned  nfpgafpgas = -1; 
+static unsigned  nfpgafpgas = -1;
 static fpgaDeviceProp props[STARPU_MAXFPGADEVS];
 static size_t global_mem[STARPU_MAXFPGADEVS] = { 128ULL*1024*1024*1024 };
 
 static void _starpu_fpga_limit_global_mem(unsigned );
 static size_t _starpu_fpga_get_global_mem_size(unsigned devid);
 
-void fpga_msg(char *msg){
+void fpga_msg(char *msg)
+{
 	printf(FPGA_OK "%s\n" NORMAL, msg);
 }
 
@@ -66,43 +66,49 @@ void _starpu_init_fpga()
 	STARPU_ASSERT( nfpgafpgas <= STARPU_MAXFPGADEVS);
 
         //LMemInterface addLMemInterface()
-        //// pour récupérer l'accès à la LMem	
+        //// pour récupérer l'accès à la LMem
 }
 
 #if 0
-int fpga_allocate_memory(fpga_mem *ptr, size_t size){
+int fpga_allocate_memory(fpga_mem *ptr, size_t size)
+{
 //This allocates BYTES
 	char *msg1="You asked to allocate ";
 //	printf(KCYN "%s%d*%d\n" KBLU, msg1,size,sizeof(unsigned));
 	printf(FPGA_OK "%s%lu bytes\n" NORMAL, msg1,size);
 
 	*ptr =(fpga_mem) malloc(size);
-  
+
         if (*ptr == NULL)
         	return 0;
-       		else
+	else
 		return 1;
-       			  }
+}
 #endif
 
-int fpgaGetDeviceProperties(fpgaDeviceProp *props,unsigned devid){
+int fpgaGetDeviceProperties(fpgaDeviceProp *props,unsigned devid)
+{
 //TODO
         props->totalGlobalMem=1*1024*1024;
         props->concurrentKernels=4;
         props->name="Fpga_Props_Name";
         return 0;
 }
-void _starpu_fpga_discover_devices (struct _starpu_machine_config *config){
+
+void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
+{
 	//TODO: This is statically assigned, in the next round of integration
 	// I will have to read from the struct fpga in fpga
 	config->topology.nhwfpgafpgas = nfpgafpgas;
 }
 
-unsigned _starpu_fpga_get_device_count(void){
+unsigned _starpu_fpga_get_device_count(void)
+{
 	return nfpgafpgas;
 }
 
-static void	_starpu_fpga_limit_global_mem(unsigned devid){
+static void _starpu_fpga_limit_global_mem(unsigned devid)
+{
 	starpu_ssize_t limit=-1;
 
 //TODO
@@ -111,11 +117,13 @@ static void	_starpu_fpga_limit_global_mem(unsigned devid){
 		global_mem[devid] = limit*1024*1024;
 }
 
-static size_t _starpu_fpga_get_global_mem_size(unsigned devid){
+static size_t _starpu_fpga_get_global_mem_size(unsigned devid)
+{
 	return global_mem[devid];
 }
 
-static void init_fpga_worker_context(unsigned workerid){
+static void init_fpga_worker_context(unsigned workerid)
+{
 //		starpu_fpgaStreamCreate(&streams[devid][i]);
 }
 
@@ -125,7 +133,7 @@ static void init_device_context(unsigned devid)
 	unsigned i;
 //TODO: fpgaSetDevice
 	fpgaSetDevice(devid);
-	
+
 //TODO: fpgaGetDeviceProperties
 	fpgaGetDeviceProperties(&props[devid], devid);
 //TODO: Do we need the streams? I think no
@@ -138,8 +146,8 @@ static void init_device_context(unsigned devid)
 	}
 }
 
-
-int _starpu_fpga_driver_init(struct _starpu_worker *worker){
+int _starpu_fpga_driver_init(struct _starpu_worker *worker)
+{
 	int devid = worker->devid;
 //fpga_msg("successful till here");
 	_starpu_driver_start(worker, _STARPU_FUT_CPU_KEY, 1);
@@ -163,7 +171,8 @@ int _starpu_fpga_driver_init(struct _starpu_worker *worker){
 	return 0;
 }
 
-static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *fpga_args, int rank, struct starpu_perfmodel_arch* perf_arch){
+static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *fpga_args, int rank, struct starpu_perfmodel_arch* perf_arch)
+{
 	int ret;
 	int profiling = starpu_profiling_status_get();
 
@@ -220,9 +229,10 @@ int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
 
 	_STARPU_TRACE_START_PROGRESS(memnode);
 	_starpu_datawizard_progress(1);
-	if (memnode != STARPU_MAIN_RAM){
+	if (memnode != STARPU_MAIN_RAM)
+	{
 		_starpu_datawizard_progress(1);
-		}
+	}
 	_STARPU_TRACE_END_PROGRESS(memnode);
 
 	struct _starpu_job *j;
@@ -248,7 +258,7 @@ int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
 	int is_parallel_task = (j->task_size > 1);
 
 	struct starpu_perfmodel_arch* perf_arch;
-	
+
 	if (is_parallel_task)
 	{
 		STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
@@ -259,7 +269,7 @@ int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
 		{
 			struct _starpu_combined_worker *combined_worker;
 			combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid);
-			
+
 			fpga_worker->combined_workerid = j->combined_workerid;
 			fpga_worker->worker_size = combined_worker->worker_size;
 			fpga_worker->current_rank = rank;
@@ -315,7 +325,8 @@ int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
 	return 0;
 }
 
-int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker){
+int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker)
+{
 	_STARPU_TRACE_WORKER_DEINIT_START;
 
 	unsigned memnode = fpga_worker->memory_node;
@@ -332,7 +343,8 @@ int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker){
 	return 0;
 }
 
-void *_starpu_fpga_worker(void *_arg){
+void *_starpu_fpga_worker(void *_arg)
+{
 	struct _starpu_worker* worker = _arg;
          unsigned memnode = worker->memory_node;
 
@@ -341,7 +353,7 @@ void *_starpu_fpga_worker(void *_arg){
 	while (_starpu_machine_is_running())
 	{
 		_starpu_may_pause();
-//fpga_msg("\tEntered the main loop\n");
+		//fpga_msg("\tEntered the main loop\n");
 		_starpu_fpga_driver_run_once(worker);
 	}
 	_STARPU_TRACE_END_PROGRESS(memnode);
@@ -350,7 +362,7 @@ void *_starpu_fpga_worker(void *_arg){
 	return NULL;
 }
 
-uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags) 
+uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags)
 {
 	(void) flags;
 	unsigned devid = starpu_memory_node_get_devid(dst_node);
@@ -363,19 +375,17 @@ uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags
 // TODO: vérifier si current_address + size > taille de la LMEm
  	addr = current_address;
 	current_address += size;
-printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size);
-//success = 0
+	printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size);
+	//success = 0
         return (uintptr_t) addr;
 }
 
-
-
 int _starpu_fpga_copy_ram_to_fpga(void *src, void *dst, size_t size)
 {
-printf("ram to fpga, fpga @= %p\n",dst);
+	printf("ram to fpga, fpga @= %p\n",dst);
 	memcpy(dst,src,size);
 	return 0;
-  // LMemLoopback_writeLMem(dst, size, src);
+	// LMemLoopback_writeLMem(dst, size, src);
 }
 
 /* Transfer SIZE bytes from the address pointed to by SRC in the SRC_NODE memory
@@ -383,52 +393,51 @@ printf("ram to fpga, fpga @= %p\n",dst);
  *   */
 void copy_ram_to_fpga(void *src, void *dst, size_t size)
 {
-printf("ram to fpga, fpga @= %p\n",dst);
-
-      // LMemLoopback_writeLMem(size, dst, src);
-   
+	printf("ram to fpga, fpga @= %p\n",dst);
+	// LMemLoopback_writeLMem(size, dst, src);
 }
 
 void copy_fpga_to_ram(void *src, void *dst, size_t size)
 {
-printf("ram to fpga, fpga @= %p\n",src);
+	printf("ram to fpga, fpga @= %p\n",src);
        //LMemLoopback_readLMem(size, src, dst);
 
 }
+
 /* Transfer SIZE bytes from the address pointed to by SRC in the SRC_NODE memory
  * node to the address pointed by DST in the DST_NODE memory node
  */
 int _starpu_fpga_copy_fpga_to_ram(void *src, void *dst, size_t size)
 {
-printf("fpga to ram, fpga @= %p\n",src);
+	printf("fpga to ram, fpga @= %p\n",src);
 	memcpy(dst,src,size);
 	return 0;
- //LMemLoopback_readLMem(src, size, dst);
+	//LMemLoopback_readLMem(src, size, dst);
 }
+
 /* Transfer SIZE bytes from the address pointed to by SRC in the SRC_NODE memory
  * node to the address pointed by DST in the DST_NODE memory node
  */
 int _starpu_fpga_copy_fpga_to_fpga(void *src, void *dst, size_t size)
 {
-printf("fpga to ram, fpga @= %p\n",src);
+	printf("fpga to ram, fpga @= %p\n",src);
 	memcpy(dst,src,size);
 	return 0;
- //LMemLoopback_XXXLMem(src, size, dst);
+	//LMemLoopback_XXXLMem(src, size, dst);
 }
 
 /* Asynchronous transfers */
 int _starpu_fpga_copy_ram_to_fpga_async(void *src, void *dst, size_t size)
 {
-printf("ram to fpga, fpga @= %p\n",dst);
+	printf("ram to fpga, fpga @= %p\n",dst);
 	memcpy(dst,src,size);
 	return 0;
-
- // Trouver dans la doc une version asynchrone de LMemLoopback_writeLMem();
+	// Trouver dans la doc une version asynchrone de LMemLoopback_writeLMem();
 }
 
 int _starpu_fpga_copy_fpga_to_ram_async(void *src, void *dst, size_t size)
 {
-printf("fpga to ram, fpga @= %p\n",src);
+	printf("fpga to ram, fpga @= %p\n",src);
 	memcpy(dst,src,size);
 	return 0;
 }
@@ -441,7 +450,8 @@ int _starpu_run_fpga(struct _starpu_worker *workerarg)
 	return 0;
 }
 
-void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl){
+void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl)
+{
 	struct starpu_task *task = j->task;
 	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 
@@ -449,7 +459,7 @@ void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl
 	for (index = 0; index < nbuffers; index++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
-//		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
+		//		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
 		unsigned *interface_id = (unsigned *)malloc(sizeof(unsigned));
 		*interface_id = handle->ops->interfaceid;
 
@@ -560,7 +570,6 @@ int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, vo
 	return 0;
 }
 
-
 struct _starpu_driver_ops _starpu_driver_fpga_ops =
 {
 	.init = _starpu_fpga_driver_init,
@@ -587,7 +596,7 @@ struct _starpu_node_ops _starpu_driver_fpga_node_ops =
 	.copy_interface_to[STARPU_DISK_RAM] = NULL,
 	.copy_interface_to[STARPU_MIC_RAM] = NULL,
 	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
-        
+
         .wait_request_completion = NULL,
 	.test_request_completion = NULL,
 	.is_direct_access_supported = NULL,

+ 6 - 6
src/drivers/max/driver_fpga.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2012-2014, 2020  Université de Bordeaux
- * Copyright (C) 2010, 2012  CNRS
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,9 +17,9 @@
 #ifndef __DRIVER_FPGA_H__
 #define __DRIVER_FPGA_H__
 //#ifdef NOT_DEFINED
-	#ifdef STARPU_USE_FPGA
-	#include <starpu_fpga.h>
-	#endif
+#ifdef STARPU_USE_FPGA
+#include <starpu_fpga.h>
+#endif
 //#endif
 #include <starpu.h>
 #include <common/config.h>
@@ -37,7 +36,6 @@ typedef unsigned * fpga_mem;
 extern struct _starpu_driver_ops _starpu_driver_fpga_ops;
 extern struct _starpu_node_ops _starpu_driver_fpga_node_ops;
 
-
 int fpgaSetDevice(unsigned devid);
 void _starpu_init_fpga(void);
 void _starpu_fpga_discover_devices (struct _starpu_machine_config *config);
@@ -61,8 +59,10 @@ int _starpu_fpga_copy_fpga_to_ram_async(void *src, void *dst, size_t size);
 
 int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
 int _starpu_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel);
+
 #else
 #define _starpu_fpga_discover_devices(config) ((void) (config))
 #endif
+
 #endif //  __DRIVER_FPGA_H__
 

+ 6 - 3
tests/perfmodels/LMemLoopbackCpuCode.c

@@ -16,8 +16,10 @@
 int check(int size, int32_t *outData, int32_t *inA, int32_t *inB)
 {
 	int status = 0;
-	for (int i = 0; i < size; i++) {
-		if (outData[i] != inA[i] + inB[i]) {
+	for (int i = 0; i < size; i++)
+	{
+		if (outData[i] != inA[i] + inB[i])
+		{
 			fprintf(stderr, "[%d] Verification error, out: %u != expected: %u\n",
 				i, outData[i], inA[i] + inB[i]);
 			status = 1;
@@ -33,7 +35,8 @@ int main()
 	int32_t *inA = (int32_t*) malloc(sizeBytes);
 	int32_t *inB = (int32_t*) malloc(sizeBytes);
 
-	for (int i = 0; i < size; i++) {
+	for (int i = 0; i < size; i++)
+	{
 		inA[i] = i;
 		inB[i] = size - i;
 	}

+ 231 - 242
tests/perfmodels/max_fpga.c

@@ -1,3 +1,18 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
 #include <starpu.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -10,306 +25,280 @@
 
 
 void fpga_impl(void *buffers[], void *cl_arg)
-{   
-    (void)cl_arg;
-    
-    int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]);
-    int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]);
-    int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]);
-
-int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
-
-
-    int size = STARPU_VECTOR_GET_NX(buffers[0]);
-    
-    int sizeBytes=SIZE *sizeof(int32_t);
-    size_t LMemsize= SIZE *sizeof(int32_t);
-    
-    size_t poubelle = 0xc0000;
-
-    size_t ptrCT1 = 0x00000000000000c0;
-
-    size_t ptrAT2 = ptrCT1;
-    size_t ptrBT2 = ptrCT1;
-    size_t ptrCT2 = 0x0000000000000180;
-
-    size_t ptrAT3 = ptrCT2;
-    size_t ptrBT3 = ptrCT2;
-
-    printf("Loading DFE memory.\n");
-
-/* C = A+B */
-    StreamFMA(SIZE, ptrA, sizeBytes, ptrB, sizeBytes, poubelle_cpu, sizeBytes,
-    poubelle, LMemsize,
-    poubelle, LMemsize,
-    poubelle, LMemsize,
-    poubelle, LMemsize,
-    ptrCT1, LMemsize,
-    poubelle, LMemsize);
-printf("T1 finished\n");
-
-/* C = A*B */
-    StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes,
-    ptrAT2, LMemsize,
-    poubelle, LMemsize,
-    ptrBT2, LMemsize,
-    poubelle, LMemsize,
-    poubelle, LMemsize,
-    ptrCT2, LMemsize);
-printf("T2 finished\n");
-
-/* C = A+B */
-    StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, ptrC, sizeBytes,
-    poubelle, LMemsize,
-    ptrAT3, LMemsize,
-    poubelle, LMemsize,
-    ptrBT3, LMemsize,
-    poubelle, LMemsize,
-    poubelle, LMemsize);
-printf("T3 finished\n");
-
-    printf("Running DFE.\n");
-              
-  }
+{
+	(void)cl_arg;
+
+	int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]);
+	int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]);
+	int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]);
+
+	int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
+	int size = STARPU_VECTOR_GET_NX(buffers[0]);
+
+	int sizeBytes=SIZE *sizeof(int32_t);
+	size_t LMemsize= SIZE *sizeof(int32_t);
+
+	size_t poubelle = 0xc0000;
+
+	size_t ptrCT1 = 0x00000000000000c0;
+
+	size_t ptrAT2 = ptrCT1;
+	size_t ptrBT2 = ptrCT1;
+	size_t ptrCT2 = 0x0000000000000180;
+
+	size_t ptrAT3 = ptrCT2;
+	size_t ptrBT3 = ptrCT2;
+
+	printf("Loading DFE memory.\n");
+
+	/* C = A+B */
+	StreamFMA(SIZE, ptrA, sizeBytes, ptrB, sizeBytes, poubelle_cpu, sizeBytes,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize,
+		  ptrCT1, LMemsize,
+		  poubelle, LMemsize);
+	printf("T1 finished\n");
+
+	/* C = A*B */
+	StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes,
+		  ptrAT2, LMemsize,
+		  poubelle, LMemsize,
+		  ptrBT2, LMemsize,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize,
+		  ptrCT2, LMemsize);
+	printf("T2 finished\n");
+
+	/* C = A+B */
+	StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, ptrC, sizeBytes,
+		  poubelle, LMemsize,
+		  ptrAT3, LMemsize,
+		  poubelle, LMemsize,
+		  ptrBT3, LMemsize,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize);
+	printf("T3 finished\n");
+
+	printf("Running DFE.\n");
 
+}
 
 static struct starpu_codelet cl =
 {
-    .fpga_funcs = {fpga_impl},
-  
-    .nbuffers = 3,
-    .modes = {STARPU_R, STARPU_R, STARPU_W}
+ 	.fpga_funcs = {fpga_impl},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_R, STARPU_W}
 };
 
-
-
 void fpga_impl1(void *buffers[], void *cl_arg)
-{   
-    (void)cl_arg;
-    
-    int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]);
-    int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]);
-    size_t   ptrC = (size_t)   STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */
+{
+	(void)cl_arg;
+
+	int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]);
+	int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]);
+	size_t   ptrC = (size_t)   STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */
 
-int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
+	int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
+	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	int sizeBytes=SIZE *sizeof(int32_t);
+	size_t LMemsize= SIZE *sizeof(int32_t);
 
-    int size = STARPU_VECTOR_GET_NX(buffers[0]);
-    
-    int sizeBytes=SIZE *sizeof(int32_t);
-    size_t LMemsize= SIZE *sizeof(int32_t);
-    
-    size_t poubelle = 0xc0000;
+	size_t poubelle = 0xc0000;
 
 #if 0
-printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC);
-//XXX
-    ptrC = 0x00000000000000c0;
+	printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC);
+	//XXX
+	ptrC = 0x00000000000000c0;
 #endif
 
-printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC);
-/* C = A+B */
-    StreamFMA(SIZE, ptrA, sizeBytes, ptrB, sizeBytes, poubelle_cpu, sizeBytes,
-    poubelle, LMemsize,
-    poubelle, LMemsize,
-    poubelle, LMemsize,
-    poubelle, LMemsize,
-    ptrC, LMemsize,
-    poubelle, LMemsize);
-printf("T1 finished\n");
-
-  }
-
+	printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC);
+	/* C = A+B */
+	StreamFMA(SIZE, ptrA, sizeBytes, ptrB, sizeBytes, poubelle_cpu, sizeBytes,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize,
+		  ptrC, LMemsize,
+		  poubelle, LMemsize);
+	printf("T1 finished\n");
+}
 
 static struct starpu_codelet cl1 =
 {
-    .fpga_funcs = {fpga_impl1},
-  
-    .nbuffers = 3,
-    .modes = {STARPU_R, STARPU_R, STARPU_W},
-    .specific_nodes = 1,
-    .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL},
+ 	.fpga_funcs = {fpga_impl1},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_R, STARPU_W},
+	.specific_nodes = 1,
+	.nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL},
 };
 
 void fpga_impl2(void *buffers[], void *cl_arg)
-{   
-    (void)cl_arg;
-    
-    size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */
-    size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */
-    size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */
+{
+	(void)cl_arg;
 
-int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
+	size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */
+	size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */
+	size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */
 
+	int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
+	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
-    int size = STARPU_VECTOR_GET_NX(buffers[0]);
-    
-    int sizeBytes=SIZE *sizeof(int32_t);
-    size_t LMemsize= SIZE *sizeof(int32_t);
-    
-    size_t poubelle = 0xc0000;
+	int sizeBytes=SIZE *sizeof(int32_t);
+	size_t LMemsize= SIZE *sizeof(int32_t);
+
+	size_t poubelle = 0xc0000;
 
 #if 0
-printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC);
-//XXX
-    ptrA = 0x00000000000000c0;
-    ptrB = 0x00000000000000c0;
-    ptrC = 0x0000000000000180;
+	printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC);
+	//XXX
+	ptrA = 0x00000000000000c0;
+	ptrB = 0x00000000000000c0;
+	ptrC = 0x0000000000000180;
 #endif
 
-printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC);
-/* C = A*B */
-    StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes,
-    ptrA, LMemsize,
-    poubelle, LMemsize,
-    ptrB, LMemsize,
-    poubelle, LMemsize,
-    poubelle, LMemsize,
-    ptrC, LMemsize);
-printf("T2 finished\n");
-              
-  }
+	printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC);
+	/* C = A*B */
+	StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes,
+		  ptrA, LMemsize,
+		  poubelle, LMemsize,
+		  ptrB, LMemsize,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize,
+		  ptrC, LMemsize);
+	printf("T2 finished\n");
+}
 
 static struct starpu_codelet cl2 =
 {
-    .fpga_funcs = {fpga_impl2},
-  
-    .nbuffers = 3,
-    .modes = {STARPU_R, STARPU_R, STARPU_W}
-    /* local by default */
+ 	.fpga_funcs = {fpga_impl2},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_R, STARPU_W}
+	/* local by default */
 };
 
 void fpga_impl3(void *buffers[], void *cl_arg)
-{   
-    (void)cl_arg;
-    
-    size_t   ptrA = (size_t)   STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */
-    size_t   ptrB = (size_t)   STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */
-    int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]);
+{
+	(void)cl_arg;
+
+	size_t   ptrA = (size_t)   STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */
+	size_t   ptrB = (size_t)   STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */
+	int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]);
 
-int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
+	int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
+	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
+	int sizeBytes=SIZE *sizeof(int32_t);
+	size_t LMemsize= SIZE *sizeof(int32_t);
 
-    int size = STARPU_VECTOR_GET_NX(buffers[0]);
-    
-    int sizeBytes=SIZE *sizeof(int32_t);
-    size_t LMemsize= SIZE *sizeof(int32_t);
-    
-    size_t poubelle = 0xc0000;
+	size_t poubelle = 0xc0000;
 
 #if 0
-printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC);
-//XXX
-    ptrA = 0x0000000000000180;
-    ptrB = 0x0000000000000180;
+	printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC);
+	//XXX
+	ptrA = 0x0000000000000180;
+	ptrB = 0x0000000000000180;
 #endif
 
-printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC);
-/* C = A+B */
-    StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, ptrC, sizeBytes,
-    poubelle, LMemsize,
-    ptrA, LMemsize,
-    poubelle, LMemsize,
-    ptrB, LMemsize,
-    poubelle, LMemsize,
-    poubelle, LMemsize);
-printf("T3 finished\n");
-  }
+	printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC);
+	/* C = A+B */
+	StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, ptrC, sizeBytes,
+		  poubelle, LMemsize,
+		  ptrA, LMemsize,
+		  poubelle, LMemsize,
+		  ptrB, LMemsize,
+		  poubelle, LMemsize,
+		  poubelle, LMemsize);
+	printf("T3 finished\n");
+}
 
 static struct starpu_codelet cl3 =
 {
-    .fpga_funcs = {fpga_impl3},
-  
-    .nbuffers = 3,
-    .modes = {STARPU_R, STARPU_R, STARPU_W},
-    .specific_nodes = 1,
-    .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU},
+ 	.fpga_funcs = {fpga_impl3},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_R, STARPU_W},
+	.specific_nodes = 1,
+	.nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU},
 };
 
-
-
 int main(int argc, char **argv)
 {
+	/* Enable profiling */
+	starpu_profiling_status_set(1);
+
+	struct starpu_conf conf;
+	starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c;
+	int ret;
+	int size=1234;
+
+	starpu_conf_init(&conf);
+	conf.sched_policy_name = "eager";
+	conf.calibrate = 0;
+
+	ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	int32_t a[SIZE];
+	int32_t b[SIZE];
+	int32_t c[SIZE];
+
+	int i;
+	for(i = 0; i < SIZE; ++i)
+	{
+		a[i] = random() % 100;
+		b[i] = random() % 100;
+	}
 
-    /* Enable profiling */
-    starpu_profiling_status_set(1);
-
-    struct starpu_conf conf;
-    starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c;
-    int ret;
-    int size=1234;
-
-    starpu_conf_init(&conf);
-
-    conf.sched_policy_name = "eager";
-    conf.calibrate = 0;
-
-    ret = starpu_initialize(&conf, &argc, &argv);
-    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
-    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-    
-    int32_t a[SIZE];
-    int32_t b[SIZE];
-    int32_t c[SIZE];
-
-    int i;
-    for(i = 0; i < SIZE; ++i)
-    {
-        a[i] = random() % 100;
-        b[i] = random() % 100;
-    }
-
-    starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0]));
-    starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0]));
+	starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0]));
+	starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0]));
 
-    starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0]));
-    starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0]));
+	starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0]));
+	starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0]));
 
-    starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0]));
+	starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0]));
 
 #if 0
-    ret = starpu_task_insert(&cl, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_c, STARPU_TASK_SYNCHRONOUS, 1, 0);
-    fprintf(stderr,"task submitted %d\n", ret);
+	ret = starpu_task_insert(&cl, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_c, STARPU_TASK_SYNCHRONOUS, 1, 0);
+	fprintf(stderr,"task submitted %d\n", ret);
 #else
-    ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0);
-    fprintf(stderr,"task submitted %d\n", ret);
-    ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0);
-    fprintf(stderr,"task submitted %d\n", ret);
-    ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0);
-    fprintf(stderr,"task submitted %d\n", ret);
+	ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0);
+	fprintf(stderr,"task submitted %d\n", ret);
+	ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0);
+	fprintf(stderr,"task submitted %d\n", ret);
+	ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0);
+	fprintf(stderr,"task submitted %d\n", ret);
 #endif
-    
-    starpu_data_unregister(handle_a);
-    starpu_data_unregister(handle_b);
-    starpu_data_unregister(handle_c);
-    
-    ret = EXIT_SUCCESS;
-
-    for (i = 0; i < SIZE; ++i) 
-    {
-	int ct1 = a[i] + b[i];
-	int ct2 = ct1 * ct1;
-	int ct3 = ct2 + ct2;
-
-	if (c[i] != ct3)
-	    ret = EXIT_FAILURE;
-
-	if (i < 10) {
-	    printf("%d == %d\n", c[i], ct3);
-	    if (c[i] != ct3)
-		printf("OOOPS\n");
-	}
-    }
 
+	starpu_data_unregister(handle_a);
+	starpu_data_unregister(handle_b);
+	starpu_data_unregister(handle_c);
 
-    starpu_shutdown();
+	ret = EXIT_SUCCESS;
 
-    if (ret == EXIT_SUCCESS)
-	printf("OK!\n");
+	for (i = 0; i < SIZE; ++i)
+	{
+		int ct1 = a[i] + b[i];
+		int ct2 = ct1 * ct1;
+		int ct3 = ct2 + ct2;
 
-    return ret;
+		if (c[i] != ct3)
+			ret = EXIT_FAILURE;
 
-}
+		if (i < 10)
+		{
+			printf("%d == %d\n", c[i], ct3);
+			if (c[i] != ct3)
+				printf("OOOPS\n");
+		}
+	}
+
+	starpu_shutdown();
 
+	if (ret == EXIT_SUCCESS)
+		printf("OK!\n");
 
+	return ret;
+}

+ 1 - 3
tests/perfmodels/opencl_memset_01.c

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012                                     Inria
- * Copyright (C) 2012,2015-2017                           CNRS
- * Copyright (C) 2014,2016                                Université de Bordeaux
+ * Copyright (C) 2012-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
tests/perfmodels/opencl_memset_kernel_01.cl

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012                                     Inria
- * Copyright (C) 2012,2015,2017                           CNRS
+ * Copyright (C) 2012-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
tests/perfmodels/regression_based_01.c

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012,2016                                Inria
- * Copyright (C) 2010-2015,2017                           Université de Bordeaux
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 2 - 4
tests/perfmodels/regression_based_02.c

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011,2012,2014                           Inria
- * Copyright (C) 2011-2016,2019                           Université de Bordeaux
- * Copyright (C) 2011-2017                                CNRS
- * Copyright (C) 2011                                     Télécom-SudParis
+ * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2011       Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 2 - 4
tests/perfmodels/regression_based_03.c

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011,2012,2014                           Inria
- * Copyright (C) 2011-2016,2019                           Université de Bordeaux
- * Copyright (C) 2011-2017                                CNRS
- * Copyright (C) 2011                                     Télécom-SudParis
+ * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2011       Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by