Samuel Thibault 5 anos atrás
pai
commit
f4fc738d3b

+ 0 - 1
src/core/task.c

@@ -748,7 +748,6 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 	{
 		where |= STARPU_MIC|STARPU_MPI_MS;
 	}
-
 	cl->where = where;
 
 	STARPU_WMB();

+ 0 - 3
src/core/workers.c

@@ -628,17 +628,14 @@ static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
 			if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id)
 				return 0;
 			break;
-              
 		case STARPU_CUDA_WORKER:
 			if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
 				return 0;
 			break;
-            
 		case STARPU_OPENCL_WORKER:
 			if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
 				return 0;
 			break;
-               
 		default:
 			STARPU_ABORT();
 		}

+ 0 - 99
src/datawizard/copy_driver.c

@@ -155,8 +155,6 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
 	STARPU_ASSERT(src_replicate->allocated);
 	STARPU_ASSERT(dst_replicate->allocated);
-	//int ret = 0;
-
 
 #ifdef STARPU_SIMGRID
 	if (src_node == STARPU_MAIN_RAM || dst_node == STARPU_MAIN_RAM)
@@ -187,7 +185,6 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 	}
 #endif
 
-       
 	struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(src_node);
 	if (node_ops && node_ops->copy_interface_to[dst_kind])
 	{
@@ -198,71 +195,6 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 		STARPU_ABORT_MSG("No copy_interface_to function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node)));
 	}
 #endif /* !SIMGRID */
-
-
-#ifdef STARPU_USE_FPGA
-	
-if (src_kind == STARPU_CPU_RAM || dst_kind == STARPU_FPGA_RAM)
-        {
-       
-		/* RAM -> FPGA */
-		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
-				!(copy_methods->ram_to_fpga_async || copy_methods->any_to_any))
-		{
-			/* this is not associated to a request so it's synchronous */
-			STARPU_ASSERT(copy_methods->ram_to_fpga || copy_methods->any_to_any);
-			if (copy_methods->ram_to_fpga)
-				copy_methods->ram_to_fpga(src_interface, src_node, dst_interface, dst_node);
-			else
-				copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
-		}
-		else
-		{
-			//req->async_channel.type = STARPU_FPGA_RAM;
-			if (copy_methods->ram_to_fpga_async)
-				copy_methods->ram_to_fpga_async(src_interface, src_node, dst_interface, dst_node);
-			else
-			{
-				STARPU_ASSERT(copy_methods->any_to_any);
-				copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
-			}
-			//_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), dst_node);
-		}
-		
-}
-     
-	
-if (src_node == STARPU_FPGA_RAM || dst_node == STARPU_CPU_RAM)
-{
-        
-		/* FPGA -> RAM */
-		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
-				!(copy_methods->fpga_to_ram_async || copy_methods->any_to_any))
-		{
-			/* this is not associated to a request so it's synchronous */
-			STARPU_ASSERT(copy_methods->fpga_to_ram || copy_methods->any_to_any);
-			if (copy_methods->fpga_to_ram)
-				copy_methods->fpga_to_ram(src_interface, src_node, dst_interface, dst_node);
-			else
-				copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
-		}
-		else
-		{
-			//req->async_channel.type = STARPU_FPGA_RAM;
-			if (copy_methods->fpga_to_ram_async)
-				copy_methods->fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node);
-			else
-			{
-				STARPU_ASSERT(copy_methods->any_to_any);
-				copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
-			}
-			//_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), src_node);
-		}
-	}
-            
-#endif
-
-
 }
 
 int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_data_handle_t handle,
@@ -367,10 +299,7 @@ void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node
 int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data)
 {
 	struct _starpu_async_channel *async_channel = async_data;
-
-        enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
 	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
-
 	struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(src_node);
 
 	if (node_ops && node_ops->copy_data_to[dst_kind])
@@ -385,33 +314,6 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
 		STARPU_ABORT_MSG("No copy_data_to function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node)));
 		return -1;
 	}
-
-#ifdef STARPU_USE_FPGA
-
-	
-if (src_kind == STARPU_FPGA_RAM || dst_kind == STARPU_CPU_RAM)
-{
-       
-		if (async_data)
-		
-                        return _starpu_fpga_copy_fpga_to_ram_async((void*) (src + src_offset), (void*) (dst + dst_offset), size);
-                      
-		else
-			{return _starpu_fpga_copy_fpga_to_ram((void*) (src + src_offset), (void*) (dst + dst_offset), size); 
-                          }  
-      }             
-                    
-   if (src_kind == STARPU_CPU_RAM || dst_kind == STARPU_FPGA_RAM )
-        {
-      
-		if (async_data)
-			{return _starpu_fpga_copy_ram_to_fpga_async((void*) (src + src_offset), (void*) (dst + dst_offset), size);}
-		else
-			{return _starpu_fpga_copy_ram_to_fpga((void*) (src + src_offset), (void*) (dst + dst_offset), size);  }  
-      }     
-    
-#endif
-
 }
 
 int starpu_interface_copy2d(uintptr_t src, size_t src_offset, unsigned src_node,
@@ -565,7 +467,6 @@ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_
 #endif /* !SIMGRID */
 }
 
-
 unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *async_channel)
 {
 #ifdef STARPU_SIMGRID

+ 0 - 1
src/datawizard/interfaces/variable_interface.c

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_fpga.h>
 
 static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
 

+ 0 - 2
src/datawizard/malloc.c

@@ -559,7 +559,6 @@ int starpu_free(void *A)
 static uintptr_t _starpu_malloc_on_node(unsigned dst_node, size_t size, int flags)
 {
 	uintptr_t addr = 0;
-         unsigned devid = starpu_memory_node_get_devid(dst_node);
 
 	/* Handle count first */
 	if (flags & STARPU_MALLOC_COUNT)
@@ -583,7 +582,6 @@ static uintptr_t _starpu_malloc_on_node(unsigned dst_node, size_t size, int flag
 		if (flags & STARPU_MALLOC_COUNT)
 			starpu_memory_deallocate(dst_node, size);
 	}
-
 	return addr;
 }
 

+ 7 - 1
src/drivers/cpu/driver_cpu.c

@@ -36,6 +36,7 @@
 #include <drivers/mic/driver_mic_source.h>
 #include <drivers/mpi/driver_mpi_source.h>
 #include <drivers/disk/driver_disk.h>
+#include <drivers/max/driver_fpga.h>
 #include <core/sched_policy.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_nodes.h>
@@ -538,6 +539,9 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 	.copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
 #endif
+#ifdef STARPU_USE_FPGA
+	.copy_interface_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_interface_from_cpu_to_fpga,
+#endif
 
 	.copy_data_to[STARPU_UNUSED] = NULL,
 	.copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data,
@@ -551,7 +555,6 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
 #endif
-
 	.copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_cpu_to_disk,
 #ifdef STARPU_USE_MIC
 	.copy_data_to[STARPU_MIC_RAM] = _starpu_mic_copy_data_from_cpu_to_mic,
@@ -563,6 +566,9 @@ struct _starpu_node_ops _starpu_driver_cpu_node_ops =
 #else
 	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 #endif
+#ifdef STARPU_USE_FPGA
+	.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_cpu_to_fpga,
+#endif
 
 	.copy2d_data_to[STARPU_UNUSED] = NULL,
 	.copy2d_data_to[STARPU_CPU_RAM] = NULL,

+ 94 - 5
src/drivers/max/driver_fpga.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2015  Université de Bordeaux
+ * Copyright (C) 2009-2015, 2020  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
  * Copyright (C) 2011  Télécom-SudParis
@@ -466,6 +466,16 @@ printf("fpga to ram, fpga @= %p\n",src);
 	return 0;
  //LMemLoopback_readLMem(src, size, dst);
 }
+/* Transfer SIZE bytes from the address SRC to the address DST (FPGA to FPGA,
+ * per the function name). Currently done with a host memcpy(); returns 0.
+ */
+int _starpu_fpga_copy_fpga_to_fpga(void *src, void *dst, size_t size)
+{
+	/* TODO: use the real LMem transfer instead, e.g. LMemLoopback_XXXLMem(src, size, dst) */
+	printf("fpga to fpga, fpga @= %p\n", src);
+	memcpy(dst, src, size);
+	return 0;
+}
 
 /* Asynchronous transfers */
 int _starpu_fpga_copy_ram_to_fpga_async(void *src, void *dst, size_t size)
@@ -528,6 +538,85 @@ void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl
 	}
 }
 
+int _starpu_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
+{	/* RAM -> FPGA copy of ssize bytes through the non-async routine; src_node, dst_node and async_channel are ignored. */
+	return _starpu_fpga_copy_ram_to_fpga((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
+}
+
+int _starpu_fpga_copy_data_from_fpga_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
+{	/* FPGA -> RAM copy of ssize bytes through the non-async routine; src_node, dst_node and async_channel are ignored. */
+	return _starpu_fpga_copy_fpga_to_ram((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
+}
+
+int _starpu_fpga_copy_data_from_fpga_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
+{	/* FPGA -> FPGA copy of ssize bytes; src_node, dst_node and async_channel are ignored. */
+	return _starpu_fpga_copy_fpga_to_fpga((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
+}
+
+int _starpu_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
+{
+	/* Copy one data interface from FPGA memory to main RAM with the handle's copy methods; returns 0. */
+	int src_kind = starpu_node_get_kind(src_node);
+	int dst_kind = starpu_node_get_kind(dst_node);
+	STARPU_ASSERT(src_kind == STARPU_FPGA_RAM && dst_kind == STARPU_CPU_RAM);
+
+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
+	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
+	    !(copy_methods->fpga_to_ram_async || copy_methods->any_to_any))
+	{
+		/* No request, async copies disabled, or no async-capable method: copy synchronously */
+		STARPU_ASSERT(copy_methods->fpga_to_ram || copy_methods->any_to_any);
+		if (copy_methods->fpga_to_ram)
+			copy_methods->fpga_to_ram(src_interface, src_node, dst_interface, dst_node);
+		else
+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
+	}
+	else
+	{
+		/* TODO: set req->async_channel.type and initialise the FPGA event (both still disabled) */
+		if (copy_methods->fpga_to_ram_async)
+			copy_methods->fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node);
+		else
+		{
+			STARPU_ASSERT(copy_methods->any_to_any);
+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+		}
+	}
+	return 0; /* was missing: same result as the CPU -> FPGA counterpart */
+}
+
+int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
+{
+	/* Copy one data interface from main RAM to FPGA memory with the handle's copy methods; always returns 0. */
+	int src_kind = starpu_node_get_kind(src_node);
+	int dst_kind = starpu_node_get_kind(dst_node);
+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_FPGA_RAM);
+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
+	/* Asynchronous only with a request, async copies enabled, and an async-capable method */
+	int use_async = req && !starpu_asynchronous_copy_disabled() && !starpu_asynchronous_fpga_copy_disabled() &&
+		(copy_methods->ram_to_fpga_async || copy_methods->any_to_any);
+	if (use_async)
+	{
+		/* TODO: set req->async_channel.type and initialise the FPGA event (both still disabled) */
+		if (copy_methods->ram_to_fpga_async)
+			copy_methods->ram_to_fpga_async(src_interface, src_node, dst_interface, dst_node);
+		else
+		{
+			STARPU_ASSERT(copy_methods->any_to_any);
+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+		}
+	}
+	else
+	{
+		/* Synchronous fallback: dedicated method first, generic any_to_any otherwise */
+		STARPU_ASSERT(copy_methods->ram_to_fpga || copy_methods->any_to_any);
+		if (copy_methods->ram_to_fpga)
+			copy_methods->ram_to_fpga(src_interface, src_node, dst_interface, dst_node);
+		else
+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
+	}
+	return 0;
+}
 
 
 struct _starpu_driver_ops _starpu_driver_fpga_ops =
@@ -538,19 +627,19 @@ struct _starpu_driver_ops _starpu_driver_fpga_ops =
 	.deinit = _starpu_fpga_driver_deinit_from_worker
 };
 
-// TODO: structure node_ops, comme dans driver_cuda.c, avec starpu_fpga_allocate_memory, _starpu_fpga_copy_ram_to_fpga, etc.
+// TODO: complete the node_ops structure as in driver_cuda.c, with starpu_fpga_allocate_memory, etc.
 struct _starpu_node_ops _starpu_driver_fpga_node_ops =
 {
 	.copy_data_to[STARPU_UNUSED] = NULL,
-	.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_fpga_to_ram,
-	.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_ram_to_fpga,
+	.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
+	.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
 	.copy_data_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_data_to[STARPU_DISK_RAM] = NULL,
 	.copy_data_to[STARPU_MIC_RAM] = NULL,
 	.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
 
 	.copy_interface_to[STARPU_UNUSED] = NULL,
-	.copy_interface_to[STARPU_CPU_RAM] = NULL,
+	.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
 	.copy_interface_to[STARPU_FPGA_RAM] = NULL,
 	.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
 	.copy_interface_to[STARPU_DISK_RAM] = NULL,

+ 3 - 1
src/drivers/max/driver_fpga.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2012-2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2012-2014, 2020  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -58,5 +58,7 @@ void copy_ram_to_fpga(int32_t *src, int32_t dst, size_t size);
 void copy_ram_to_fpga(int32_t *src, int32_t dst, size_t size);
 int _starpu_fpga_copy_ram_to_fpga_async(void *src, void *dst, size_t size);
 int _starpu_fpga_copy_fpga_to_ram_async(void *src, void *dst, size_t size);
+
+int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
 #endif //  __DRIVER_FPGA_H__