Ver código fonte

fix warnings and build

Samuel Thibault 5 anos atrás
pai
commit
a2c35e133d
2 arquivos alterados com 44 adições e 107 exclusões
  1. 40 102
      src/drivers/max/driver_fpga.c
  2. 4 5
      src/drivers/max/driver_fpga.h

+ 40 - 102
src/drivers/max/driver_fpga.c

@@ -49,7 +49,10 @@
 /* the number of FPGA devices */
 static unsigned  nfpgafpgas = -1; 
 static fpgaDeviceProp props[STARPU_MAXFPGADEVS];
-static size_t global_mem[STARPU_MAXFPGADEVS] = { 128*1024*1024*1024 };
+static size_t global_mem[STARPU_MAXFPGADEVS] = { 128ULL*1024*1024*1024 };
+
+static void _starpu_fpga_limit_global_mem(unsigned );
+static size_t _starpu_fpga_get_global_mem_size(unsigned devid);
 
 void fpga_msg(char *msg){
 	printf(FPGA_OK "%s\n" NORMAL, msg);
@@ -70,7 +73,7 @@ int fpga_allocate_memory(fpga_mem *ptr, size_t size){
 //This allocates BYTES
 	char *msg1="You asked to allocate ";
 //	printf(KCYN "%s%d*%d\n" KBLU, msg1,size,sizeof(unsigned));
-	printf(FPGA_OK "%s%d bytes\n" NORMAL, msg1,size);
+	printf(FPGA_OK "%s%lu bytes\n" NORMAL, msg1,size);
 
 	*ptr =(fpga_mem) malloc(size);
   
@@ -160,40 +163,20 @@ int _starpu_fpga_driver_init(struct _starpu_worker *worker){
 
 static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *fpga_args, int rank, struct starpu_perfmodel_arch* perf_arch){
 	int ret;
-	int is_parallel_task = (j->task_size > 1);
 	int profiling = starpu_profiling_status_get();
-	struct timespec codelet_start, codelet_end;
 
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
-#ifdef STARPU_OPENMP
-	/* At this point, j->continuation as been cleared as the task is being
-	 * woken up, thus we use j->discontinuous instead for the check */
-	const unsigned continuation_wake_up = j->discontinuous;
-#else
-	const unsigned continuation_wake_up = 0;
-#endif
 
 	STARPU_ASSERT(cl);
 
-	if (rank == 0 && !continuation_wake_up)
-	{
-		ret = _starpu_fetch_task_input(task, j, fpga_args);
-		if (ret != 0)
-		{
-			/* there was not enough memory so the codelet cannot be executed right now ... */
-			/* push the codelet back and try another one ... */
-			return -EAGAIN;
-		}
-	}
-
-	if (is_parallel_task)
+	/* TODO: use asynchronous */
+	ret = _starpu_fetch_task_input(task, j, 0);
+	if (ret != 0)
 	{
-		STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
-
-		/* In the case of a combined worker, the scheduler needs to know
-		 * when each actual worker begins the execution */
-		_starpu_sched_pre_exec_hook(worker_task);
+		/* there was not enough memory so the codelet cannot be executed right now ... */
+		/* push the codelet back and try another one ... */
+		return -EAGAIN;
 	}
 
 	/* Give profiling variable */
@@ -204,13 +187,9 @@ static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker
 	if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
 	{
 		_starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl);
-		char *kernel_type = _starpu_task_get_fpga_kernel_type_nth_implementation(cl, j->nimpl);
+		//char *kernel_type = _starpu_task_get_fpga_kernel_type_nth_implementation(cl, j->nimpl);
 //printf("chanel reserved: %d \n",chnl);
 
-		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
-			/* bind to parallel worker */
-			_starpu_bind_thread_on_cpus(_starpu_get_combined_worker_struct(j->combined_workerid));
-
 		STARPU_ASSERT_MSG(func, "when STARPU_FPGA is defined in 'where', fpga_func or fpga_funcs has to be defined");
 		if (_starpu_get_disable_kernels() <= 0)
 		{
@@ -221,43 +200,13 @@ static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker
 			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 			_STARPU_TRACE_END_EXECUTING();
 		}
-		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
-			/* rebind to single CPU */
-			_starpu_bind_thread_on_cpu(fpga_args->config, fpga_args->bindid, fpga_args->workerid);
 	}
 
 	_starpu_driver_end_job(fpga_args, j, perf_arch, rank, profiling);
 
-	if (is_parallel_task)
-	{
-		STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
-		ANNOTATE_HAPPENS_BEFORE(&j->after_work_busy_barrier);
-		(void) STARPU_ATOMIC_ADD(&j->after_work_busy_barrier, -1);
-		if (rank == 0)
-		{
-			/* Wait with a busy barrier for other workers to have
-			 * finished with the blocking barrier before we can
-			 * safely drop the job structure */
-			while (j->after_work_busy_barrier > 0)
-			{
-				STARPU_UYIELD();
-				STARPU_SYNCHRONIZE();
-			}
-			ANNOTATE_HAPPENS_AFTER(&j->after_work_busy_barrier);
-		}
-	}
+	_starpu_driver_update_job_feedback(j, fpga_args, perf_arch, profiling);
 
-	if (rank == 0)
-	{
-		_starpu_driver_update_job_feedback(j, fpga_args, perf_arch, profiling);
-
-#ifdef STARPU_OPENMP
-		if (!j->continuation)
-#endif
-		{
-			_starpu_push_task_output(j);
-		}
-	}
+	_starpu_push_task_output(j);
 
 	return 0;
 }
@@ -399,37 +348,23 @@ void *_starpu_fpga_worker(void *_arg){
 	return NULL;
 }
 
-int _starpu_fpga_allocate_memory(int devid, fpga_mem *addr, size_t size) 
+uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags) 
 {
+	(void) flags;
+	unsigned devid = starpu_memory_node_get_devid(dst_node);
+	STARPU_ASSERT(devid == 0); // For now
+
 	static fpga_mem current_address = 0;
+	fpga_mem addr;
 // TODO: vérifier si current_address + size > taille de la LMEm
- 	*addr = current_address;
+ 	addr = current_address;
 	current_address += size;
-printf("fpga mem returned from allocation @: %p\n",*addr);
+printf("fpga mem returned from allocation @: %p\n",addr);
 //success = 0
-        return 0;
+        return (uintptr_t) addr;
 }
 
 
-int _starpu_fpga_driver_init_from_worker(struct _starpu_worker *worker)
-{
-	return _starpu_fpga_driver_init(worker->set);
-}
-
-int _starpu_fpga_run_from_worker(struct _starpu_worker *worker)
-{
-	return _starpu_run_fpga(worker->set);
-}
-
-int _starpu_fpga_driver_run_once_from_worker(struct _starpu_worker *worker)
-{
-	return _starpu_fpga_driver_run_once(worker->set);
-}
-
-int _starpu_fpga_driver_deinit_from_worker(struct _starpu_worker *worker)
-{
-	return _starpu_fpga_driver_deinit(worker->set);
-}
 
 int _starpu_fpga_copy_ram_to_fpga(void *src, void *dst, size_t size)
 {
@@ -442,7 +377,7 @@ printf("ram to fpga, fpga @= %p\n",dst);
 /* Transfer SIZE bytes from the address pointed to by SRC in the SRC_NODE memory
  * node to the address pointed to by DST in the DST_NODE memory node.
  */
-void copy_ram_to_fpga(int32_t *src, int32_t dst, size_t size)
+void copy_ram_to_fpga(void *src, void *dst, size_t size)
 {
 printf("ram to fpga, fpga @= %p\n",dst);
 
@@ -450,9 +385,9 @@ printf("ram to fpga, fpga @= %p\n",dst);
    
 }
 
-void copy_fpga_to_ram(int32_t *src, int32_t dst, size_t size)
+void copy_fpga_to_ram(void *src, void *dst, size_t size)
 {
-printf("ram to fpga, fpga @= %p\n",dst);
+printf("ram to fpga, fpga @= %p\n",src);
        //LMemLoopback_readLMem(size, src, dst);
 
 }
@@ -518,10 +453,10 @@ void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl
 		{
 			case STARPU_VARIABLE_INTERFACE_ID:
 			{
-				void *ptr = STARPU_VARIABLE_GET_PTR(buffers[index]);
+				void *ptr = (void*) STARPU_VARIABLE_GET_PTR(buffers[index]);
 				size_t size = STARPU_VARIABLE_GET_ELEMSIZE(buffers[index]);
 				//fpga_data_send(chnl,ptr,size);
-				printf("Driver Fpga @: %p, size %d \n",ptr,size);
+				printf("Driver Fpga @: %p, size %lu \n",ptr,size);
 				break;
 			}
 			case STARPU_MATRIX_INTERFACE_ID:
@@ -540,17 +475,17 @@ void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl
 
 int _starpu_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
 {
-	return _starpu_fpga_copy_ram_to_fpga(src + src_offset, dst + dst_offset, size);
+	return _starpu_fpga_copy_ram_to_fpga((void*) src + src_offset, (void*) dst + dst_offset, ssize);
 }
 
 int _starpu_fpga_copy_data_from_fpga_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
 {
-	return _starpu_fpga_copy_fpga_to_ram(src + src_offset, dst + dst_offset, size);
+	return _starpu_fpga_copy_fpga_to_ram((void*) src + src_offset, (void*) dst + dst_offset, ssize);
 }
 
 int _starpu_fpga_copy_data_from_fpga_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
 {
-	return _starpu_fpga_copy_fpga_to_fpga(src + src_offset, dst + dst_offset, size);
+	return _starpu_fpga_copy_fpga_to_fpga((void*) src + src_offset, (void*) dst + dst_offset, ssize);
 }
 
 int _starpu_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
@@ -560,6 +495,8 @@ int _starpu_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, vo
 
 	STARPU_ASSERT(src_kind == STARPU_FPGA_RAM && dst_kind == STARPU_CPU_RAM);
 
+	int ret = 1;
+
 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
 	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
 	    !(copy_methods->fpga_to_ram_async || copy_methods->any_to_any))
@@ -575,14 +512,15 @@ int _starpu_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, vo
 	{
 		//req->async_channel.type = STARPU_FPGA_RAM;
 		if (copy_methods->fpga_to_ram_async)
-			copy_methods->fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node);
+			ret = copy_methods->fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node);
 		else
 		{
 			STARPU_ASSERT(copy_methods->any_to_any);
-			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
 		}
 		//_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), src_node);
 	}
+	return ret;
 }
 
 int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
@@ -621,10 +559,10 @@ int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, vo
 
 struct _starpu_driver_ops _starpu_driver_fpga_ops =
 {
-	.init = _starpu_fpga_driver_init_from_worker,
-	.run = _starpu_fpga_run_from_worker,
-	.run_once = _starpu_fpga_driver_run_once_from_worker,
-	.deinit = _starpu_fpga_driver_deinit_from_worker
+	.init = _starpu_fpga_driver_init,
+	.run = _starpu_run_fpga,
+	.run_once = _starpu_fpga_driver_run_once,
+	.deinit = _starpu_fpga_driver_deinit
 };
 
 // TODO: add a node_ops structure, as in driver_cuda.c, with starpu_fpga_allocate_memory, etc.

+ 4 - 5
src/drivers/max/driver_fpga.h

@@ -41,9 +41,7 @@ void _starpu_init_fpga(void);
 void _starpu_fpga_discover_devices (struct _starpu_machine_config *config);
 unsigned _starpu_fpga_get_device_count(void);
 
-static void _starpu_fpga_limit_global_mem(unsigned );
-static size_t _starpu_fpga_get_global_mem_size(unsigned devid);
-int _starpu_fpga_allocate_memory(int devid, fpga_mem *addr, size_t size);
+uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags);
 void *_starpu_fpga_worker(void *);
 struct _starpu_worker;
 int _starpu_run_fpga(struct _starpu_worker *);
@@ -54,11 +52,12 @@ int _starpu_fpga_driver_deinit(struct _starpu_worker *);
 void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int );
 int _starpu_fpga_copy_fpga_to_ram(void *src, void *dst, size_t size);
 int _starpu_fpga_copy_ram_to_fpga(void *src, void *dst, size_t size);
-void copy_ram_to_fpga(int32_t *src, int32_t dst, size_t size);
-void copy_ram_to_fpga(int32_t *src, int32_t dst, size_t size);
+void copy_ram_to_fpga(void *src, void * dst, size_t size);
+void copy_ram_to_fpga(void *src, void * dst, size_t size);
 int _starpu_fpga_copy_ram_to_fpga_async(void *src, void *dst, size_t size);
 int _starpu_fpga_copy_fpga_to_ram_async(void *src, void *dst, size_t size);
 
 int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
+int _starpu_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel);
 #endif //  __DRIVER_FPGA_H__