Prechádzať zdrojové kódy

Simplify datawizard code

Sylvain Henry pred 15 rokmi
rodič
commit
df21b0826a

+ 82 - 135
src/datawizard/copy_driver.c

@@ -97,146 +97,93 @@ cudaError_t cures;
 cudaStream_t *stream;
 #endif
 
-	switch (dst_kind) {
-	case STARPU_RAM:
-		switch (src_kind) {
-			case STARPU_RAM:
-				/* STARPU_RAM -> STARPU_RAM */
-				STARPU_ASSERT(copy_methods->ram_to_ram);
-				copy_methods->ram_to_ram(handle, src_node, dst_node);
-				break;
+	switch (MEMORY_NODE_TUPLE(src_kind,dst_kind)) {
+      case MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
+         /* STARPU_CPU_RAM -> STARPU_CPU_RAM */
+         STARPU_ASSERT(copy_methods->ram_to_ram);
+         copy_methods->ram_to_ram(handle, src_node, dst_node);
+         break;
 #ifdef STARPU_USE_CUDA
-			case STARPU_CUDA_RAM:
-				/* CUBLAS_RAM -> STARPU_RAM */
-				/* only the proper CUBLAS thread can initiate this ! */
-				if (_starpu_get_local_memory_node() == src_node)
-				{
-					/* only the proper CUBLAS thread can initiate this directly ! */
-					STARPU_ASSERT(copy_methods->cuda_to_ram);
-					if (!req || !copy_methods->cuda_to_ram_async)
-					{
-						/* this is not associated to a request so it's synchronous */
-						copy_methods->cuda_to_ram(handle, src_node, dst_node);
-					}
-					else {
-						cures = cudaEventCreate(&req->async_channel.cuda_event);
-						if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
-
-						stream = starpu_cuda_get_local_stream();
-						ret = copy_methods->cuda_to_ram_async(handle, src_node, dst_node, stream);
-
-						cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
-						if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
-					}
-				}
-				else
-				{
-					/* we should not have a blocking call ! */
-					STARPU_ABORT();
-				}
-				break;
-#endif
-#ifdef STARPU_USE_OPENCL
-         case STARPU_OPENCL_RAM:
-            /* OpenCL -> RAM */
-            if (_starpu_get_local_memory_node() == src_node)
-            {
-               STARPU_ASSERT(copy_methods->opencl_to_ram);
-               if (!req || !copy_methods->opencl_to_ram_async)
-               {
-                  /* this is not associated to a request so it's synchronous */
-                  copy_methods->opencl_to_ram(handle, src_node, dst_node);
-               }
-               else {
-                  ret = copy_methods->opencl_to_ram_async(handle, src_node, dst_node, &(req->async_channel.opencl_event));
-               }
+      case MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
+         /* CUBLAS_RAM -> STARPU_CPU_RAM */
+         /* only the proper CUBLAS thread can initiate this ! */
+         if (_starpu_get_local_memory_node() == src_node) {
+            /* only the proper CUBLAS thread can initiate this directly ! */
+            STARPU_ASSERT(copy_methods->cuda_to_ram);
+            if (!req || !copy_methods->cuda_to_ram_async) {
+               /* this is not associated to a request so it's synchronous */
+               copy_methods->cuda_to_ram(handle, src_node, dst_node);
             }
-            else
-            {
-               /* we should not have a blocking call ! */
-               STARPU_ABORT();
+            else {
+               cures = cudaEventCreate(&req->async_channel.cuda_event);
+               if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
+
+               stream = starpu_cuda_get_local_stream();
+               ret = copy_methods->cuda_to_ram_async(handle, src_node, dst_node, stream);
+
+               cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
+               if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
             }
-            break;
-#endif
-			case STARPU_SPU_LS:
-				STARPU_ABORT(); // TODO
-				break;
-			case STARPU_UNUSED:
-				printf("error node %u STARPU_UNUSED\n", src_node);
-			default:
-				assert(0);
-				break;
-		}
-		break;
-#ifdef STARPU_USE_CUDA
-	case STARPU_CUDA_RAM:
-		switch (src_kind) {
-			case STARPU_RAM:
-				/* STARPU_RAM -> CUBLAS_RAM */
-				/* only the proper CUBLAS thread can initiate this ! */
-				STARPU_ASSERT(_starpu_get_local_memory_node() == dst_node);
-				STARPU_ASSERT(copy_methods->ram_to_cuda);
-				if (!req || !copy_methods->ram_to_cuda_async)
-				{
-					/* this is not associated to a request so it's synchronous */
-					copy_methods->ram_to_cuda(handle, src_node, dst_node);
-				}
-				else {
-					cures = cudaEventCreate(&req->async_channel.cuda_event);
-					if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
-
-					stream = starpu_cuda_get_local_stream();
-					ret = copy_methods->ram_to_cuda_async(handle, src_node, dst_node, stream);
-
-					cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
-					if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
-				}
-				break;
-			case STARPU_CUDA_RAM:
-			case STARPU_SPU_LS:
-				STARPU_ABORT(); // TODO 
-				break;
-			case STARPU_UNUSED:
-			default:
-				STARPU_ABORT();
-				break;
-		}
-		break;
+         }
+         else {
+            /* we should not have a blocking call ! */
+            STARPU_ABORT();
+         }
+         break;
+      case MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CUDA_RAM):
+         /* STARPU_CPU_RAM -> CUBLAS_RAM */
+         /* only the proper CUBLAS thread can initiate this ! */
+         STARPU_ASSERT(_starpu_get_local_memory_node() == dst_node);
+         STARPU_ASSERT(copy_methods->ram_to_cuda);
+         if (!req || !copy_methods->ram_to_cuda_async) {
+            /* this is not associated to a request so it's synchronous */
+            copy_methods->ram_to_cuda(handle, src_node, dst_node);
+         }
+         else {
+            cures = cudaEventCreate(&req->async_channel.cuda_event);
+            if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
+
+            stream = starpu_cuda_get_local_stream();
+            ret = copy_methods->ram_to_cuda_async(handle, src_node, dst_node, stream);
+
+            cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
+            if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
+         }
+         break;
 #endif
 #ifdef STARPU_USE_OPENCL
-	case STARPU_OPENCL_RAM:
-		switch (src_kind) {
-		        case STARPU_RAM:
-				/* STARPU_RAM -> STARPU_OPENCL_RAM */
-				STARPU_ASSERT(_starpu_get_local_memory_node() == dst_node);
-				STARPU_ASSERT(copy_methods->ram_to_opencl);
-				if (!req || !copy_methods->ram_to_opencl_async)
-				{
-					/* this is not associated to a request so it's synchronous */
-					copy_methods->ram_to_opencl(handle, src_node, dst_node);
-				}
-				else {
-                                        ret = copy_methods->ram_to_opencl_async(handle, src_node, dst_node, &(req->async_channel.opencl_event));
-				}
-				break;
-			case STARPU_CUDA_RAM:
-			case STARPU_OPENCL_RAM:
-			case STARPU_SPU_LS:
-				STARPU_ABORT(); // TODO 
-				break;
-			case STARPU_UNUSED:
-			default:
-				STARPU_ABORT();
-				break;
-		}
-		break;
+      case MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
+         /* OpenCL -> RAM */
+         if (_starpu_get_local_memory_node() == src_node) {
+            STARPU_ASSERT(copy_methods->opencl_to_ram);
+            if (!req || !copy_methods->opencl_to_ram_async) {
+               /* this is not associated to a request so it's synchronous */
+               copy_methods->opencl_to_ram(handle, src_node, dst_node);
+            }
+            else {
+               ret = copy_methods->opencl_to_ram_async(handle, src_node, dst_node, &(req->async_channel.opencl_event));
+            }
+         }
+         else {
+            /* we should not have a blocking call ! */
+            STARPU_ABORT();
+         }
+         break;
+      case MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_OPENCL_RAM):
+         /* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */
+         STARPU_ASSERT(_starpu_get_local_memory_node() == dst_node);
+         STARPU_ASSERT(copy_methods->ram_to_opencl);
+         if (!req || !copy_methods->ram_to_opencl_async) {
+            /* this is not associated to a request so it's synchronous */
+            copy_methods->ram_to_opencl(handle, src_node, dst_node);
+         }
+         else {
+            ret = copy_methods->ram_to_opencl_async(handle, src_node, dst_node, &(req->async_channel.opencl_event));
+         }
+         break;
 #endif
-	case STARPU_SPU_LS:
-		STARPU_ABORT(); // TODO
-		break;
-	case STARPU_UNUSED:
 	default:
-		assert(0);
+      STARPU_ABORT();
 		break;
 	}
 
@@ -337,7 +284,7 @@ void _starpu_driver_wait_request_completion(starpu_async_channel *async_channel
          }
          break;
 #endif
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 		default:
 			STARPU_ABORT();
 	}
@@ -375,7 +322,7 @@ unsigned _starpu_driver_test_request_completion(starpu_async_channel *async_chan
             break;
          }
 #endif
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 		default:
 			STARPU_ABORT();
 			success = 0;

+ 4 - 4
src/datawizard/interfaces/bcsr_interface.c

@@ -256,7 +256,7 @@ static size_t allocate_bcsr_buffer_on_node(void *interface_, uint32_t dst_node)
 	starpu_node_kind kind = _starpu_get_node_kind(dst_node);
 
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			addr_nzval = (uintptr_t)malloc(nnz*r*c*elemsize);
 			if (!addr_nzval)
 				goto fail_nzval;
@@ -324,7 +324,7 @@ static size_t allocate_bcsr_buffer_on_node(void *interface_, uint32_t dst_node)
 
 fail_rowptr:
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void *)addr_colind);
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
@@ -342,7 +342,7 @@ fail_rowptr:
 
 fail_colind:
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void *)addr_nzval);
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
@@ -372,7 +372,7 @@ static void free_bcsr_buffer_on_node(void *interface, uint32_t node)
 
 	starpu_node_kind kind = _starpu_get_node_kind(node);
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void*)bcsr_interface->nzval);
 			free((void*)bcsr_interface->colind);
 			free((void*)bcsr_interface->rowptr);

+ 2 - 2
src/datawizard/interfaces/block_interface.c

@@ -278,7 +278,7 @@ static size_t allocate_block_buffer_on_node(void *interface_, uint32_t dst_node)
 	starpu_node_kind kind = _starpu_get_node_kind(dst_node);
 
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			addr = (uintptr_t)malloc(nx*ny*nz*elemsize);
 			if (!addr) 
 				fail = 1;
@@ -345,7 +345,7 @@ static void free_block_buffer_on_node(void *interface, uint32_t node)
 
 	starpu_node_kind kind = _starpu_get_node_kind(node);
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void*)block_interface->ptr);
 			break;
 #ifdef STARPU_USE_CUDA

+ 4 - 4
src/datawizard/interfaces/csr_interface.c

@@ -227,7 +227,7 @@ static size_t allocate_csr_buffer_on_node(void *interface_, uint32_t dst_node)
 	starpu_node_kind kind = _starpu_get_node_kind(dst_node);
 
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			addr_nzval = (uintptr_t)malloc(nnz*elemsize);
 			if (!addr_nzval)
 				goto fail_nzval;
@@ -295,7 +295,7 @@ static size_t allocate_csr_buffer_on_node(void *interface_, uint32_t dst_node)
 
 fail_rowptr:
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void *)addr_colind);
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
@@ -313,7 +313,7 @@ fail_rowptr:
 
 fail_colind:
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void *)addr_nzval);
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
@@ -343,7 +343,7 @@ static void free_csr_buffer_on_node(void *interface, uint32_t node)
 
 	starpu_node_kind kind = _starpu_get_node_kind(node);
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void*)csr_interface->nzval);
 			free((void*)csr_interface->colind);
 			free((void*)csr_interface->rowptr);

+ 2 - 2
src/datawizard/interfaces/matrix_interface.c

@@ -256,7 +256,7 @@ static size_t allocate_matrix_buffer_on_node(void *interface_, uint32_t dst_node
 	starpu_node_kind kind = _starpu_get_node_kind(dst_node);
 
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			addr = (uintptr_t)malloc((size_t)nx*ny*elemsize);
 			if (!addr) 
 				fail = 1;
@@ -321,7 +321,7 @@ static void free_matrix_buffer_on_node(void *interface, uint32_t node)
 
 	starpu_node_kind kind = _starpu_get_node_kind(node);
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void*)matrix_interface->ptr);
 			break;
 #ifdef STARPU_USE_CUDA

+ 7 - 2
src/datawizard/interfaces/variable_interface.c

@@ -23,6 +23,11 @@
 
 #include <common/hash.h>
 
+
+
+
+
+
 #ifdef STARPU_USE_CUDA
 #include <cuda.h>
 #endif
@@ -190,7 +195,7 @@ static size_t allocate_variable_buffer_on_node(void *interface_, uint32_t dst_no
 #endif
 
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			addr = (uintptr_t)malloc(elemsize);
 			if (!addr)
 				fail = 1;
@@ -240,7 +245,7 @@ static void free_variable_buffer_on_node(void *interface, uint32_t node)
 {
 	starpu_node_kind kind = _starpu_get_node_kind(node);
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void*)STARPU_GET_VARIABLE_PTR(interface));
 			break;
 #ifdef STARPU_USE_CUDA

+ 2 - 2
src/datawizard/interfaces/vector_interface.c

@@ -220,7 +220,7 @@ static size_t allocate_vector_buffer_on_node(void *interface_, uint32_t dst_node
 #endif
 
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			addr = (uintptr_t)malloc(nx*elemsize);
 			if (!addr)
 				fail = 1;
@@ -274,7 +274,7 @@ static void free_vector_buffer_on_node(void *interface, uint32_t node)
 
 	starpu_node_kind kind = _starpu_get_node_kind(node);
 	switch(kind) {
-		case STARPU_RAM:
+		case STARPU_CPU_RAM:
 			free((void*)vector_interface->ptr);
 			break;
 #ifdef STARPU_USE_CUDA

+ 11 - 5
src/datawizard/memory_nodes.h

@@ -23,13 +23,19 @@
 #include <datawizard/memalloc.h>
 
 typedef enum {
-	STARPU_UNUSED,
-	STARPU_SPU_LS,
-	STARPU_RAM,
-	STARPU_CUDA_RAM,
-        STARPU_OPENCL_RAM,
+   STARPU_UNUSED     = 0x00,
+	STARPU_CPU_RAM    = 0x01,
+	STARPU_CUDA_RAM   = 0x02,
+   STARPU_OPENCL_RAM = 0x03,
+	STARPU_SPU_LS     = 0x04
 } starpu_node_kind;
 
+typedef starpu_node_kind starpu_memory_node_tuple;
+
+#define MEMORY_NODE_TUPLE(node1,node2) ((node1) | ((node2) << 4)) /* pack two node kinds: node1 in bits 0-3, node2 in bits 4-7; arguments parenthesized so expression arguments bind correctly */
+#define MEMORY_NODE_TUPLE_FIRST(tuple) ((tuple) & 0x0F) /* node1 of a packed tuple (low nibble) */
+#define MEMORY_NODE_TUPLE_SECOND(tuple) (((tuple) >> 4) & 0x0F) /* node2 of a packed tuple: high nibble shifted back down so it compares equal to the kind that was packed */
+
 typedef struct {
 	unsigned nnodes;
 	starpu_node_kind nodes[STARPU_MAXNODES];