Prechádzať zdrojové kódy

Use clWaitForEvents instead of clFinish

We do not need to use command queue barriers
Sylvain Henry pred 12 rokmi
rodič
commit
f94891f8ce
1 zmenený súbor, v ktorom bolo 61 pridaní a 41 odobraní
  1. 61 41
      src/drivers/opencl/driver_opencl.c

+ 61 - 41
src/drivers/opencl/driver_opencl.c

@@ -228,10 +228,10 @@ cl_int starpu_opencl_allocate_memory(cl_mem *mem STARPU_ATTRIBUTE_UNUSED, size_t
 	 * want to know this __now__, so we just perform a dummy copy.
 	 */
 	char dummy = 0;
+	cl_event ev;
 	err = clEnqueueWriteBuffer(alloc_queues[worker->devid], memory, CL_TRUE,
 				   0, sizeof(dummy), &dummy,
-				   0, NULL, NULL);
-	clFinish(alloc_queues[worker->devid]);
+				   0, NULL, &ev);
 	if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE)
 		return err;
 	if (err == CL_OUT_OF_RESOURCES)
@@ -239,6 +239,9 @@ cl_int starpu_opencl_allocate_memory(cl_mem *mem STARPU_ATTRIBUTE_UNUSED, size_t
 	if (err != CL_SUCCESS)
 		STARPU_OPENCL_REPORT_ERROR(err);
 
+	clWaitForEvents(1, &ev);
+	clReleaseEvent(ev);
+
         *mem = memory;
         return CL_SUCCESS;
 #endif
@@ -246,51 +249,68 @@ cl_int starpu_opencl_allocate_memory(cl_mem *mem STARPU_ATTRIBUTE_UNUSED, size_t
 
 cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
 {
-        cl_int err;
-        struct _starpu_worker *worker = _starpu_get_local_worker_key();
-
-        if (event)
-                _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
-        err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, event);
-        if (event)
-                _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
-        if (STARPU_LIKELY(err == CL_SUCCESS))
-	{
-		if (event == NULL)
-		{
-			/* We want a synchronous copy, let's synchronise the queue */
-			clFinish(transfer_queues[worker->devid]);
-		}
-		if (ret)
-		{
-			*ret = (event == NULL) ? 0 : -EAGAIN;
-		}
-	}
-	return err;
+   cl_int err;
+   struct _starpu_worker *worker = _starpu_get_local_worker_key();
+
+
+   if (event)
+      _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
+
+   cl_event ev;
+   err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev);
+
+   if (event)
+      _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
+
+
+   if (STARPU_LIKELY(err == CL_SUCCESS))
+   {
+      if (event == NULL)
+      {
+         /* We want a synchronous copy, let's synchronise the queue */
+         clWaitForEvents(1, &ev);
+         clReleaseEvent(ev);
+      }
+      else {
+         *event = ev;
+      }
+
+      if (ret)
+      {
+         *ret = (event == NULL) ? 0 : -EAGAIN;
+      }
+   }
+   return err;
 }
 
 cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
 {
-        cl_int err;
-        struct _starpu_worker *worker = _starpu_get_local_worker_key();
+	cl_int err;
+	struct _starpu_worker *worker = _starpu_get_local_worker_key();
 
-        if (event)
-                _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
-        err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, event);
-        if (event)
-                _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
-        if (STARPU_LIKELY(err == CL_SUCCESS))
+	if (event)
+		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
+	cl_event ev;
+	err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev);
+	if (event)
+		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
+	if (STARPU_LIKELY(err == CL_SUCCESS))
 	{
-		if (event == NULL)
-		{
-			/* We want a synchronous copy, let's synchronise the queue */
-			clFinish(transfer_queues[worker->devid]);
-		}
-		if (ret)
-		{
-			*ret = (event == NULL) ? 0 : -EAGAIN;
-		}
-	}
+	  if (event == NULL)
+	  {
+		 /* We want a synchronous copy, let's synchronise the queue */
+		 clWaitForEvents(1, &ev);
+		 clReleaseEvent(ev);
+	 }
+	  else {
+		  *event = ev;
+	  }
+
+	  if (ret)
+	  {
+		 *ret = (event == NULL) ? 0 : -EAGAIN;
+	 }
+  }
 	return err;
 }