浏览代码

Fix clFlush use

Call clFlush only for asynchronous requests (in the synchronous case,
clWaitForEvents already performs an implicit flush).

Also call clFlush after enqueuing the marker used to test for kernel
completion; otherwise the marker may not have been flushed.
Samuel Thibault 7 年之前
父节点
当前提交
35da708dcf
共有 1 个文件被更改,包括 4 次插入和 4 次删除
  1. 4 4
      src/drivers/opencl/driver_opencl.c

+ 4 - 4
src/drivers/opencl/driver_opencl.c

@@ -303,7 +303,6 @@ cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTR
 
 	cl_event ev;
 	err = clEnqueueWriteBuffer(in_transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev);
-	clFlush(in_transfer_queues[worker->devid]);
 
 	if (event)
 		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
@@ -321,6 +320,7 @@ cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTR
 		}
 		else
 		{
+			clFlush(in_transfer_queues[worker->devid]);
 			*event = ev;
 		}
 
@@ -341,7 +341,6 @@ cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_
 		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
 	cl_event ev;
 	err = clEnqueueReadBuffer(out_transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev);
-	clFlush(out_transfer_queues[worker->devid]);
 	if (event)
 		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
 	if (STARPU_LIKELY(err == CL_SUCCESS))
@@ -357,6 +356,7 @@ cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_
 		}
 		else
 		{
+			clFlush(out_transfer_queues[worker->devid]);
 			*event = ev;
 		}
 
@@ -377,7 +377,6 @@ cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_
 		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
 	cl_event ev;
 	err = clEnqueueCopyBuffer(peer_transfer_queues[worker->devid], src, dst, src_offset, dst_offset, size, 0, NULL, &ev);
-	clFlush(peer_transfer_queues[worker->devid]);
 	if (event)
 		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
 	if (STARPU_LIKELY(err == CL_SUCCESS))
@@ -393,6 +392,7 @@ cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_
 		}
 		else
 		{
+			clFlush(peer_transfer_queues[worker->devid]);
 			*event = ev;
 		}
 
@@ -987,7 +987,6 @@ static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker
 
 		cl_command_queue queue;
 		starpu_opencl_get_queue(worker->devid, &queue);
-		clFlush(queue);
 #endif
 		_STARPU_TRACE_END_EXECUTING();
 	}
@@ -1077,6 +1076,7 @@ static void _starpu_opencl_execute_job(struct starpu_task *task, struct _starpu_
 			 */
 			err = clEnqueueMarker(queue, &task_events[worker->devid][pipeline_idx]);
 			_STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err);
+			clFlush(queue);
 #endif
 			_STARPU_TRACE_START_EXECUTING();
 		}