瀏覽代碼

SOCL: remove unnecessary mallocs

Sylvain Henry 12 年之前
父節點
當前提交
77ac0af6c7
共有 3 個文件被更改,包括 34 次插入 和 114 次删除
  1. 11 42
      socl/src/cl_enqueuecopybuffer.c
  2. 12 39
      socl/src/cl_enqueuereadbuffer.c
  3. 11 33
      socl/src/cl_enqueuewritebuffer.c

+ 11 - 42
socl/src/cl_enqueuecopybuffer.c

@@ -16,46 +16,30 @@
 
 #include "socl.h"
 
-struct arg_copybuffer {
-   size_t src_offset, dst_offset;
-   cl_mem src_buffer, dst_buffer;
-   size_t cb;
-};
-
 static void soclEnqueueCopyBuffer_opencl_task(void *descr[], void *args) {
-   struct arg_copybuffer *arg;
    int wid;
    cl_command_queue cq;
    cl_event ev;
+   command_copy_buffer cmd = (command_copy_buffer)args;;
 
-   arg = (struct arg_copybuffer*)args;
    wid = starpu_worker_get_id();
    starpu_opencl_get_queue(wid, &cq);
 
    cl_mem src = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
    cl_mem dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[1]);
 
-   clEnqueueCopyBuffer(cq, src,dst, arg->src_offset, arg->dst_offset, arg->cb, 0, NULL, &ev);
+   clEnqueueCopyBuffer(cq, src,dst, cmd->src_offset, cmd->dst_offset, cmd->cb, 0, NULL, &ev);
    clWaitForEvents(1, &ev);
    clReleaseEvent(ev);
-
-   gc_entity_unstore(&arg->src_buffer);
-   gc_entity_unstore(&arg->dst_buffer);
-
-   free(arg);
 }
 
 static void soclEnqueueCopyBuffer_cpu_task(void *descr[], void *args) {
-   struct arg_copybuffer *arg;
-   arg = (struct arg_copybuffer*)args;
+   command_copy_buffer cmd = (command_copy_buffer)args;;
+
    void * src = (void*)STARPU_VARIABLE_GET_PTR(descr[0]);
    void * dst = (void*)STARPU_VARIABLE_GET_PTR(descr[1]);
-   memcpy(dst+arg->dst_offset, src+arg->src_offset, arg->cb);
-
-   gc_entity_unstore(&arg->src_buffer);
-   gc_entity_unstore(&arg->dst_buffer);
 
-   free(arg);
+   memcpy(dst+cmd->dst_offset, src+cmd->src_offset, cmd->cb);
 }
 
 static struct starpu_perfmodel copy_buffer_perfmodel = {
@@ -73,20 +57,11 @@ static struct starpu_codelet codelet_copybuffer = {
 };
 
 cl_int command_copy_buffer_submit(command_copy_buffer cmd) {
-	/* Aliases */
-	cl_mem src_buffer = cmd->src_buffer;
-	cl_mem dst_buffer = cmd->dst_buffer;
-	size_t src_offset = cmd->src_offset;
-	size_t dst_offset = cmd->dst_offset;
-	size_t cb = cmd->cb;
-
-	struct starpu_task *task;
-	struct arg_copybuffer *arg;
 
-	task = task_create(CL_COMMAND_COPY_BUFFER);
+	struct starpu_task * task = task_create(CL_COMMAND_COPY_BUFFER);
 
-	task->handles[0] = src_buffer->handle;
-	task->handles[1] = dst_buffer->handle;
+	task->handles[0] = cmd->src_buffer->handle;
+	task->handles[1] = cmd->dst_buffer->handle;
 	task->cl = &codelet_copybuffer;
 
 	/* Execute the task on a specific worker? */
@@ -95,16 +70,10 @@ cl_int command_copy_buffer_submit(command_copy_buffer cmd) {
 	  task->workerid = cmd->_command.cq->device->worker_id;
 	}
 
-	arg = (struct arg_copybuffer*)malloc(sizeof(struct arg_copybuffer));
-	arg->src_offset = src_offset;
-	arg->dst_offset = dst_offset;
-	arg->cb = cb;
-	gc_entity_store(&arg->src_buffer, src_buffer);
-	gc_entity_store(&arg->dst_buffer, dst_buffer);
-	task->cl_arg = arg;
-	task->cl_arg_size = sizeof(struct arg_copybuffer);
+	task->cl_arg = cmd;
+	task->cl_arg_size = sizeof(*cmd);
 
-	dst_buffer->scratch = 0;
+	cmd->dst_buffer->scratch = 0;
 
 	task_submit(task, cmd);
 

+ 12 - 39
socl/src/cl_enqueuereadbuffer.c

@@ -16,46 +16,33 @@
 
 #include "socl.h"
 
-struct arg_readbuffer {
-   size_t offset;
-   size_t cb;
-   void * ptr;
-   cl_mem buffer;
-};
-
 static void soclEnqueueReadBuffer_cpu_task(void *descr[], void *args) {
-   struct arg_readbuffer *arg;
-   arg = (struct arg_readbuffer*)args;
+   command_read_buffer cmd = (command_read_buffer)args;
+
    void * ptr = (void*)STARPU_VARIABLE_GET_PTR(descr[0]);
-   DEBUG_MSG("[Buffer %d] Reading %ld bytes from %p to %p\n", arg->buffer->id, arg->cb, ptr+arg->offset, arg->ptr);
+   DEBUG_MSG("[Buffer %d] Reading %ld bytes from %p to %p\n", cmd->buffer->id, cmd->cb, ptr+cmd->offset, cmd->ptr);
 
    //This fix is for people who use USE_HOST_PTR and still use ReadBuffer to sync the buffer in host mem at host_ptr.
    //They should use buffer mapping facilities instead.
-   if (ptr+arg->offset != arg->ptr)
-      memcpy(arg->ptr, ptr+arg->offset, arg->cb);
-
-   gc_entity_unstore(&arg->buffer);
-   free(args);
+   if (ptr+cmd->offset != cmd->ptr)
+      memcpy(cmd->ptr, ptr+cmd->offset, cmd->cb);
 }
 
 static void soclEnqueueReadBuffer_opencl_task(void *descr[], void *args) {
-   struct arg_readbuffer *arg;
-   arg = (struct arg_readbuffer*)args;
+   command_read_buffer cmd = (command_read_buffer)args;
 
    cl_mem mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
 
-   DEBUG_MSG("[Buffer %d] Reading %ld bytes from offset %ld into %p\n", arg->buffer->id, arg->cb, arg->offset, arg->ptr);
+   DEBUG_MSG("[Buffer %d] Reading %ld bytes from offset %ld into %p\n", cmd->buffer->id, cmd->cb, cmd->offset, cmd->ptr);
 
    int wid = starpu_worker_get_id();
    cl_command_queue cq;
    starpu_opencl_get_queue(wid, &cq);
 
-   cl_int ret = clEnqueueReadBuffer(cq, mem, CL_TRUE, arg->offset, arg->cb, arg->ptr, 0, NULL, NULL);
+   cl_int ret = clEnqueueReadBuffer(cq, mem, CL_TRUE, cmd->offset, cmd->cb, cmd->ptr, 0, NULL, NULL);
    if (ret != CL_SUCCESS)
       DEBUG_CL("clEnqueueReadBuffer", ret);
 
-   gc_entity_unstore(&arg->buffer);
-   free(args);
 }
 
 static struct starpu_perfmodel read_buffer_perfmodel = {
@@ -73,18 +60,10 @@ static struct starpu_codelet codelet_readbuffer = {
 };
 
 cl_int command_read_buffer_submit(command_read_buffer cmd) {
-	/* Aliases */
-	cl_mem buffer = cmd->buffer;
-	size_t offset = cmd->offset;
-	size_t cb = cmd->cb;
-	void * ptr = cmd->ptr;
-
-	struct starpu_task *task;
-	struct arg_readbuffer *arg;
 
-	task = task_create(CL_COMMAND_READ_BUFFER);
+	struct starpu_task * task = task_create(CL_COMMAND_READ_BUFFER);
 
-	task->handles[0] = buffer->handle;
+	task->handles[0] = cmd->buffer->handle;
 	task->cl = &codelet_readbuffer;
 
 	/* Execute the task on a specific worker? */
@@ -93,14 +72,8 @@ cl_int command_read_buffer_submit(command_read_buffer cmd) {
 	  task->workerid = cmd->_command.cq->device->worker_id;
 	}
 
-	arg = (struct arg_readbuffer*)malloc(sizeof(struct arg_readbuffer));
-	arg->offset = offset;
-	arg->cb = cb;
-	arg->ptr = ptr;
-	task->cl_arg = arg;
-	task->cl_arg_size = sizeof(struct arg_readbuffer);
-
-	gc_entity_store(&arg->buffer, buffer);
+	task->cl_arg = cmd;
+	task->cl_arg_size = sizeof(*cmd);
 
 	task_submit(task, cmd);
 

+ 11 - 33
socl/src/cl_enqueuewritebuffer.c

@@ -16,48 +16,35 @@
 
 #include "socl.h"
 
-struct arg_writebuffer {
-   size_t offset;
-   size_t cb;
-   const void * ptr;
-   cl_mem buffer;
-};
 
 static void soclEnqueueWriteBuffer_cpu_task(void *descr[], void *args) {
-   struct arg_writebuffer *arg;
-   arg = (struct arg_writebuffer*)args;
+   command_write_buffer cmd = (command_write_buffer)args;
+
    void * ptr = (void*)STARPU_VARIABLE_GET_PTR(descr[0]);
-   DEBUG_MSG("[Buffer %d] Writing %ld bytes from %p to %p\n", arg->buffer->id, arg->cb, arg->ptr, ptr+arg->offset);
+   DEBUG_MSG("[Buffer %d] Writing %ld bytes from %p to %p\n", cmd->buffer->id, cmd->cb, cmd->ptr, ptr+cmd->offset);
 
    //FIXME: Fix for people who use USE_HOST_PTR, modify data at host_ptr and use WriteBuffer to commit the change.
    // StarPU may have erased host mem at host_ptr (for instance by retrieving current buffer data at host_ptr)
    // Buffer mapping facilities should be used instead
    // Maybe we should report the bug here... for now, we just avoid memcpy crash due to overlapping regions...
-   if (ptr+arg->offset != arg->ptr)
-      memcpy(ptr+arg->offset, arg->ptr, arg->cb);
-
-   gc_entity_unstore(&arg->buffer);
-   free(args);
+   if (ptr+cmd->offset != cmd->ptr)
+      memcpy(ptr+cmd->offset, cmd->ptr, cmd->cb);
 }
 
 static void soclEnqueueWriteBuffer_opencl_task(void *descr[], void *args) {
-   struct arg_writebuffer *arg;
-   arg = (struct arg_writebuffer*)args;
+   command_write_buffer cmd = (command_write_buffer)args;
 
    cl_mem mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
 
-   DEBUG_MSG("[Buffer %d] Writing %ld bytes to offset %ld from %p\n", arg->buffer->id, arg->cb, arg->offset, arg->ptr);
+   DEBUG_MSG("[Buffer %d] Writing %ld bytes to offset %ld from %p\n", cmd->buffer->id, cmd->cb, cmd->offset, cmd->ptr);
 
    int wid = starpu_worker_get_id();
    cl_command_queue cq;
    starpu_opencl_get_queue(wid, &cq);
 
-   cl_int err = clEnqueueWriteBuffer(cq, mem, CL_TRUE, arg->offset, arg->cb, arg->ptr, 0, NULL, NULL);
+   cl_int err = clEnqueueWriteBuffer(cq, mem, CL_TRUE, cmd->offset, cmd->cb, cmd->ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
       DEBUG_CL("clEnqueueWriteBuffer", err);
-
-   gc_entity_unstore(&arg->buffer);
-   free(args);
 }
 
 static struct starpu_perfmodel write_buffer_perfmodel = {
@@ -86,12 +73,9 @@ static struct starpu_codelet codelet_writebuffer_partial = {
 cl_int command_write_buffer_submit(command_write_buffer cmd) {
 	/* Aliases */
 	cl_mem buffer = cmd->buffer;
-	size_t offset = cmd->offset;
 	size_t cb = cmd->cb;
-	const void * ptr = cmd->ptr;
 
 	struct starpu_task *task;
-	struct arg_writebuffer *arg;
 
 	task = task_create(CL_COMMAND_WRITE_BUFFER);
 
@@ -102,12 +86,8 @@ cl_int command_write_buffer_submit(command_write_buffer cmd) {
 	else 
 		task->cl = &codelet_writebuffer;
 
-	arg = (struct arg_writebuffer*)malloc(sizeof(struct arg_writebuffer));
-	arg->offset = offset;
-	arg->cb = cb;
-	arg->ptr = ptr;
-	task->cl_arg = arg;
-	task->cl_arg_size = sizeof(struct arg_writebuffer);
+	task->cl_arg = cmd;
+	task->cl_arg_size = sizeof(*cmd);
 
 	/* Execute the task on a specific worker? */
 	if (cmd->_command.cq->device != NULL) {
@@ -115,10 +95,8 @@ cl_int command_write_buffer_submit(command_write_buffer cmd) {
 	  task->workerid = cmd->_command.cq->device->worker_id;
 	}
 
-	gc_entity_store(&arg->buffer, buffer);
-
 	//The buffer now contains meaningful data
-	arg->buffer->scratch = 0;
+	cmd->buffer->scratch = 0;
 
 	task_submit(task, cmd);