Przeglądaj źródła

SOCL: delay command execution (task graph)

Sylvain Henry 13 lat temu
rodzic
commit
fa035d1c95

+ 8 - 6
socl/src/Makefile.am

@@ -24,16 +24,18 @@ SUBDIRS =
 lib_LTLIBRARIES = libsocl.la
 
 libsocl_la_SOURCES = 						\
-  socl.c \
-  gc.c \
-  graph.c \
+  command.c \
+  command_list.c \
+  command_queue.c \
+  debug.c \
+  devices.c \
   event.c \
+  gc.c \
   init.c \
-  task.c \
-  command_queue.c \
   mem_objects.c \
+  socl.c \
+  task.c \
   util.c \
-  devices.c \
   cl_getplatformids.c \
   cl_getplatforminfo.c \
   cl_getdeviceids.c \

+ 3 - 4
socl/src/cl_createcommandqueue.c

@@ -30,8 +30,7 @@ static void release_callback_command_queue(void * e) {
   gc_entity_unstore(&cq->context);
 
   /* Destruct object */
-  pthread_spin_destroy(&cq->spin);
-  free(cq->events);
+  pthread_mutex_destroy(&cq->mutex);
 }
 
 
@@ -68,9 +67,9 @@ soclCreateCommandQueue(cl_context                   context,
       profiling_queue_count += 1;
    }
 
-   cq->events = NULL;
+   cq->commands = NULL;
    cq->barrier = NULL;
-   pthread_spin_init(&cq->spin, 0);
+   pthread_mutex_init(&cq->mutex, NULL);
 
    if (errcode_ret != NULL)
       *errcode_ret = CL_SUCCESS;

+ 11 - 11
socl/src/cl_createkernel.c

@@ -32,23 +32,23 @@ static void soclCreateKernel_task(void *data) {
    }
 
    /* One worker creates argument structures */
-   if (__sync_bool_compare_and_swap(&k->arg_count, 0, 666)) {
+   if (__sync_bool_compare_and_swap(&k->num_args, 0, 666)) {
       unsigned int i;
-      cl_uint arg_count;
+      cl_uint num_args;
 
-      err = clGetKernelInfo(k->cl_kernels[range], CL_KERNEL_NUM_ARGS, sizeof(arg_count), &arg_count, NULL);
+      err = clGetKernelInfo(k->cl_kernels[range], CL_KERNEL_NUM_ARGS, sizeof(num_args), &num_args, NULL);
       if (err != CL_SUCCESS) {
          DEBUG_CL("clGetKernelInfo", err);
          ERROR_STOP("Unable to get kernel argument count. Aborting.\n");
       }
-      k->arg_count = arg_count;
-      DEBUG_MSG("Kernel has %d arguments\n", arg_count);
+      k->num_args = num_args;
+      DEBUG_MSG("Kernel has %d arguments\n", num_args);
 
-      k->arg_size = (size_t*)malloc(sizeof(size_t) * arg_count);
-      k->arg_value = (void**)malloc(sizeof(void*) * arg_count);
-      k->arg_type = (enum kernel_arg_type*)malloc(sizeof(enum kernel_arg_type) * arg_count);
+      k->arg_size = (size_t*)malloc(sizeof(size_t) * num_args);
+      k->arg_value = (void**)malloc(sizeof(void*) * num_args);
+      k->arg_type = (enum kernel_arg_type*)malloc(sizeof(enum kernel_arg_type) * num_args);
       /* Settings default type to NULL */
-      for (i=0; i<arg_count; i++) {
+      for (i=0; i<num_args; i++) {
          k->arg_value[i] = NULL;
          k->arg_type[i] = Null;
       }
@@ -70,7 +70,7 @@ static void release_callback_kernel(void * e) {
 
   //Free args
   unsigned int j;
-  for (j=0; j<kernel->arg_count; j++) {
+  for (j=0; j<kernel->num_args; j++) {
     switch (kernel->arg_type[j]) {
       case Null:
         break;
@@ -125,7 +125,7 @@ soclCreateKernel(cl_program    program,
    
    gc_entity_store(&k->program, program);
    k->kernel_name = strdup(kernel_name);
-   k->arg_count = 0;
+   k->num_args = 0;
    k->arg_value = NULL;
    k->arg_size = NULL;
 

+ 2 - 2
socl/src/cl_createprogramwithsource.c

@@ -40,11 +40,11 @@ static void soclCreateProgramWithSource_task(void *data) {
 }
 
 static void rp_task(void *data) {
-   struct _cl_program *d = (struct _cl_program*)data;
+   cl_program program = (cl_program)data;
 
    int range = starpu_worker_get_range();
 
-   cl_int err = clReleaseProgram(d->cl_programs[range]);
+   cl_int err = clReleaseProgram(program->cl_programs[range]);
    if (err != CL_SUCCESS)
       DEBUG_CL("clReleaseProgram", err);
 }

+ 4 - 3
socl/src/cl_enqueuebarrier.c

@@ -19,8 +19,9 @@
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueBarrier(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0
 {
-   cl_event ev = command_queue_barrier(cq);   
-   gc_entity_release(ev);
+	command_marker cmd = command_barrier_create();
 
-   return CL_SUCCESS;
+	command_queue_enqueue(cq, cmd, 0, NULL);
+
+	return CL_SUCCESS;
 }

+ 40 - 33
socl/src/cl_enqueuecopybuffer.c

@@ -66,6 +66,42 @@ static starpu_codelet codelet_copybuffer = {
    .nbuffers = 2
 };
 
+/**
+ * Submit the StarPU task that executes a delayed CopyBuffer command.
+ *
+ * The task uses codelet_copybuffer with the source buffer handle in
+ * STARPU_R mode and the destination buffer handle in STARPU_RW mode.
+ * Offsets, size and both buffers are passed to the codelet through a
+ * heap-allocated struct arg_copybuffer.
+ *
+ * @param cmd  CopyBuffer command describing the copy
+ * @return CL_SUCCESS, or CL_OUT_OF_HOST_MEMORY if the codelet argument
+ *         cannot be allocated (no task is created in that case)
+ */
+cl_int command_copy_buffer_submit(command_copy_buffer cmd) {
+	/* Aliases */
+	cl_mem src_buffer = cmd->src_buffer;
+	cl_mem dst_buffer = cmd->dst_buffer;
+
+	/* Allocate the codelet argument before creating the task, so that an
+	 * allocation failure is reported without leaving a task behind */
+	struct arg_copybuffer *arg = malloc(sizeof(*arg));
+	if (arg == NULL)
+		return CL_OUT_OF_HOST_MEMORY;
+
+	arg->src_offset = cmd->src_offset;
+	arg->dst_offset = cmd->dst_offset;
+	arg->cb = cmd->cb;
+	gc_entity_store(&arg->src_buffer, src_buffer);
+	gc_entity_store(&arg->dst_buffer, dst_buffer);
+
+	struct starpu_task *task = task_create(CL_COMMAND_COPY_BUFFER);
+
+	task->buffers[0].handle = src_buffer->handle;
+	task->buffers[0].mode = STARPU_R;
+	task->buffers[1].handle = dst_buffer->handle;
+	task->buffers[1].mode = STARPU_RW;
+	task->cl = &codelet_copybuffer;
+	task->cl_arg = arg;
+	task->cl_arg_size = sizeof(*arg);
+
+	/* The destination buffer is written by this command */
+	dst_buffer->scratch = 0;
+
+	task_submit(task, cmd);
+
+	return CL_SUCCESS;
+}
+
+
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyBuffer(cl_command_queue  cq, 
                     cl_mem              src_buffer,
@@ -77,40 +113,11 @@ soclEnqueueCopyBuffer(cl_command_queue  cq,
                     const cl_event *    events,
                     cl_event *          event) CL_API_SUFFIX__VERSION_1_0
 {
-   struct starpu_task *task;
-   struct arg_copybuffer *arg;
-   cl_event ev;
-
-   cl_int ndeps;
-   cl_event *deps;
-
-   task = task_create(CL_COMMAND_COPY_BUFFER);
-   ev = task_event(task);
-
-   task->buffers[0].handle = src_buffer->handle;
-   task->buffers[0].mode = STARPU_R;
-   task->buffers[1].handle = dst_buffer->handle;
-   task->buffers[1].mode = STARPU_RW;
-   task->cl = &codelet_copybuffer;
-
-   arg = (struct arg_copybuffer*)malloc(sizeof(struct arg_copybuffer));
-   arg->src_offset = src_offset;
-   arg->dst_offset = dst_offset;
-   arg->cb = cb;
-   gc_entity_store(&arg->src_buffer, src_buffer);
-   gc_entity_store(&arg->dst_buffer, dst_buffer);
-   task->cl_arg = arg;
-   task->cl_arg_size = sizeof(struct arg_copybuffer);
-
-   dst_buffer->scratch = 0;
-
-   DEBUG_MSG("Submitting CopyBuffer task (event %d)\n", ev->id);
-
-   command_queue_enqueue(cq, task_event(task), 0, num_events, events, &ndeps, &deps);
+	command_copy_buffer cmd = command_copy_buffer_create(src_buffer, dst_buffer, src_offset, dst_offset, cb);
 
-   task_submit(task, ndeps, deps);
+	command_queue_enqueue(cq, cmd, num_events, events);
 
-   RETURN_OR_RELEASE_EVENT(ev, event);
+	RETURN_EVENT(cmd, event);
 
-   return CL_SUCCESS;
+	return CL_SUCCESS;
 }

+ 25 - 53
socl/src/cl_enqueuemapbuffer.c

@@ -16,81 +16,53 @@
 
 #include "socl.h"
 
-struct mb_data {
-  cl_event ev;
-  cl_mem buffer;
-  cl_map_flags map_flags;
-};
-
 static void mapbuffer_callback(void *args) {
-  struct mb_data *arg = (struct mb_data*)args;
-
-  starpu_tag_notify_from_apps(arg->ev->id);
-  arg->ev->status = CL_COMPLETE;
+	command_map_buffer cmd = (command_map_buffer)args;
 
-  gc_entity_unstore(&arg->ev);
-  gc_entity_unstore(&arg->buffer);
-  free(args);
+	starpu_tag_notify_from_apps(cmd->event->id);
+	cmd->event->status = CL_COMPLETE;
 }
 
 static void mapbuffer_task(void *args) {
-  struct mb_data *arg = (struct mb_data*)args;
+	command_map_buffer cmd = (command_map_buffer)args;
+
+	starpu_access_mode mode = (cmd->map_flags == CL_MAP_READ ? STARPU_R : STARPU_RW);
+
+	starpu_data_acquire_cb(cmd->buffer->handle, mode, mapbuffer_callback, cmd);
+}
+
+cl_int command_map_buffer_submit(command_map_buffer cmd) {
+	starpu_task task = task_create_cpu(mapbuffer_task, cmd, 0);
 
-  starpu_access_mode mode = (arg->map_flags == CL_MAP_READ ? STARPU_R : STARPU_RW);
+	task_submit(task, cmd);
 
-  starpu_data_acquire_cb(arg->buffer->handle, mode, mapbuffer_callback, arg);
+	return CL_SUCCESS;
 }
 
 CL_API_ENTRY void * CL_API_CALL
 soclEnqueueMapBuffer(cl_command_queue cq,
                    cl_mem           buffer,
-                   cl_bool          blocking_map, 
+                   cl_bool          blocking, 
                    cl_map_flags     map_flags,
                    size_t           offset, 
-                   size_t           UNUSED(cb),
+                   size_t           cb,
                    cl_uint          num_events,
                    const cl_event * events,
                    cl_event *       event,
                    cl_int *         errcode_ret) CL_API_SUFFIX__VERSION_1_0
 {
-   struct starpu_task *task;
-   struct mb_data *arg;
-   cl_event ev;
-   cl_int err;
-   cl_int ndeps;
-   cl_event *deps;
-
-   /* Create custom event that will be triggered when map is complete */
-   ev = event_create();
-
-   /* Store arguments */
-   arg = (struct mb_data*)malloc(sizeof(struct mb_data));
-   arg->map_flags = map_flags;
-   gc_entity_store(&arg->ev, ev);
-   gc_entity_store(&arg->buffer, buffer);
-
-   /* Create StarPU task */
-   task = task_create_cpu(CL_COMMAND_MAP_BUFFER, mapbuffer_task, arg, 0);
-   cl_event map_event = task_event(task);
-
-   /* Enqueue task */
-   DEBUG_MSG("Submitting MapBuffer task (event %d)\n", ev->id);
-   command_queue_enqueue(cq, ev, 0, num_events, events, &ndeps, &deps);
-
-   task_submit(task, ndeps, deps);
-
-   gc_entity_release(map_event);
+	cl_event ev = event_create();
 
-   if (errcode_ret != NULL)
-      *errcode_ret = err;
+	command_map_buffer cmd = command_map_buffer_create(buffer, map_flags, offset, cb, ev);
 
-   if (err != CL_SUCCESS)
-      return NULL;
+	command_queue_enqueue(cq, cmd, num_events, events);
 
-   if (blocking_map == CL_TRUE)
-      soclWaitForEvents(1, &ev);
+	if (errcode_ret != NULL)
+		*errcode_ret = CL_SUCCESS;
 
-   RETURN_OR_RELEASE_EVENT(ev, event);
+	RETURN_CUSTOM_EVENT(ev,event);
 
-   return (void*)(starpu_variable_get_local_ptr(buffer->handle) + offset);
+	MAY_BLOCK_CUSTOM(blocking,ev);
+	
+	return (void*)(starpu_variable_get_local_ptr(buffer->handle) + offset);
 }

+ 10 - 7
socl/src/cl_enqueuemarker.c

@@ -22,16 +22,19 @@ soclEnqueueMarker(cl_command_queue  cq,
 {
 	if (event == NULL)
 		return CL_INVALID_VALUE;
+	
+	command_marker cmd = command_marker_create();
 
-	cl_int ndeps;
-	cl_event *deps;
+	command_queue_enqueue(cq, cmd, 0, NULL);
 
-	starpu_task * task = task_create(CL_COMMAND_MARKER);
-	*event = task_event(task);
+	RETURN_EVENT(cmd, event);
 
-	command_queue_enqueue(cq, task_event(task), 0, 0, NULL, &ndeps, &deps);
+	return CL_SUCCESS;
+}
 
-	task_submit(task, ndeps, deps);
+/**
+ * Submit the StarPU task backing a marker command.
+ *
+ * The task is created with the CL_COMMAND_MARKER type and carries no
+ * codelet; it is handed to task_submit() together with the command.
+ *
+ * @param cmd  marker command being submitted
+ * @return CL_SUCCESS
+ */
+cl_int command_marker_submit(command_marker cmd) {
+	struct starpu_task *task;
+	task = task_create(CL_COMMAND_MARKER);
 
-	return task_event(task);
+	task_submit(task, cmd);
+
+	/* The function is declared cl_int: report success explicitly, as the
+	 * other command_*_submit functions do */
+	return CL_SUCCESS;
 }

+ 120 - 229
socl/src/cl_enqueuendrangekernel.c

@@ -16,81 +16,64 @@
 
 #include "socl.h"
 
-typedef struct running_kernel * running_kernel;
-
-struct running_kernel {
-  cl_kernel kernel;
-  cl_mem *buffers;
-  unsigned int buffer_count;
-  starpu_codelet *codelet;
-  cl_uint work_dim;
-  size_t * global_work_offset;
-  size_t * global_work_size;
-  size_t * local_work_size;
-  /* Arguments */
-  unsigned int arg_count;
-  size_t *arg_size;
-  enum kernel_arg_type  *arg_type;
-  void  **arg_value;
-};
 
-static void soclEnqueueNDRangeKernel_task(void *descr[], void *args) {
-   running_kernel d;
+void soclEnqueueNDRangeKernel_task(void *descr[], void *args) {
+	command_ndrange_kernel cmd = (command_ndrange_kernel)args;
+
    cl_command_queue cq;
    int wid;
    cl_int err;
 
-   d = (running_kernel)args;
    wid = starpu_worker_get_id();
    starpu_opencl_get_queue(wid, &cq);
 
-   DEBUG_MSG("[worker %d] [kernel %d] Executing kernel...\n", wid, d->kernel->id);
+   DEBUG_MSG("[worker %d] [kernel %d] Executing kernel...\n", wid, cmd->kernel->id);
 
    int range = starpu_worker_get_range();
 
    /* Set arguments */
    {
-      unsigned int i;
-      int buf = 0;
-      for (i=0; i<d->arg_count; i++) {
-         switch (d->arg_type[i]) {
-            case Null:
-               err = clSetKernelArg(d->kernel->cl_kernels[range], i, d->arg_size[i], NULL);
-               break;
-            case Buffer: {
-                  cl_mem mem;  
-                  mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[buf]);
-                  err = clSetKernelArg(d->kernel->cl_kernels[range], i, d->arg_size[i], &mem);
-                  buf++;
-               }
-               break;
-            case Immediate:
-               err = clSetKernelArg(d->kernel->cl_kernels[range], i, d->arg_size[i], d->arg_value[i]);
-               break;
-         }
-         if (err != CL_SUCCESS) {
-            DEBUG_CL("clSetKernelArg", err);
-            DEBUG_ERROR("Aborting\n");
-         }
-      }
+	   unsigned int i;
+	   int buf = 0;
+	   for (i=0; i<cmd->num_args; i++) {
+		   switch (cmd->arg_types[i]) {
+			   case Null:
+				   err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], NULL);
+				   break;
+			   case Buffer: {
+						cl_mem mem;  
+						mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[buf]);
+						err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], &mem);
+						buf++;
+					}
+					break;
+			   case Immediate:
+					err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], cmd->args[i]);
+					break;
+		   }
+		   if (err != CL_SUCCESS) {
+			   DEBUG_CL("clSetKernelArg", err);
+			   DEBUG_ERROR("Aborting\n");
+		   }
+	   }
    }
 
    /* Calling Kernel */
    cl_event event;
-   err = clEnqueueNDRangeKernel(cq, d->kernel->cl_kernels[range], d->work_dim, d->global_work_offset, d->global_work_size, d->local_work_size, 0, NULL, &event);
+   err = clEnqueueNDRangeKernel(cq, cmd->kernel->cl_kernels[range], cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size, 0, NULL, &event);
 
    if (err != CL_SUCCESS) {
-      ERROR_MSG("Worker[%d] Unable to Enqueue kernel (error %d)\n", wid, err);
-      DEBUG_CL("clEnqueueNDRangeKernel", err);
-      DEBUG_MSG("Workdim %d, global_work_offset %p, global_work_size %p, local_work_size %p\n",
-                d->work_dim, d->global_work_offset, d->global_work_size, d->local_work_size);
-      DEBUG_MSG("Global work size: %ld %ld %ld\n", d->global_work_size[0],
-            (d->work_dim > 1 ? d->global_work_size[1] : 1), (d->work_dim > 2 ? d->global_work_size[2] : 1)); 
-      if (d->local_work_size != NULL)
-         DEBUG_MSG("Local work size: %ld %ld %ld\n", d->local_work_size[0],
-               (d->work_dim > 1 ? d->local_work_size[1] : 1), (d->work_dim > 2 ? d->local_work_size[2] : 1)); 
-      ERROR_MSG("Aborting.\n");
-      exit(1);
+	   ERROR_MSG("Worker[%d] Unable to Enqueue kernel (error %d)\n", wid, err);
+	   DEBUG_CL("clEnqueueNDRangeKernel", err);
+	   DEBUG_MSG("Workdim %d, global_work_offset %p, global_work_size %p, local_work_size %p\n",
+			   cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size);
+	   DEBUG_MSG("Global work size: %ld %ld %ld\n", cmd->global_work_size[0],
+			   (cmd->work_dim > 1 ? cmd->global_work_size[1] : 1), (cmd->work_dim > 2 ? cmd->global_work_size[2] : 1)); 
+	   if (cmd->local_work_size != NULL)
+		   DEBUG_MSG("Local work size: %ld %ld %ld\n", cmd->local_work_size[0],
+				   (cmd->work_dim > 1 ? cmd->local_work_size[1] : 1), (cmd->work_dim > 2 ? cmd->local_work_size[2] : 1)); 
+	   ERROR_MSG("Aborting.\n");
+	   exit(1);
    }
 
    /* Waiting for kernel to terminate */
@@ -99,202 +82,110 @@ static void soclEnqueueNDRangeKernel_task(void *descr[], void *args) {
 }
 
 static void cleaning_task_callback(void *args) {
-   running_kernel arg = (running_kernel)args;
+	command_ndrange_kernel cmd = (command_ndrange_kernel)args;
 
-   free(arg->arg_size);
-   free(arg->arg_type);
+	free(cmd->arg_sizes);
+	free(cmd->arg_types);
 
-   unsigned int i;
-   for (i=0; i<arg->arg_count; i++) {
-      free(arg->arg_value[i]);
-   }
-   free(arg->arg_value);
-
-   for (i=0; i<arg->buffer_count; i++)
-      gc_entity_unstore(&arg->buffers[i]);
+	unsigned int i;
+	for (i=0; i<cmd->num_args; i++) {
+		free(cmd->args[i]);
+	}
+	free(cmd->args);
 
-   gc_entity_unstore(&arg->kernel);
+	for (i=0; i<cmd->num_buffers; i++)
+		gc_entity_unstore(&cmd->buffers[i]);
 
-   free(arg->buffers);
-   free(arg->global_work_offset);
-   free(arg->global_work_size);
-   free(arg->local_work_size);
-   void * co = arg->codelet;
-   arg->codelet = NULL;
-   free(co);
+	free(cmd->buffers);
+	void * co = cmd->codelet;
+	cmd->codelet = NULL;
+	free(co);
 }
 
 static struct starpu_perfmodel_t perf_model = {
-  .type = STARPU_HISTORY_BASED,
-  .symbol = "perf_model"
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "perf_model"
 };
 
 /**
  * Real kernel enqueuing command
  */
-cl_int graph_play_enqueue_kernel(node_enqueue_kernel n) {
-
-   struct starpu_task *task;
-   running_kernel arg;
-   starpu_codelet *codelet;
-   cl_event ev;
-   
-   /* Alias struc fields */
-   cl_command_queue cq = n->cq;
-   cl_kernel        kernel = n->kernel;
-   cl_uint          work_dim = n->work_dim;
-   size_t *	    global_work_offset = (size_t*)n->global_work_offset;
-   size_t *   	    global_work_size = (size_t*)n->global_work_size;
-   size_t *   	    local_work_size = (size_t*)n->local_work_size;
-   cl_uint          num_events = n->node.num_events;
-   const cl_event * events = n->node.events;
-   cl_event         event = n->node.event;
-   char 	    is_task = n->is_task;
-   cl_int ndeps;
-   cl_event *deps;
-
-
-   /* Allocate structures */
-
-   /* Codelet */
-   codelet = (starpu_codelet*)malloc(sizeof(starpu_codelet));
-   if (codelet == NULL)
-      return CL_OUT_OF_HOST_MEMORY;
-
-   /* Codelet arguments */
-   arg = (running_kernel)malloc(sizeof(struct running_kernel));
-   if (arg == NULL) {
-      free(codelet);
-      return CL_OUT_OF_HOST_MEMORY;
-   }
-
-	/* StarPU task */
-	if (event != NULL) {
-		task = task_create_with_event(is_task ? CL_COMMAND_TASK : CL_COMMAND_NDRANGE_KERNEL, event);
+cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd) {
+
+	/* We need to detect which parameters are OpenCL's memory objects and
+	 * we retrieve their corresponding StarPU buffers.
+	 * The array is allocated before the task is created so that an
+	 * allocation failure is reported without leaving a task behind.
+	 * malloc(0) may legitimately return NULL, hence the num_args test. */
+	cmd->num_buffers = 0;
+	cmd->buffers = malloc(sizeof(cl_mem) * cmd->num_args);
+	if (cmd->buffers == NULL && cmd->num_args != 0)
+		return CL_OUT_OF_HOST_MEMORY;
+
+	/* NOTE(review): the other submit functions pass a command type to
+	 * task_create(); confirm this call matches its prototype */
+	starpu_task task = task_create();
+	task->cl = cmd->codelet;
+	task->cl_arg = cmd;
+	/* cmd is a pointer: the argument size is the size of the command
+	 * structure it points to, not the size of the pointer */
+	task->cl_arg_size = sizeof(*cmd);
+
+	starpu_codelet * codelet = cmd->codelet;
+
+	unsigned int i;
+	for (i=0; i<cmd->num_args; i++) {
+		if (cmd->arg_types[i] == Buffer) {
+
+			cl_mem buf = *(cl_mem*)cmd->args[i];
+
+			gc_entity_store(&cmd->buffers[cmd->num_buffers], buf);
+			task->buffers[cmd->num_buffers].handle = buf->handle;
+
+			/* Determine best StarPU buffer access mode */
+			int mode;
+			if (buf->mode == CL_MEM_READ_ONLY)
+				mode = STARPU_R;
+			else if (buf->mode == CL_MEM_WRITE_ONLY) {
+				mode = STARPU_W;
+				buf->scratch = 0;
+			}
+			else if (buf->scratch) { //RW but never accessed in RW or W mode
+				mode = STARPU_W;
+				buf->scratch = 0;
+			}
+			else {
+				mode = STARPU_RW;
+				buf->scratch = 0;
+			}
+			task->buffers[cmd->num_buffers].mode = mode; 
+
+			cmd->num_buffers += 1;
+		}
 	}
-	else {
-		
-		task = task_create(is_task ? CL_COMMAND_TASK : CL_COMMAND_NDRANGE_KERNEL);
-	}
-	ev = task_event(task);
-
-   /*******************
-    * Initializations *
-    *******************/
-
-   /* ------- *
-    * Codelet *
-    * ------- */
-   codelet->where = STARPU_OPENCL;
-   codelet->power_model = NULL;
-   codelet->opencl_func = &soclEnqueueNDRangeKernel_task;
-   //codelet->model = NULL;
-   codelet->model = &perf_model;
-
-   /* ---------------- *
-    * Codelet argument *
-    * ---------------- */
-   gc_entity_store(&arg->kernel, kernel);
-   arg->work_dim = work_dim;
-   arg->codelet = codelet;
-
-   arg->global_work_offset = memdup_safe(global_work_offset, sizeof(size_t)*work_dim);
-   arg->global_work_size = memdup_safe(global_work_size, sizeof(size_t)*work_dim);
-   arg->local_work_size = memdup_safe(local_work_size, sizeof(size_t)*work_dim);
-
-   /* ----------- *
-    * StarPU task *
-    * ----------- */
-   task->cl = codelet;
-   task->cl_arg = arg;
-   task->cl_arg_size = sizeof(struct running_kernel);
-
-   /* Convert OpenCL's memory objects to StarPU buffers */
-   codelet->nbuffers = 0;
-   {
-      arg->buffers = malloc(sizeof(cl_mem) * kernel->arg_count);
-      arg->buffer_count = 0;
-
-      unsigned int i;
-      for (i=0; i<kernel->arg_count; i++) {
-         if (kernel->arg_type[i] == Buffer) {
-
-            cl_mem buf = (cl_mem)kernel->arg_value[i];
-
-            /* We save cl_mem references in order to properly release them after kernel termination */
-            gc_entity_store(&arg->buffers[arg->buffer_count], buf);
-            arg->buffer_count += 1;
-
-            codelet->nbuffers++;
-            task->buffers[codelet->nbuffers-1].handle = buf->handle;
-
-            /* Determine best StarPU buffer access mode */
-            int mode;
-            if (buf->mode == CL_MEM_READ_ONLY)
-               mode = STARPU_R;
-            else if (buf->mode == CL_MEM_WRITE_ONLY) {
-               mode = STARPU_W;
-               buf->scratch = 0;
-            }
-            else if (buf->scratch) { //RW but never accessed in RW or W mode
-               mode = STARPU_W;
-               buf->scratch = 0;
-            }
-            else {
-               mode = STARPU_RW;
-               buf->scratch = 0;
-            }
-            task->buffers[codelet->nbuffers-1].mode = mode; 
-         }
-      }
-   }
-
-   /* Copy arguments as kernel args can be modified by the time we launch the kernel */
-   arg->arg_count = kernel->arg_count;
-   arg->arg_size = memdup(kernel->arg_size, sizeof(size_t) * kernel->arg_count);
-   arg->arg_type = memdup(kernel->arg_type, sizeof(enum kernel_arg_type) * kernel->arg_count);
-   arg->arg_value = memdup_deep_varsize_safe(kernel->arg_value, kernel->arg_count, kernel->arg_size);
+	/* The codelet must declare as many buffers as were attached to the task */
+	codelet->nbuffers = cmd->num_buffers;
 
-   DEBUG_MSG("Submitting NDRange task (event %d)\n", ev->id);
+	task_submit(task, cmd);
 
-   command_queue_enqueue(cq, task_event(task), 0, num_events, events, &ndeps, &deps);
+	/* Enqueue a cleaning task that depends on the event of this command */
+	//FIXME: execute this in the callback?
+	starpu_task cleaning_task = task_create_cpu(cleaning_task_callback, cmd,1);
+	cl_event ev = command_event_get(cmd);
+	task_depends_on(cleaning_task, 1, &ev);
+	task_submit(cleaning_task, cmd);
 
-   task_submit(task, ndeps, deps);
-
-   /* Enqueue a cleaning task */
-   starpu_task * cleaning_task = task_create_cpu(0, cleaning_task_callback, arg,1);
-   task_submit(cleaning_task, 1, &ev);
-  
-   return CL_SUCCESS;
+	return CL_SUCCESS;
 }
 
-/**
- * Virtual kernel enqueueing command
- */
+
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueNDRangeKernel(cl_command_queue cq,
-                       cl_kernel        kernel,
-                       cl_uint          work_dim,
-                       const size_t *   global_work_offset,
-                       const size_t *   global_work_size,
-                       const size_t *   local_work_size,
-                       cl_uint          num_events,
-                       const cl_event * events,
-                       cl_event *       event) CL_API_SUFFIX__VERSION_1_1
+		cl_kernel        kernel,
+		cl_uint          work_dim,
+		const size_t *   global_work_offset,
+		const size_t *   global_work_size,
+		const size_t *   local_work_size,
+		cl_uint          num_events,
+		const cl_event * events,
+		cl_event *       event) CL_API_SUFFIX__VERSION_1_1
 {
-	node_enqueue_kernel n;
-
-	n = graph_create_enqueue_kernel(0, cq, kernel, work_dim, global_work_offset, global_work_size,
-		local_work_size, num_events, events, kernel->arg_count, kernel->arg_size,
-		kernel->arg_type, kernel->arg_value);
-	
-	//FIXME: temporarily, we execute the node directly. In the future, we will postpone this.
-	graph_play_enqueue_kernel(n);
-	graph_free(n);
+	command_ndrange_kernel cmd = command_ndrange_kernel_create(kernel, work_dim,
+			global_work_offset, global_work_size, local_work_size);
 
-	//graph_store(n);
+	command_queue_enqueue(cq, cmd, num_events, events);
 
-	RETURN_OR_RELEASE_EVENT(n->node.event, event);
+	RETURN_EVENT(cmd, event);
 
 	return CL_SUCCESS;
 }

+ 36 - 29
socl/src/cl_enqueuereadbuffer.c

@@ -66,6 +66,37 @@ static starpu_codelet codelet_readbuffer = {
    .nbuffers = 1
 };
 
+/**
+ * Submit the StarPU task that executes a delayed ReadBuffer command.
+ *
+ * The task uses codelet_readbuffer with the buffer handle in STARPU_R
+ * mode. Offset, size, destination pointer and the buffer are passed to
+ * the codelet through a heap-allocated struct arg_readbuffer.
+ *
+ * @param cmd  ReadBuffer command describing the transfer
+ * @return CL_SUCCESS, or CL_OUT_OF_HOST_MEMORY if the codelet argument
+ *         cannot be allocated (no task is created in that case)
+ */
+cl_int command_read_buffer_submit(command_read_buffer cmd) {
+	/* Aliases */
+	cl_mem buffer = cmd->buffer;
+
+	/* Allocate the codelet argument before creating the task, so that an
+	 * allocation failure is reported without leaving a task behind */
+	struct arg_readbuffer *arg = malloc(sizeof(*arg));
+	if (arg == NULL)
+		return CL_OUT_OF_HOST_MEMORY;
+
+	arg->offset = cmd->offset;
+	arg->cb = cmd->cb;
+	arg->ptr = cmd->ptr;
+	gc_entity_store(&arg->buffer, buffer);
+
+	struct starpu_task *task = task_create(CL_COMMAND_READ_BUFFER);
+
+	task->buffers[0].handle = buffer->handle;
+	task->buffers[0].mode = STARPU_R;
+	task->cl = &codelet_readbuffer;
+	task->cl_arg = arg;
+	task->cl_arg_size = sizeof(*arg);
+
+	task_submit(task, cmd);
+
+	return CL_SUCCESS;
+}
+
+
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueReadBuffer(cl_command_queue  cq,
                     cl_mem              buffer,
@@ -77,38 +108,14 @@ soclEnqueueReadBuffer(cl_command_queue  cq,
                     const cl_event *    events,
                     cl_event *          event) CL_API_SUFFIX__VERSION_1_0
 { 
-   struct starpu_task *task;
-   struct arg_readbuffer *arg;
-   cl_event ev;
-
-   cl_int ndeps;
-   cl_event *deps;
-
-   task = task_create(CL_COMMAND_READ_BUFFER);
-   ev = task_event(task);
-
-   task->buffers[0].handle = buffer->handle;
-   task->buffers[0].mode = STARPU_R;
-   task->cl = &codelet_readbuffer;
-
-   arg = (struct arg_readbuffer*)malloc(sizeof(struct arg_readbuffer));
-   arg->offset = offset;
-   arg->cb = cb;
-   arg->ptr = ptr;
-   task->cl_arg = arg;
-   task->cl_arg_size = sizeof(struct arg_readbuffer);
-
-   gc_entity_store(&arg->buffer, buffer);
-
-   task->synchronous = (blocking == CL_TRUE);
 
-   DEBUG_MSG("Submitting EnqueueRWBuffer task (event %d)\n", ev->id);
+	command_read_buffer cmd = command_read_buffer_create(buffer, offset, cb, ptr);
 
-   command_queue_enqueue(cq, task_event(task), 0, num_events, events, &ndeps, &deps);
+	command_queue_enqueue(cq, cmd, num_events, events);
 
-   task_submit(task, ndeps, deps);
+	RETURN_EVENT(cmd, event);
 
-   RETURN_OR_RELEASE_EVENT(ev, event);
+	MAY_BLOCK(blocking);
 
-   return CL_SUCCESS;
+	return CL_SUCCESS;
 }

+ 3 - 19
socl/src/cl_enqueuetask.c

@@ -16,14 +16,6 @@
 
 #include "socl.h"
 
-static cl_uint work_dim = 3;
-static const size_t global_work_offset[3] = {0,0,0};
-static const size_t global_work_size[3] = {1,1,1};
-static const size_t * local_work_size = NULL;
-
-CL_API_ENTRY cl_int CL_API_CALL
-soclEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) CL_API_SUFFIX__VERSION_1_0;
-
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueTask(cl_command_queue cq,
               cl_kernel         kernel,
@@ -31,19 +23,11 @@ soclEnqueueTask(cl_command_queue cq,
               const cl_event *  events,
               cl_event *        event) CL_API_SUFFIX__VERSION_1_0
 {
-	node_enqueue_kernel n;
-
-	n = graph_create_enqueue_kernel(1, cq, kernel, work_dim, global_work_offset, global_work_size,
-		local_work_size, num_events, events, kernel->arg_count, kernel->arg_size,
-		kernel->arg_type, kernel->arg_value);
+	command_ndrange_kernel cmd = command_task_create(kernel);
 	
-	//FIXME: temporarily, we execute the node directly. In the future, we will postpone this.
-	graph_play_enqueue_kernel(n);
-	graph_free(n);
-
-	//graph_store(n);
+	command_queue_enqueue(cq, cmd, num_events, events);
 
-	RETURN_OR_RELEASE_EVENT(n->node.event, event);
+	RETURN_EVENT(cmd, event);
 
 	return CL_SUCCESS;
 }

+ 18 - 19
socl/src/cl_enqueueunmapmemobject.c

@@ -16,32 +16,31 @@
 
 #include "socl.h"
 
+/**
+ * Submit the task that executes a delayed UnmapMemObject command.
+ *
+ * The task is built with task_create_cpu(); its function is
+ * starpu_data_release(), called with the handle of the command's buffer.
+ *
+ * @param cmd  UnmapMemObject command holding the buffer to release
+ * @return CL_SUCCESS
+ */
+cl_int command_unmap_mem_object_submit(command_unmap_mem_object cmd) {
+	/* starpu_data_release is used directly as the task function */
+	void (*release_fn)(void*) = (void(*)(void*))starpu_data_release;
+	starpu_task unmap_task;
+
+	//FIXME: use a callback
+	unmap_task = task_create_cpu(release_fn, cmd->buffer->handle, 0);
+	task_submit(unmap_task, cmd);
+
+	return CL_SUCCESS;
+}
+
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueUnmapMemObject(cl_command_queue cq,
-                        cl_mem            memobj,
-                        void *            UNUSED(mapped_ptr),
+                        cl_mem            buffer,
+                        void *            ptr,
                         cl_uint           num_events,
                         const cl_event *  events,
                         cl_event *        event) CL_API_SUFFIX__VERSION_1_0
 {
-   struct starpu_task *task;
-   cl_int err;
-   cl_event ev;
-   cl_int ndeps;
-   cl_event *deps;
-
-   /* Create StarPU task */
-   task = task_create_cpu(CL_COMMAND_UNMAP_MEM_OBJECT, (void(*)(void*))starpu_data_release, memobj->handle, 0);
-   ev = task_event(task);
-
-   DEBUG_MSG("Submitting UnmapBuffer task (event %d)\n", task->tag_id);
-
-   command_queue_enqueue(cq, task_event(task), 0, num_events, events, &ndeps, &deps);
-
-   task_submit(task, ndeps, deps);
+	command_unmap_mem_object cmd = command_unmap_mem_object_create(buffer, ptr);
 
+	command_queue_enqueue(cq, cmd, num_events, events);
 
-   RETURN_OR_RELEASE_EVENT(ev, event);
+	RETURN_EVENT(cmd, event);
 
-   return CL_SUCCESS;
+	return CL_SUCCESS;
 }

+ 2 - 9
socl/src/cl_enqueuewaitforevents.c

@@ -22,16 +22,9 @@ soclEnqueueWaitForEvents(cl_command_queue cq,
                        const cl_event * events) CL_API_SUFFIX__VERSION_1_0
 {
 
-	cl_int ndeps;
-	cl_event *deps;
+	command_marker cmd = command_marker_create();
 
-	//CL_COMMAND_MARKER has been chosen as CL_COMMAND_WAIT_FOR_EVENTS doesn't exist
-	starpu_task * task = task_create(CL_COMMAND_MARKER);
-
-	DEBUG_MSG("Submitting WAIT_FOR_EVENTS task (event %d)\n", task->tag_id);
-	command_queue_enqueue(cq, task_event(task), 1, num_events, events, &ndeps, &deps);
-
-	task_submit(task, ndeps, deps);
+	command_queue_enqueue(cq, cmd, num_events, events);
 
 	return CL_SUCCESS;
 }

+ 42 - 38
socl/src/cl_enqueuewritebuffer.c

@@ -68,6 +68,43 @@ static starpu_codelet codelet_writebuffer = {
    .nbuffers = 1
 };
 
+/**
+ * Submit the StarPU task that executes a delayed WriteBuffer command.
+ *
+ * The task uses codelet_writebuffer. The buffer handle is accessed in
+ * STARPU_W mode when the whole buffer is written (cb == buffer->size)
+ * and in STARPU_RW mode otherwise. Offset, size, source pointer and the
+ * buffer are passed to the codelet through a heap-allocated
+ * struct arg_writebuffer.
+ *
+ * @param cmd  WriteBuffer command describing the transfer
+ * @return CL_SUCCESS, or CL_OUT_OF_HOST_MEMORY if the codelet argument
+ *         cannot be allocated (no task is created in that case)
+ */
+cl_int command_write_buffer_submit(command_write_buffer cmd) {
+	/* Aliases */
+	cl_mem buffer = cmd->buffer;
+	size_t cb = cmd->cb;
+
+	/* Allocate the codelet argument before creating the task, so that an
+	 * allocation failure is reported without leaving a task behind */
+	struct arg_writebuffer *arg = malloc(sizeof(*arg));
+	if (arg == NULL)
+		return CL_OUT_OF_HOST_MEMORY;
+
+	arg->offset = cmd->offset;
+	arg->cb = cb;
+	arg->ptr = cmd->ptr;
+	gc_entity_store(&arg->buffer, buffer);
+
+	struct starpu_task *task = task_create(CL_COMMAND_WRITE_BUFFER);
+
+	task->buffers[0].handle = buffer->handle;
+	//If only a subpart of the buffer is written, RW access mode is required
+	if (cb != buffer->size)
+		task->buffers[0].mode = STARPU_RW;
+	else 
+		task->buffers[0].mode = STARPU_W;
+	task->cl = &codelet_writebuffer;
+	task->cl_arg = arg;
+	task->cl_arg_size = sizeof(*arg);
+
+	//The buffer now contains meaningful data
+	arg->buffer->scratch = 0;
+
+	task_submit(task, cmd);
+
+	return CL_SUCCESS;
+}
+
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueWriteBuffer(cl_command_queue cq, 
                      cl_mem             buffer, 
@@ -79,46 +116,13 @@ soclEnqueueWriteBuffer(cl_command_queue cq,
                      const cl_event *   events, 
                      cl_event *         event) CL_API_SUFFIX__VERSION_1_0
 { 
-   struct starpu_task *task;
-   struct arg_writebuffer *arg;
-   cl_event ev;
-
-   cl_int ndeps;
-   cl_event *deps;
-
-   task = task_create(CL_COMMAND_WRITE_BUFFER);
-   ev = task_event(task);
-
-   task->buffers[0].handle = buffer->handle;
-   //If only a subpart of the buffer is written, RW access mode is required
-   if (cb != buffer->size)
-      task->buffers[0].mode = STARPU_RW;
-   else 
-      task->buffers[0].mode = STARPU_W;
-   task->cl = &codelet_writebuffer;
-
-   arg = (struct arg_writebuffer*)malloc(sizeof(struct arg_writebuffer));
-   arg->offset = offset;
-   arg->cb = cb;
-   arg->ptr = ptr;
-   task->cl_arg = arg;
-   task->cl_arg_size = sizeof(struct arg_writebuffer);
-
-   gc_entity_store(&arg->buffer, buffer);
-
-   //The buffer now contains meaningful data
-   arg->buffer->scratch = 0;
-
-   task->synchronous = (blocking == CL_TRUE);
-
-   DEBUG_MSG("Submitting EnqueueRWBuffer task (event %d)\n", ev->id);
+	command_write_buffer cmd = command_write_buffer_create(buffer, offset, cb, ptr);
 
-   command_queue_enqueue(cq, task_event(task), 0, num_events, events, &ndeps, &deps);
+	command_queue_enqueue(cq, cmd, num_events, events);
 
-   task_submit(task, ndeps, deps);
+	RETURN_EVENT(cmd, event);
 
-   /* Return retained event if required by user */
-   RETURN_OR_RELEASE_EVENT(ev,event);
+	MAY_BLOCK(blocking);
 
-   return CL_SUCCESS;
+	return CL_SUCCESS;
 }

+ 9 - 6
socl/src/cl_finish.c

@@ -17,11 +17,14 @@
 #include "socl.h"
 
 CL_API_ENTRY cl_int CL_API_CALL
-soclFinish(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0
-{
-   cl_event ev = command_queue_barrier(cq);
-   soclWaitForEvents(1, &ev);
-   gc_entity_release(ev);
+soclFinish(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0 {
 
-   return CL_SUCCESS;
+	command_marker cmd = command_barrier_create();
+
+	command_queue_enqueue(cq, cmd, 0, NULL);
+		cl_event ev = command_event_get(cmd);
+
+	MAY_BLOCK(CL_TRUE)
+
+	return CL_SUCCESS;
 }

+ 2 - 1
socl/src/cl_geteventinfo.c

@@ -17,6 +17,7 @@
 #include "socl.h"
 #include "getinfo.h"
 
+
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetEventInfo(cl_event       event,
                cl_event_info    param_name,
@@ -33,7 +34,7 @@ soclGetEventInfo(cl_event       event,
 
    switch (param_name) {
       INFO_CASE(CL_EVENT_COMMAND_QUEUE, event->cq);
-      INFO_CASE(CL_EVENT_COMMAND_TYPE, event->type);
+      INFO_CASE(CL_EVENT_COMMAND_TYPE, event->command->typ);
       INFO_CASE(CL_EVENT_COMMAND_EXECUTION_STATUS, event->status);
       INFO_CASE(CL_EVENT_REFERENCE_COUNT, event->_entity.refs);
       default:

+ 1 - 1
socl/src/cl_getkernelinfo.c

@@ -29,7 +29,7 @@ soclGetKernelInfo(cl_kernel       kernel,
 
    switch (param_name) {
       INFO_CASE_EX(CL_KERNEL_FUNCTION_NAME, kernel->kernel_name, strlen(kernel->kernel_name)+1)
-      INFO_CASE(CL_KERNEL_NUM_ARGS, kernel->arg_count)
+      INFO_CASE(CL_KERNEL_NUM_ARGS, kernel->num_args)
       INFO_CASE(CL_KERNEL_REFERENCE_COUNT, kernel->_entity.refs)
       INFO_CASE(CL_KERNEL_PROGRAM, kernel->program)
       INFO_CASE(CL_KERNEL_CONTEXT, kernel->program->context)

+ 5 - 3
socl/src/cl_setkernelarg.c

@@ -25,7 +25,7 @@ soclSetKernelArg(cl_kernel  kernel,
    if (kernel == NULL)
       return CL_INVALID_KERNEL;
 
-   if (arg_index >= kernel->arg_count)
+   if (arg_index >= kernel->num_args)
       return CL_INVALID_ARG_INDEX;
 
    //FIXME: we don't return CL_INVALID_ARG_VALUE if "arg_value is NULL for an argument that is not declared with __local qualifier or vice-versa"
@@ -38,7 +38,8 @@ soclSetKernelArg(cl_kernel  kernel,
          break;
       case Buffer:
          kernel->arg_type[arg_index] = Null;
-         gc_entity_unstore((cl_mem*)&kernel->arg_value[arg_index]);
+         gc_entity_unstore((cl_mem*)kernel->arg_value[arg_index]);
+	 free(kernel->arg_value[arg_index]);
          kernel->arg_value[arg_index] = NULL;
          break;
       case Immediate:
@@ -60,7 +61,8 @@ soclSetKernelArg(cl_kernel  kernel,
       if ((arg_size == sizeof(cl_mem)) && ((buf = mem_object_fetch(arg_value)) != NULL)) {
          DEBUG_MSG("Found buffer %d \n", buf->id);
          kernel->arg_type[arg_index] = Buffer;
-         gc_entity_store(&kernel->arg_value[arg_index], buf);
+         kernel->arg_value[arg_index] = malloc(sizeof(void*));
+	 gc_entity_store((cl_mem*)kernel->arg_value[arg_index], buf);
       }
       else {
          /* Argument must be an immediate buffer  */

+ 6 - 1
socl/src/cl_waitforevents.c

@@ -23,7 +23,12 @@ soclWaitForEvents(cl_uint           num_events,
    unsigned int i;
    DEBUG_MSG("Waiting for events: ");
    for (i=0; i<num_events; i++) {
-      DEBUG_MSG_NOHEAD("%d ", event_list[i]->id);
+   	command_graph_dump(event_list[i]->command);
+
+   	/* We need to submit commands if it's not already done */
+	command_submit_deep(event_list[i]->command);
+
+      	DEBUG_MSG_NOHEAD("%d ", event_list[i]->id);
    }
    DEBUG_MSG_NOHEAD("\n");
 

+ 235 - 0
socl/src/command.c

@@ -0,0 +1,235 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+/**
+ * Set the fields shared by every command to their initial values.
+ *
+ * A fresh event is created for the command and linked back to it; the
+ * command starts with no dependency, no queue, no task and is flagged
+ * as not submitted.
+ */
+void command_init_ex(cl_command cmd, cl_command_type typ) {
+	cl_event ev = event_create();
+	ev->command = cmd;
+
+	cmd->typ = typ;
+	cmd->event = ev;
+	cmd->events = NULL;
+	cmd->num_events = 0;
+	cmd->cq = NULL;
+	cmd->task = NULL;
+	cmd->submitted = 0;
+}
+
+
+/**
+ * Submit a single command: dispatch on its type to the matching
+ * <name>_submit function, then flag the command as submitted.
+ *
+ * The command must not be flagged as submitted yet (checked by assert).
+ * An unknown command type ends in ERROR_STOP.
+ * The value returned by the type-specific submit function is ignored.
+ */
+void command_submit_ex(cl_command cmd) {
+/* Expands to one switch case that casts cmd to its concrete command type */
+#define SUBMIT(typ,name) case typ:\
+	name##_submit((name)cmd);\
+	break;
+
+	assert(cmd->submitted == 0);
+
+	switch(cmd->typ) {
+		SUBMIT(CL_COMMAND_NDRANGE_KERNEL, command_ndrange_kernel)
+		/* Task commands are stored as NDRange kernel commands */
+		SUBMIT(CL_COMMAND_TASK, command_ndrange_kernel)
+		SUBMIT(CL_COMMAND_READ_BUFFER, command_read_buffer)
+		SUBMIT(CL_COMMAND_WRITE_BUFFER, command_write_buffer)
+		SUBMIT(CL_COMMAND_COPY_BUFFER, command_copy_buffer)
+		SUBMIT(CL_COMMAND_MAP_BUFFER, command_map_buffer)
+		SUBMIT(CL_COMMAND_UNMAP_MEM_OBJECT, command_unmap_mem_object)
+		SUBMIT(CL_COMMAND_MARKER, command_marker)
+		default:
+			ERROR_STOP("Trying to submit unknown command (type %x)", cmd->typ);
+	}
+
+	cmd->submitted = 1;
+#undef SUBMIT
+}
+
+/**
+ * Submit a command after having recursively submitted the commands of
+ * all the events it depends on.
+ *
+ * Does nothing if the command is already flagged as submitted.
+ * Always returns CL_SUCCESS.
+ */
+cl_int command_submit_deep_ex(cl_command cmd) {
+	if (cmd->submitted == 1)
+		return CL_SUCCESS;
+	
+	/* Temporarily flag the command as submitted so that the recursion
+	 * below stops if it comes back to this command (cyclic dependencies) */
+	cmd->submitted = 1;
+
+	unsigned int i;
+	for (i=0; i<cmd->num_events; i++)
+		command_submit_deep(cmd->events[i]->command);
+	
+	/* Clear the flag again: command_submit_ex asserts that it is 0 */
+	cmd->submitted = 0;
+
+	command_submit_ex(cmd);
+
+	return CL_SUCCESS;
+}
+
+/**
+ * Print the dependency graph of a command on the standard output.
+ *
+ * The commands it depends on are dumped first (recursively), then one
+ * line is printed for the command itself: its address, its type and the
+ * addresses of the commands it depends on.
+ */
+void command_graph_dump_ex(cl_command cmd) {
+
+	unsigned int i;
+	for (i=0; i<cmd->num_events; i++)
+		command_graph_dump_ex(cmd->events[i]->command);
+
+	/* Pointers are printed with %p: passing them to %lx is a format/argument mismatch */
+	printf("CMD %p TYPE %u DEPS", (void*)cmd, (unsigned int)cmd->typ);
+	for (i=0; i<cmd->num_events; i++)
+		printf(" %p", (void*)cmd->events[i]->command);
+	printf("\n");
+
+}
+
+/* Helpers for the command constructors below: each one copies the
+ * constructor parameter called "name" into the field cmd->name. */
+
+/* Copy through memdup_safe (presumably NULL-tolerant -- confirm in util.c) */
+#define nullOrDup(name,size) cmd->name = memdup_safe(name,size)
+/* Plain copy of the value */
+#define dup(name) cmd->name = name
+/* Copy the reference and retain the entity.
+ * No trailing semicolon so that the macro behaves as a single statement. */
+#define dupEntity(name) do { cmd->name = name; gc_entity_retain(name); } while (0)
+
+void soclEnqueueNDRangeKernel_task(void *descr[], void *args);
+
+/**
+ * Create a CL_COMMAND_NDRANGE_KERNEL command.
+ *
+ * The kernel is retained, the work size arrays are duplicated and the
+ * kernel arguments (sizes, types, values) are copied into the command.
+ * A codelet targeting OpenCL workers is allocated for the command.
+ */
+command_ndrange_kernel command_ndrange_kernel_create (
+		cl_kernel        kernel,
+		cl_uint          work_dim,
+		const size_t *   global_work_offset,
+		const size_t *   global_work_size,
+		const size_t *   local_work_size)
+{
+	command_ndrange_kernel cmd = malloc(sizeof(struct command_ndrange_kernel_t));
+	command_init(cmd, CL_COMMAND_NDRANGE_KERNEL);
+
+	/* Kernel reference */
+	cmd->kernel = kernel;
+	gc_entity_retain(kernel);
+
+	/* Work sizes: one size_t per dimension */
+	cmd->work_dim = work_dim;
+	cmd->global_work_offset = memdup_safe(global_work_offset, work_dim*sizeof(size_t));
+	cmd->global_work_size = memdup_safe(global_work_size, work_dim*sizeof(size_t));
+	cmd->local_work_size = memdup_safe(local_work_size, work_dim*sizeof(size_t));
+
+	/* Codelet */
+	starpu_codelet * cl = (starpu_codelet*)malloc(sizeof(starpu_codelet));
+	cmd->codelet = cl;
+	cl->where = STARPU_OPENCL;
+	cl->power_model = NULL;
+	cl->opencl_func = &soclEnqueueNDRangeKernel_task;
+	cl->model = NULL;
+
+	/* The kernel is mutable: keep our own copy of its parameters */
+	cmd->num_args = kernel->num_args;
+	cmd->arg_sizes = memdup(kernel->arg_size, sizeof(size_t) * kernel->num_args);
+	cmd->arg_types = memdup(kernel->arg_type, sizeof(enum kernel_arg_type) * kernel->num_args);
+	cmd->args = memdup_deep_varsize_safe(kernel->arg_value, kernel->num_args, kernel->arg_size);
+
+	return cmd;
+}
+
+/**
+ * Create a CL_COMMAND_TASK command.
+ *
+ * The command is built as an NDRange kernel command with 3 dimensions,
+ * a global size of 1x1x1, a null offset and no local size; its type is
+ * then changed to CL_COMMAND_TASK.
+ */
+command_ndrange_kernel command_task_create (cl_kernel kernel) {
+
+	static const size_t no_offset[3] = {0,0,0};
+	static const size_t single_item[3] = {1,1,1};
+
+	command_ndrange_kernel cmd = command_ndrange_kernel_create(
+			kernel, 3, no_offset, single_item, NULL);
+
+	/* Only the command type differs from a regular NDRange kernel command */
+	cmd->_command.typ = CL_COMMAND_TASK;
+
+	return cmd;
+}
+
+/**
+ * Create a barrier command.
+ *
+ * It uses the marker structure (common fields only) and is tagged
+ * CL_COMMAND_BARRIER.
+ */
+command_marker command_barrier_create () {
+	command_marker barrier = malloc(sizeof(*barrier));
+	command_init(barrier, CL_COMMAND_BARRIER);
+	return barrier;
+}
+
+/**
+ * Create a marker command (common fields only, type CL_COMMAND_MARKER).
+ */
+command_marker command_marker_create () {
+	command_marker marker = malloc(sizeof(*marker));
+	command_init(marker, CL_COMMAND_MARKER);
+	return marker;
+}
+
+/**
+ * Create a CL_COMMAND_MAP_BUFFER command.
+ *
+ * The buffer and the event are retained; the other parameters are
+ * stored as given.
+ */
+command_map_buffer command_map_buffer_create(
+		cl_mem buffer,
+		cl_map_flags map_flags,
+		size_t offset,
+		size_t cb,
+		cl_event event
+		) {
+
+	command_map_buffer cmd = malloc(sizeof(*cmd));
+	command_init(cmd, CL_COMMAND_MAP_BUFFER);
+
+	/* Retained entity */
+	cmd->buffer = buffer;
+	gc_entity_retain(buffer);
+
+	/* Plain values */
+	cmd->map_flags = map_flags;
+	cmd->offset = offset;
+	cmd->cb = cb;
+
+	/* Retained entity */
+	cmd->event = event;
+	gc_entity_retain(event);
+
+	return cmd;
+}
+
+/**
+ * Create a CL_COMMAND_UNMAP_MEM_OBJECT command.
+ *
+ * The buffer is retained; ptr is stored as given.
+ */
+command_unmap_mem_object command_unmap_mem_object_create(cl_mem buffer, void * ptr) {
+	command_unmap_mem_object cmd = malloc(sizeof(*cmd));
+	command_init(cmd, CL_COMMAND_UNMAP_MEM_OBJECT);
+
+	cmd->buffer = buffer;
+	gc_entity_retain(buffer);
+
+	cmd->ptr = ptr;
+
+	return cmd;
+}
+
+/**
+ * Create a CL_COMMAND_READ_BUFFER command.
+ *
+ * The buffer is retained; offset, cb and ptr are stored as given.
+ */
+command_read_buffer command_read_buffer_create(cl_mem buffer, size_t offset, size_t cb, void * ptr) {
+	command_read_buffer cmd = malloc(sizeof(*cmd));
+	command_init(cmd, CL_COMMAND_READ_BUFFER);
+
+	cmd->buffer = buffer;
+	gc_entity_retain(buffer);
+
+	cmd->offset = offset;
+	cmd->cb = cb;
+	cmd->ptr = ptr;
+
+	return cmd;
+}
+
+/**
+ * Create a CL_COMMAND_WRITE_BUFFER command.
+ *
+ * The buffer is retained; offset, cb and ptr are stored as given
+ * (the memory pointed to by ptr is not copied).
+ */
+command_write_buffer command_write_buffer_create(cl_mem buffer, size_t offset, size_t cb, const void * ptr) {
+	command_write_buffer cmd = malloc(sizeof(*cmd));
+	command_init(cmd, CL_COMMAND_WRITE_BUFFER);
+
+	cmd->buffer = buffer;
+	gc_entity_retain(buffer);
+
+	cmd->offset = offset;
+	cmd->cb = cb;
+	cmd->ptr = ptr;
+
+	return cmd;
+}
+
+/**
+ * Create a CL_COMMAND_COPY_BUFFER command.
+ *
+ * Both buffers are retained; the offsets and cb are stored as given.
+ */
+command_copy_buffer command_copy_buffer_create( cl_mem src_buffer, cl_mem dst_buffer,
+		size_t src_offset, size_t dst_offset, size_t cb)
+{
+	command_copy_buffer cmd = malloc(sizeof(*cmd));
+	command_init(cmd, CL_COMMAND_COPY_BUFFER);
+
+	/* Source */
+	cmd->src_buffer = src_buffer;
+	gc_entity_retain(src_buffer);
+
+	/* Destination */
+	cmd->dst_buffer = dst_buffer;
+	gc_entity_retain(dst_buffer);
+
+	cmd->src_offset = src_offset;
+	cmd->dst_offset = dst_offset;
+	cmd->cb = cb;
+
+	return cmd;
+}
+
+/* The constructor helper macros are private to this file.
+ * Only the names actually defined above are undefined. */
+#undef nullOrDup
+#undef dup
+#undef dupEntity
+

+ 198 - 0
socl/src/command.h

@@ -0,0 +1,198 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef SOCL_COMMANDS_H
+#define SOCL_COMMANDS_H
+
+typedef struct cl_command_t * cl_command;
+
+/**
+ * Initialize the fields common to every command
+ *
+ * Command constructors for each kind of command use this method.
+ * The command is created without dependencies and without command queue.
+ */
+void command_init_ex(cl_command cmd, cl_command_type typ);
+#define command_init(cmd,typ) \
+	command_init_ex((cl_command)(cmd),typ)
+
+/** Submit a command for execution */
+void command_submit_ex(cl_command cmd);
+#define command_submit(cmd) \
+	command_submit_ex(&(cmd)->_command)
+
+/** Submit a command and its dependencies */
+cl_int command_submit_deep_ex(cl_command cmd);
+#define command_submit_deep(cmd) (command_submit_deep_ex((cl_command)(cmd)))
+
+/** Print a command and, recursively, the commands it depends on */
+void command_graph_dump_ex(cl_command cmd);
+#define command_graph_dump(cmd) (command_graph_dump_ex((cl_command)(cmd)))
+
+/**************************
+ * OpenCL Commands
+ **************************/
+/*
+ * Fields common to every command.
+ * Concrete command structures embed this structure as their first member
+ * (see CL_COMMAND), which is what allows casting them to cl_command.
+ */
+struct cl_command_t {
+	cl_command_type	typ;	 	/* Command type */
+	cl_uint 	num_events;	/* Number of dependencies */
+	cl_event * 	events;		/* Dependencies */
+	cl_event  	event;		/* Event for this command */
+	cl_command_queue cq;		/* Command queue the command is enqueued in */
+	starpu_task	task;		/* Associated StarPU task, if any */
+	char		submitted;	/* True if the command has been submitted to StarPU */
+};
+
+/* Accessors to the common fields, usable on a pointer to any command */
+#define command_type_get(cmd) (((cl_command)(cmd))->typ)
+#define command_event_get(cmd) (((cl_command)(cmd))->event)
+#define command_num_events_get(cmd) (((cl_command)(cmd))->num_events)
+#define command_events_get(cmd) (((cl_command)(cmd))->events)
+#define command_task_get(cmd) (((cl_command)(cmd))->task)
+#define command_cq_get(cmd) (((cl_command)(cmd))->cq)
+
+/* Common fields; used as the first member of every concrete command structure */
+#define CL_COMMAND struct cl_command_t _command;
+
+/*
+ * NDRange kernel command (also used for CL_COMMAND_TASK commands).
+ * The work size arrays and the argument fields are copies made by
+ * command_ndrange_kernel_create; the kernel is retained.
+ */
+typedef struct command_ndrange_kernel_t {
+	CL_COMMAND
+
+	cl_kernel        kernel;
+	cl_uint          work_dim;
+	const size_t *   global_work_offset;
+	const size_t *   global_work_size;
+	const size_t *   local_work_size;
+	cl_uint 	 num_args;
+	size_t *	 arg_sizes;
+	enum kernel_arg_type * arg_types;
+	void **		 args;
+	starpu_codelet * codelet;
+	/* NOTE(review): the two fields below are not set by
+	 * command_ndrange_kernel_create; presumably filled at submission -- confirm */
+	cl_uint		 num_buffers;
+	cl_mem *	 buffers;
+} * command_ndrange_kernel;
+
+
+/*
+ * Buffer read command: parameters stored as given to
+ * command_read_buffer_create (the buffer is retained).
+ */
+typedef struct command_read_buffer_t {
+	CL_COMMAND
+	
+	cl_mem buffer;
+	size_t offset;
+	size_t cb;
+	void * ptr;
+} * command_read_buffer;
+
+
+/*
+ * Buffer write command: parameters stored as given to
+ * command_write_buffer_create (the buffer is retained, the memory
+ * pointed to by ptr is not copied).
+ */
+typedef struct command_write_buffer_t {
+	CL_COMMAND
+
+	cl_mem buffer;
+	size_t offset;
+	size_t cb;
+	const void * ptr;
+} * command_write_buffer;
+
+
+/*
+ * Buffer copy command: parameters stored as given to
+ * command_copy_buffer_create (both buffers are retained).
+ */
+typedef struct command_copy_buffer_t {
+	CL_COMMAND
+	
+	cl_mem src_buffer;
+	cl_mem dst_buffer;
+	size_t src_offset;
+	size_t dst_offset;
+	size_t cb;
+} * command_copy_buffer;
+
+
+/*
+ * Buffer map command: parameters stored as given to
+ * command_map_buffer_create (the buffer and the event are retained).
+ * Note that "event" here is a distinct field from the command's own
+ * event stored in the common fields.
+ */
+typedef struct command_map_buffer_t {
+	CL_COMMAND
+
+	cl_mem buffer;
+	cl_map_flags map_flags;
+	size_t offset;
+	size_t cb;
+	cl_event event;
+} * command_map_buffer;
+
+
+/*
+ * Memory object unmap command: parameters stored as given to
+ * command_unmap_mem_object_create (the buffer is retained).
+ */
+typedef struct command_unmap_mem_object_t {
+	CL_COMMAND
+
+	cl_mem buffer;
+	void * ptr;
+} * command_unmap_mem_object;
+
+
+/*
+ * Marker command: only the common fields.
+ * Barrier commands (command_barrier_create) use this structure too.
+ */
+typedef struct command_marker_t {
+	CL_COMMAND
+} * command_marker;
+
+/*************************
+ * Constructor functions
+ *************************/
+
+/* NDRange kernel command; the work size arrays and the kernel arguments are copied */
+command_ndrange_kernel command_ndrange_kernel_create (
+		cl_kernel        kernel,
+		cl_uint          work_dim,
+		const size_t *   global_work_offset,
+		const size_t *   global_work_size,
+		const size_t *   local_work_size);
+
+/* Task command: an NDRange kernel command whose type is CL_COMMAND_TASK */
+command_ndrange_kernel command_task_create (cl_kernel kernel);
+
+/* Barrier command (type CL_COMMAND_BARRIER) */
+command_marker command_barrier_create (void);
+
+/* Marker command (type CL_COMMAND_MARKER) */
+command_marker command_marker_create (void);
+
+/* Buffer map command; buffer and event are retained */
+command_map_buffer command_map_buffer_create(
+		cl_mem buffer,
+		cl_map_flags map_flags,
+		size_t offset,
+		size_t cb,
+		cl_event event);
+
+/* Memory object unmap command; buffer is retained */
+command_unmap_mem_object command_unmap_mem_object_create(
+		cl_mem buffer,
+		void * ptr);
+
+/* Buffer read command; buffer is retained */
+command_read_buffer command_read_buffer_create(
+		cl_mem buffer,
+		size_t offset,
+		size_t cb,
+		void * ptr);
+
+/* Buffer write command; buffer is retained */
+command_write_buffer command_write_buffer_create(
+		cl_mem buffer,
+		size_t offset,
+		size_t cb,
+		const void * ptr);
+
+/* Buffer copy command; both buffers are retained */
+command_copy_buffer command_copy_buffer_create(
+		cl_mem src_buffer,
+		cl_mem dst_buffer,
+		size_t src_offset,
+		size_t dst_offset,
+		size_t cb);
+
+/*************************
+ * Submit functions
+ *************************/
+/* One submit function per concrete command structure; command_submit_ex
+ * selects the one matching the command type. */
+cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd);
+cl_int command_read_buffer_submit(command_read_buffer cmd);
+cl_int command_write_buffer_submit(command_write_buffer cmd);
+cl_int command_copy_buffer_submit(command_copy_buffer cmd);
+cl_int command_map_buffer_submit(command_map_buffer cmd);
+cl_int command_unmap_mem_object_submit(command_unmap_mem_object cmd);
+cl_int command_marker_submit(command_marker cmd);
+
+
+#endif /* SOCL_COMMANDS_H */
+

+ 40 - 0
socl/src/command_list.c

@@ -0,0 +1,40 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+/**
+ * Prepend a command to a list.
+ *
+ * ls may be NULL (empty list). Returns the new head of the list.
+ */
+command_list command_list_cons(cl_command cmd, command_list ls) {
+	command_list e = malloc(sizeof(struct command_list_t));
+	e->cmd = cmd;
+	e->next = ls;
+	/* The new node is the head of the list, so it has no predecessor
+	 * (malloc leaves this field uninitialized) */
+	e->prev = NULL;
+	if (ls != NULL)
+		ls->prev = e;
+	return e;
+}
+
+/**
+ * Remove every node holding the given command from a list.
+ *
+ * l may be NULL (empty list). The removed nodes are freed; the command
+ * itself is left untouched. Returns the new head of the list.
+ */
+command_list command_list_remove(command_list l, cl_command cmd) {
+	command_list head = l;
+	command_list prev = NULL;	/* Last node kept so far */
+	command_list e = l;
+
+	while (e != NULL) {
+		/* Read the successor before the node is possibly freed */
+		command_list next = e->next;
+
+		if (e->cmd == cmd) {
+			/* Unlink the node */
+			if (prev != NULL)
+				prev->next = next;
+			else
+				head = next;
+			if (next != NULL)
+				next->prev = prev;
+			free(e);
+		}
+		else {
+			prev = e;
+		}
+
+		/* Always move forward, whether the node matched or not */
+		e = next;
+	}
+
+	return head;
+}

+ 28 - 0
socl/src/command_list.h

@@ -0,0 +1,28 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef SOCL_COMMAND_LIST_H
+#define SOCL_COMMAND_LIST_H
+
+#include "socl.h"
+
+/* Doubly-linked list of commands */
+typedef struct command_list_t * command_list;
+
+struct command_list_t {
+	cl_command cmd;		/* Command held by this node */
+	command_list next;	/* Next node, NULL for the last one */
+	command_list prev;	/* Previous node, NULL for the first one */
+};
+
+/* Prepend cmd to ls and return the new head of the list */
+command_list command_list_cons(cl_command cmd, command_list ls);
+
+/* Remove cmd from l and return the new head of the list */
+command_list command_list_remove(command_list l, cl_command cmd);
+
+#endif /* SOCL_COMMAND_LIST_H */

+ 49 - 65
socl/src/command_queue.c

@@ -47,10 +47,10 @@ void command_queue_dependencies_implicit(
 
 	/* Add dependencies to out-of-order events (if any) */
 	if (is_barrier) {
-		cl_event ev = cq->events;
-		while (ev != NULL) {
+		command_list cl = cq->commands;
+		while (cl != NULL) {
 			ndeps++;
-			ev = ev->next;
+			cl = cl->next;
 		}
 	}
 
@@ -63,14 +63,14 @@ void command_queue_dependencies_implicit(
 
 	/* Add dependency to last barrier if applicable */
 	if (cq->barrier != NULL)
-		evs[n++] = cq->barrier;
+		evs[n++] = cq->barrier->event;
 
 	/* Add dependencies to out-of-order events (if any) */
 	if (is_barrier) {
-		cl_event ev = cq->events;
-		while (ev != NULL) {
-			evs[n++] = ev;
-			ev = ev->next;
+		command_list cl = cq->commands;
+		while (cl != NULL) {
+			evs[n++] = cl->cmd->event;
+			cl = cl->next;
 		}
 	}
 
@@ -79,39 +79,30 @@ void command_queue_dependencies_implicit(
 }
 	
 /**
- * Insert a task in the command queue
+ * Insert a command in the command queue
  * The command queue must be locked!
  */
 void command_queue_insert(
-	cl_command_queue cq, 	/* Command queue */
-	cl_event task_event,	/* Event for the task */
-	char is_barrier		/* Is the task a barrier */
+	cl_command_queue 	cq, 	/* Command queue */
+	cl_command 		cmd,	/* Command */
+	int 			is_barrier		/* Is the task a barrier */
 ) {
 
 	int in_order = !(cq->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
 
-	/*********************
-	 * Insert event
-	 *********************/
-
 	if (is_barrier)
-		cq->events = NULL;
-
-	/* Add event to the list of out-of-order events */
-	if (!in_order) {
-		task_event->next = cq->events;
-		task_event->prev = NULL;
-		if (cq->events != NULL)
-			cq->events->prev = task_event;
-		cq->events = task_event;
-	}
+		cq->commands = NULL;
+
+	/* Add command to the list of out-of-order commands */
+	if (!in_order)
+		cq->commands = command_list_cons(cmd, cq->commands);
 
 	/* Register this event as last barrier */
 	if (is_barrier || in_order)
-		cq->barrier = task_event;
+		cq->barrier = cmd;
 
 	/* Add reference to the command queue */
-	gc_entity_store(&task_event->cq, cq);
+	gc_entity_store(&cmd->event->cq, cq);
 }
 
 /**
@@ -119,12 +110,12 @@ void command_queue_insert(
  * The command queue must be locked!
  */
 void command_queue_dependencies(
-	cl_command_queue cq,	/* Command queue */
-	char is_barrier,	/* Is the task a barrier */
-	cl_int num_events,	/* Number of explicit dependencies */
-	const cl_event events,	/* Explicit dependencies */
-	cl_int * ret_num_events,	/* Returned number of dependencies */
-	cl_event ** ret_events	/* Returned dependencies */
+	cl_command_queue 	cq,		/* Command queue */
+	int 			is_barrier,	/* Is the task a barrier */
+	cl_int 			num_events,	/* Number of explicit dependencies */
+	const cl_event *	events,		/* Explicit dependencies */
+	cl_int * 		ret_num_events,	/* Returned number of dependencies */
+	cl_event ** 		ret_events	/* Returned dependencies */
 ) {
 	cl_int implicit_num_events;
 	cl_event * implicit_events;
@@ -142,43 +133,36 @@ void command_queue_dependencies(
 	*ret_events = evs;
 }
 
-/**
- * Enqueue the given task and put ev into the command queue.
- */
-void command_queue_enqueue(
-	cl_command_queue cq, 		/* Command queue */
-	cl_event ev,			/* Event triggered on task completion (can be NULL if task event should be used)*/
-	cl_int is_barrier,			/* True if the task acts as a barrier */
-	cl_int num_events,		/* Number of dependencies */
-	const cl_event * events,	/* Dependencies */
-	cl_int * ret_num_events,	/* Returned number of events */
-	cl_event ** ret_events		/* Returned events */
-	) {
-
-	/* Lock command queue */
-	pthread_spin_lock(&cq->spin);
-
-	command_queue_dependencies(cq, is_barrier, num_events, events, ret_num_events, ret_events);
-
-	command_queue_insert(cq, ev, is_barrier);
+void command_queue_enqueue_ex(cl_command_queue cq, cl_command cmd, cl_uint num_events, const cl_event * events) {
 
-	/* Unlock command queue */
-	pthread_spin_unlock(&cq->spin);
-}
+	/* Check if the command is a barrier */
+	int is_barrier = 0;
+	if (cmd->typ == CL_COMMAND_BARRIER) {
+		is_barrier = 1;
+		/* OpenCL has no CL_COMMAND_BARRIER type, so we fall back on CL_COMMAND_MARKER */
+		cmd->typ = CL_COMMAND_MARKER;
+	}
 
+	/* Set command queue field */
+	cmd->cq = cq;
 
-cl_event command_queue_barrier(cl_command_queue cq) {
+	/* Lock command queue */
+	pthread_mutex_lock(&cq->mutex);
 
-	cl_int ndeps;
-	cl_event *deps;
+	//FIXME: crappy separation (command_queue_dependencies + command_queue_insert)
 
-	//CL_COMMAND_MARKER has been chosen as CL_COMMAND_BARRIER doesn't exist
-	starpu_task * task = task_create(CL_COMMAND_MARKER);
+	/* Get all (explicit + implicit) dependencies */
+	cl_int all_num_events;
+	cl_event * all_events;
+	command_queue_dependencies(cq, is_barrier, num_events, events, &all_num_events, &all_events);
 
-	DEBUG_MSG("Submitting barrier task (event %d)\n", task->tag_id);
-	command_queue_enqueue(cq, task_event(task), 1, 0, NULL, &ndeps, &deps);
+	/* Make all dependencies explicit for the command */
+	cmd->num_events = all_num_events;
+	cmd->events = all_events;
 
-	task_submit(task, ndeps, deps);
+	/* Insert command in the queue */
+	command_queue_insert(cq, cmd, is_barrier);
 
-	return task_event(task);
+	/* Unlock command queue */
+	pthread_mutex_unlock(&cq->mutex);
 }

+ 7 - 9
socl/src/command_queue.h

@@ -17,16 +17,14 @@
 #ifndef SOCL_COMMAND_QUEUE_H
 #define SOCl_COMMAND_QUEUE_H
 
-void command_queue_enqueue(
-	cl_command_queue cq, 		/* Command queue */
-	cl_event ev,			/* Event triggered on task completion (can be NULL if task event should be used)*/
-	cl_int is_barrier,			/* True if the task acts as a barrier */
-	cl_int num_events,		/* Number of dependencies */
-	const cl_event * events,	/* Dependencies */
-	cl_int * ret_num_events,	/* Returned number of events */
-	cl_event ** ret_events		/* Returned events */
+void command_queue_enqueue_ex(
+	cl_command_queue 	cq,		/* Command queue */
+	cl_command		cmd,		/* Command to enqueue */
+	cl_uint			num_events,	/* Number of explicit dependencies */
+	const cl_event *	events		/* Explicit dependencies */
 	);
 
-cl_event command_queue_barrier(cl_command_queue cq);
+#define command_queue_enqueue(cq, cmd, num_events, events)\
+	command_queue_enqueue_ex(cq, (cl_command)cmd, num_events, events)
 
 #endif /* SOCl_COMMAND_QUEUE_H */

+ 76 - 0
socl/src/debug.c

@@ -0,0 +1,76 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "socl.h"
+
+#ifdef STARPU_VERBOSE
+/**
+ * Print the status of an OpenCL call through DEBUG_MSG.
+ *
+ * s is a text printed in front of the status (e.g. the name of the OpenCL
+ * function that was called) and err is the OpenCL status code.
+ * CL_SUCCESS and the error codes listed below are printed by name; any
+ * other value is printed as a number.
+ */
+void DEBUG_CL(char *s, cl_int err) {
+   /* One switch case per error code, printing the name of the code */
+   #define ERR_CASE(a) case a: DEBUG_MSG("[OpenCL] %s CL error: %s\n", s, #a); break;
+   switch(err) {
+      case CL_SUCCESS:
+         DEBUG_MSG("[OpenCL] %s SUCCESS.\n", s);
+         break;
+      ERR_CASE(CL_DEVICE_NOT_FOUND)
+      ERR_CASE(CL_DEVICE_NOT_AVAILABLE)
+      ERR_CASE(CL_COMPILER_NOT_AVAILABLE)
+      ERR_CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE)
+      ERR_CASE(CL_OUT_OF_RESOURCES)
+      ERR_CASE(CL_OUT_OF_HOST_MEMORY)
+      ERR_CASE(CL_PROFILING_INFO_NOT_AVAILABLE)
+      ERR_CASE(CL_MEM_COPY_OVERLAP)
+      ERR_CASE(CL_IMAGE_FORMAT_MISMATCH)
+      ERR_CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED)
+      ERR_CASE(CL_BUILD_PROGRAM_FAILURE)
+      ERR_CASE(CL_MAP_FAILURE)
+      ERR_CASE(CL_INVALID_VALUE)
+      ERR_CASE(CL_INVALID_DEVICE_TYPE)
+      ERR_CASE(CL_INVALID_PLATFORM)
+      ERR_CASE(CL_INVALID_DEVICE)
+      ERR_CASE(CL_INVALID_CONTEXT)
+      ERR_CASE(CL_INVALID_QUEUE_PROPERTIES)
+      ERR_CASE(CL_INVALID_COMMAND_QUEUE)
+      ERR_CASE(CL_INVALID_HOST_PTR)
+      ERR_CASE(CL_INVALID_MEM_OBJECT)
+      ERR_CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
+      ERR_CASE(CL_INVALID_IMAGE_SIZE)
+      ERR_CASE(CL_INVALID_SAMPLER)
+      ERR_CASE(CL_INVALID_BINARY)
+      ERR_CASE(CL_INVALID_BUILD_OPTIONS)
+      ERR_CASE(CL_INVALID_PROGRAM)
+      ERR_CASE(CL_INVALID_PROGRAM_EXECUTABLE)
+      ERR_CASE(CL_INVALID_KERNEL_NAME)
+      ERR_CASE(CL_INVALID_KERNEL_DEFINITION)
+      ERR_CASE(CL_INVALID_KERNEL)
+      ERR_CASE(CL_INVALID_ARG_INDEX)
+      ERR_CASE(CL_INVALID_ARG_VALUE)
+      ERR_CASE(CL_INVALID_ARG_SIZE)
+      ERR_CASE(CL_INVALID_KERNEL_ARGS)
+      ERR_CASE(CL_INVALID_WORK_DIMENSION)
+      ERR_CASE(CL_INVALID_WORK_GROUP_SIZE)
+      ERR_CASE(CL_INVALID_WORK_ITEM_SIZE)
+      ERR_CASE(CL_INVALID_GLOBAL_OFFSET)
+      ERR_CASE(CL_INVALID_EVENT_WAIT_LIST)
+      ERR_CASE(CL_INVALID_EVENT)
+      ERR_CASE(CL_INVALID_OPERATION)
+      ERR_CASE(CL_INVALID_GL_OBJECT)
+      ERR_CASE(CL_INVALID_BUFFER_SIZE)
+      ERR_CASE(CL_INVALID_MIP_LEVEL)
+      ERR_CASE(CL_INVALID_GLOBAL_WORK_SIZE)
+      /* Codes without a case above: print the numeric value */
+      default:
+         DEBUG_MSG("%s CL error: Error message not supported by DEBUG_CL macro (%d).\n", s, err);
+   }
+}
+#endif

+ 4 - 57
socl/src/debug.h

@@ -17,10 +17,12 @@
 #ifndef SOCL_DEBUG_H
 #define SOCL_DEBUG_H
 
+#include <../src/common/config.h>
+
 #ifdef STARPU_VERBOSE
 #define DEBUG
 #include <stdio.h>
-   #define DEBUG_MSG(...) do { fprintf(stderr, "[SOCL] [%s] ", __func__); fprintf(stderr, __VA_ARGS__); } while (0);
+   #define DEBUG_MSG(...) do { fprintf(stderr, "[SOCL] [%s] ", __func__); fprintf(stderr, __VA_ARGS__);} while (0);
    #define DEBUG_MSG_NOHEAD(...) fprintf(stderr, __VA_ARGS__)
    #define DEBUG_ERROR(...) do { fprintf(stderr, "[SOCL] ERROR: "__VA_ARGS__); exit(1); } while (0);
 #else
@@ -35,62 +37,7 @@
 #define ERROR_STOP(...) do { ERROR_MSG(__VA_ARGS__); exit(1); } while(0);
 
 #ifdef STARPU_VERBOSE
-void DEBUG_CL(char *s, cl_int err) {
-   #define ERR_CASE(a) case a: DEBUG_MSG("[OpenCL] %s CL error: %s\n", s, #a); break;
-   switch(err) {
-      case CL_SUCCESS:
-         DEBUG_MSG("[OpenCL] %s SUCCESS.\n", s);
-         break;
-      ERR_CASE(CL_DEVICE_NOT_FOUND)
-      ERR_CASE(CL_DEVICE_NOT_AVAILABLE)
-      ERR_CASE(CL_COMPILER_NOT_AVAILABLE)
-      ERR_CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE)
-      ERR_CASE(CL_OUT_OF_RESOURCES)
-      ERR_CASE(CL_OUT_OF_HOST_MEMORY)
-      ERR_CASE(CL_PROFILING_INFO_NOT_AVAILABLE)
-      ERR_CASE(CL_MEM_COPY_OVERLAP)
-      ERR_CASE(CL_IMAGE_FORMAT_MISMATCH)
-      ERR_CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED)
-      ERR_CASE(CL_BUILD_PROGRAM_FAILURE)
-      ERR_CASE(CL_MAP_FAILURE)
-      ERR_CASE(CL_INVALID_VALUE)
-      ERR_CASE(CL_INVALID_DEVICE_TYPE)
-      ERR_CASE(CL_INVALID_PLATFORM)
-      ERR_CASE(CL_INVALID_DEVICE)
-      ERR_CASE(CL_INVALID_CONTEXT)
-      ERR_CASE(CL_INVALID_QUEUE_PROPERTIES)
-      ERR_CASE(CL_INVALID_COMMAND_QUEUE)
-      ERR_CASE(CL_INVALID_HOST_PTR)
-      ERR_CASE(CL_INVALID_MEM_OBJECT)
-      ERR_CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
-      ERR_CASE(CL_INVALID_IMAGE_SIZE)
-      ERR_CASE(CL_INVALID_SAMPLER)
-      ERR_CASE(CL_INVALID_BINARY)
-      ERR_CASE(CL_INVALID_BUILD_OPTIONS)
-      ERR_CASE(CL_INVALID_PROGRAM)
-      ERR_CASE(CL_INVALID_PROGRAM_EXECUTABLE)
-      ERR_CASE(CL_INVALID_KERNEL_NAME)
-      ERR_CASE(CL_INVALID_KERNEL_DEFINITION)
-      ERR_CASE(CL_INVALID_KERNEL)
-      ERR_CASE(CL_INVALID_ARG_INDEX)
-      ERR_CASE(CL_INVALID_ARG_VALUE)
-      ERR_CASE(CL_INVALID_ARG_SIZE)
-      ERR_CASE(CL_INVALID_KERNEL_ARGS)
-      ERR_CASE(CL_INVALID_WORK_DIMENSION)
-      ERR_CASE(CL_INVALID_WORK_GROUP_SIZE)
-      ERR_CASE(CL_INVALID_WORK_ITEM_SIZE)
-      ERR_CASE(CL_INVALID_GLOBAL_OFFSET)
-      ERR_CASE(CL_INVALID_EVENT_WAIT_LIST)
-      ERR_CASE(CL_INVALID_EVENT)
-      ERR_CASE(CL_INVALID_OPERATION)
-      ERR_CASE(CL_INVALID_GL_OBJECT)
-      ERR_CASE(CL_INVALID_BUFFER_SIZE)
-      ERR_CASE(CL_INVALID_MIP_LEVEL)
-      ERR_CASE(CL_INVALID_GLOBAL_WORK_SIZE)
-      default:
-         DEBUG_MSG("%s CL error: Error message not supported by print_cl_error (%d).\n", s, err);
-   }
-}
+void DEBUG_CL(char *s, cl_int err);
 #else
    #define DEBUG_CL(...) while(0);
 #endif

+ 13 - 15
socl/src/event.c

@@ -20,21 +20,24 @@
 
 static void release_callback_event(void * e);
 
+int event_unique_id() {
+   static int id = 1;
+
+   return __sync_fetch_and_add(&id,1);
+}
+
 /**
  * Create a new event
  *
  * Events have one-to-one relation with tag. Tag number is event ID
  */
 cl_event event_create(void) {
-   static int id = 1;
    cl_event ev;
    ev = gc_entity_alloc(sizeof(struct _cl_event), release_callback_event);
 
-   ev->next = NULL;
-   ev->prev = NULL;
-   ev->id = __sync_fetch_and_add(&id,1);
+   ev->id = event_unique_id();
    ev->status = CL_SUBMITTED;
-   ev->type = 0;
+   ev->command = NULL;
    ev->profiling_info = NULL;
    ev->cq = NULL;
 
@@ -49,22 +52,17 @@ static void release_callback_event(void * e) {
   /* Remove from command queue */
   if (cq != NULL) {
     /* Lock command queue */
-    pthread_spin_lock(&cq->spin);
+    pthread_mutex_lock(&cq->mutex);
 
     /* Remove barrier if applicable */
-    if (cq->barrier == event)
+    if (cq->barrier == event->command)
       cq->barrier = NULL;
 
-    /* Remove from the list of out-of-order events */
-    if (event->prev != NULL)
-      event->prev->next = event->next;
-    if (event->next != NULL)
-      event->next->prev = event->prev;
-    if (cq->events == event)
-      cq->events = event->next;
+    /* Remove from the list of out-of-order commands */
+    cq->commands = command_list_remove(cq->commands, event->command);
 
     /* Unlock command queue */
-    pthread_spin_unlock(&cq->spin);
+    pthread_mutex_unlock(&cq->mutex);
 
     gc_entity_unstore(&cq);
   }

+ 5 - 0
socl/src/event.h

@@ -26,4 +26,9 @@
  */
 cl_event event_create(void);
 
+/**
+ * Generate a unique tag id
+ */
+int event_unique_id();
+
 #endif /* SOCL_EVENT_H */

+ 6 - 2
socl/src/gc.c

@@ -104,13 +104,15 @@ void gc_stop(void) {
   pthread_join(gc_thread, NULL);
 }
 
-void gc_entity_release_ex(entity e) {
+int gc_entity_release_ex(entity e) {
 
   /* Decrement reference count */
   int refs = __sync_sub_and_fetch(&e->refs, 1);
 
   if (refs != 0)
-    return;
+    return 0;
+
+  DEBUG_MSG("Releasing entity %lx\n", e);
 
   GC_LOCK;
 
@@ -127,6 +129,8 @@ void gc_entity_release_ex(entity e) {
   gc_list = e;
 
   GC_UNLOCK;
+
+  return 1;
 }
 
 

+ 1 - 1
socl/src/gc.h

@@ -27,7 +27,7 @@ void * gc_entity_alloc(unsigned int size, void (*release_callback)(void*));
 void gc_entity_retain(void *arg);
 
 /** Decrement reference counter and release entity if applicable */
-void gc_entity_release_ex(entity e);
+int gc_entity_release_ex(entity e);
 
 int gc_active_entity_count(void);
 

+ 0 - 115
socl/src/graph.c

@@ -1,115 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010,2011 University of Bordeaux
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "socl.h"
-#include "graph.h"
-#include "event.h"
-
-static pthread_spinlock_t graph_lock;
-static graph_node graph_nodes = NULL;
-
-
-/**
- * Initialize graph structure
- */
-void graph_init(void) {
-	pthread_spin_init(&graph_lock, PTHREAD_PROCESS_PRIVATE);
-}
-
-/**
- * Release graph structure
- */
-void graph_destroy(void) {
-	pthread_spin_destroy(&graph_lock);
-}
-
-/**
- * Initialize a graph node
- */
-void graph_node_init(graph_node node) {
-	node->id = -1;
-	node->next = NULL;
-	node->event = event_create();
-}
-
-/**
- * Store a node in the graph
- */
-void graph_store(void * node) {
-	pthread_spin_lock(&graph_lock);
-
-	graph_node n = (graph_node)node;
-	n->next = graph_nodes;
-	graph_nodes = n;
-
-	pthread_spin_unlock(&graph_lock);
-}
-
-/**
- * Free a node
- */
-void graph_free(void * node) {
-	free(node);
-}
-
-
-#define nullOrDup(name,size) s->name = memdup_safe(name,size)
-#define nodeNullOrDup(name,size) s->node.name = memdup_safe(name,size)
-#define dup(name) s->name = name
-#define nodeDup(name) s->node.name = name
-
-
-node_enqueue_kernel graph_create_enqueue_kernel(char is_task,
-		cl_command_queue cq,
-		cl_kernel        kernel,
-		cl_uint          work_dim,
-		const size_t *   global_work_offset,
-		const size_t *   global_work_size,
-		const size_t *   local_work_size,
-		cl_uint          num_events,
-		const cl_event * events,
-		cl_uint 		num_args,
-		size_t *		arg_sizes,
-		enum kernel_arg_type * arg_types,
-		void **		args)
-{
-	node_enqueue_kernel s = malloc(sizeof(struct node_enqueue_kernel_t));
-	graph_node_init(&s->node);
-	s->node.id = NODE_ENQUEUE_KERNEL;
-
-	nodeDup(num_events);
-	nodeNullOrDup(events, num_events * sizeof(cl_event));
-
-	dup(is_task);
-	dup(cq);
-	dup(kernel);
-	dup(work_dim);
-	nullOrDup(global_work_offset, work_dim*sizeof(size_t));
-	nullOrDup(global_work_size, work_dim*sizeof(size_t));
-	nullOrDup(local_work_size, work_dim*sizeof(size_t));
-	dup(num_args);
-	nullOrDup(arg_sizes, num_args * sizeof(size_t));
-	nullOrDup(arg_types, num_args * sizeof(enum kernel_arg_type));
-	nullOrDup(args, num_args * sizeof(void*));
-
-	return s;
-}
-
-#undef nullOrDup
-#undef nodeNullOrDup
-#undef dup
-#undef nodeDup
-#undef memdup

+ 0 - 73
socl/src/graph.h

@@ -1,73 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010,2011 University of Bordeaux
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef SOCL_GRAPH_H
-#define SOCL_GRAPH_H
-
-#include "socl.h"
-
-typedef struct graph_node_t * graph_node;
-
-struct graph_node_t {
-	int 		id; 		/* Kind of node */
-	graph_node 	next; 		/* Linked-list of nodes... */
-	cl_uint 	num_events;	/* Number of dependencies */
-	cl_event * 	events;		/* Dependencies */
-	cl_event  	event;		/* Event for this node */
-};
-
-void graph_init(void);
-void graph_destroy(void);
-void graph_node_init(graph_node node);
-void graph_store(void * node);
-void graph_free(void * node);
-
-#define NODE_ENQUEUE_KERNEL 1
-
-
-typedef struct node_enqueue_kernel_t {
-	struct graph_node_t node;
-
-	char 		 is_task; /* Set if clEnqueueTask is used */
-	cl_command_queue cq;
-	cl_kernel        kernel;
-	cl_uint          work_dim;
-	const size_t *   global_work_offset;
-	const size_t *   global_work_size;
-	const size_t *   local_work_size;
-	cl_uint 	 num_args;
-	size_t *	 arg_sizes;
-	enum kernel_arg_type * arg_types;
-	void **		 args;
-} * node_enqueue_kernel;
-
-node_enqueue_kernel graph_create_enqueue_kernel(char is_task,
-		cl_command_queue cq,
-		cl_kernel        kernel,
-		cl_uint          work_dim,
-		const size_t *   global_work_offset,
-		const size_t *   global_work_size,
-		const size_t *   local_work_size,
-		cl_uint          num_events,
-		const cl_event * events,
-		cl_uint		 num_args,
-		size_t *	 arg_sizes,
-		enum kernel_arg_type * arg_types,
-		void **		args);
-
-cl_int graph_play_enqueue_kernel(node_enqueue_kernel n);
-
-#endif /* SOCL_GRAPH_H */

+ 0 - 4
socl/src/init.c

@@ -15,7 +15,6 @@
  */
 
 #include "socl.h"
-#include "graph.h"
 #include "gc.h"
 #include "mem_objects.h"
 
@@ -25,7 +24,6 @@
 __attribute__((constructor)) static void socl_init() {
   
   mem_object_init();
-  graph_init();
 
   starpu_init(NULL);
   
@@ -51,7 +49,5 @@ __attribute__((destructor)) static void socl_shutdown() {
   if (active_entities != 0)
     fprintf(stderr, "Unreleased entities: %d\n", active_entities);
 
-  graph_destroy();
-
   starpu_shutdown();
 }

+ 39 - 18
socl/src/socl.h

@@ -23,6 +23,9 @@
 #include CL_HEADERS "CL/cl.h"
 #endif
 
+/* Additional command type */
+#define CL_COMMAND_BARRIER 0x99987
+
 #include <string.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -35,7 +38,7 @@
 #include <starpu_profiling.h>
 #include <starpu_task.h>
 
-typedef struct starpu_task starpu_task;
+typedef struct starpu_task * starpu_task;
 
 #ifdef UNUSED
 #elif defined(__GNUC__)
@@ -50,12 +53,13 @@ typedef struct starpu_task starpu_task;
  */
 typedef struct entity * entity;
 
+#include "command.h"
+#include "command_list.h"
 #include "command_queue.h"
 #include "debug.h"
 #include "devices.h"
 #include "event.h"
 #include "gc.h"
-#include "graph.h"
 #include "mem_objects.h"
 #include "task.h"
 #include "util.h"
@@ -79,11 +83,32 @@ struct entity {
 
 struct _cl_platform_id {};
 
-#define RETURN_OR_RELEASE_EVENT(ev, event) \
-   if (event != NULL) \
-      *event = ev; \
-   else\
-      gc_entity_release(ev);
+#define RETURN_EVENT(cmd, event) \
+	if (event != NULL) { \
+		cl_event ev = command_event_get(cmd);\
+		gc_entity_retain(ev);\
+		*event = ev; \
+	}
+
+#define RETURN_CUSTOM_EVENT(src, tgt) \
+	if (tgt != NULL) { \
+		gc_entity_retain(src); \
+		*tgt = src; \
+	}
+
+#define MAY_BLOCK(blocking) \
+	if ((blocking) == CL_TRUE) {\
+		cl_event ev = command_event_get(cmd);\
+		soclWaitForEvents(1, &ev);\
+		gc_entity_release(ev);\
+	}
+
+#define MAY_BLOCK_CUSTOM(blocking,event) \
+	if ((blocking) == CL_TRUE) {\
+		cl_event ev = (event);\
+		soclWaitForEvents(1, &ev);\
+		gc_entity_release(ev);\
+	}
 
 /* Constants */
 struct _cl_platform_id socl_platform;
@@ -121,14 +146,14 @@ struct _cl_command_queue {
   cl_device_id device;
   cl_context context;
 
-  /* Stored command events */
-  cl_event events;
+  /* Stored commands */
+  command_list commands;
 
   /* Last enqueued barrier-like event */
-  cl_event barrier;
+  cl_command barrier;
 
   /* Mutex */
-  pthread_spinlock_t spin;
+  pthread_mutex_t mutex;
 
   /* ID  */
 #ifdef DEBUG
@@ -142,12 +167,8 @@ struct _cl_event {
   /* Command queue */
   cl_command_queue cq;
 
-  /* Command type */
-  cl_command_type type;
-
-  /* Command queue list */
-  cl_event prev;
-  cl_event next;
+  /* Command */
+  cl_command command;
 
   /* Event status */
   cl_int status;
@@ -244,7 +265,7 @@ struct _cl_kernel {
   cl_int *errcodes;
 
   /* Arguments */
-  unsigned int arg_count;
+  unsigned int num_args;
   size_t *arg_size;
   enum kernel_arg_type  *arg_type;
   void  **arg_value;

+ 48 - 42
socl/src/task.c

@@ -18,80 +18,86 @@
 #include "gc.h"
 #include "event.h"
 
-cl_event task_event(starpu_task *task) {
-  return (cl_event)task->callback_arg;
-}
-
 static void task_release_callback(void *arg) {
-  starpu_task *task = starpu_get_current_task();
-  cl_event ev = (cl_event)arg;
+  starpu_task task = starpu_get_current_task();
+  cl_command cmd = (cl_command)arg;
   
+  cl_event ev = command_event_get(cmd);
   ev->status = CL_COMPLETE;
 
+  DEBUG_MSG("notifying tag %x as well as task tag %x\n", ev->id, task->tag_id);
+
+  /* Trigger the tag associated with the command event */
+  starpu_tag_notify_from_apps(ev->id);
+
   if (task->profiling_info != NULL && (intptr_t)task->profiling_info != -ENOSYS) {
     ev->profiling_info = malloc(sizeof(*task->profiling_info));
     memcpy(ev->profiling_info, task->profiling_info, sizeof(*task->profiling_info));
   }
 
   gc_entity_release(ev);
+
+  /* Release the command */
+  //TODO
 }
 
 
 /*
  * Create a StarPU task
- *
- * Task's callback_arg is event
- * Task's tag is set to event ID
  */
-starpu_task * task_create(cl_command_type type) {
-   cl_event event;
+starpu_task task_create() {
+	struct starpu_task * task;
 
-   /* Create event */
-   event = event_create();
+	/* Create StarPU task */
+	task = starpu_task_create();
 
-   return task_create_with_event(type, event);
-}
+	/* Set task common settings */
+	task->destroy = 1;
+	task->detach = 1;
 
+	task->use_tag = 1;
+	task->tag_id = event_unique_id();
 
-starpu_task * task_create_with_event(cl_command_type type, cl_event event) {
-   struct starpu_task * task;
+	DEBUG_MSG("creating task with tag %x\n", task->tag_id);
 
-   event->type = type;
+	return task;
+}
 
-   /* Create StarPU task */
-   task = starpu_task_create();
 
-   /* Task tag is set to event id */
-   task->use_tag = 1;
-   task->tag_id = event->id;
+void task_depends_on(starpu_task task, cl_uint num_events, cl_event *events) {
 
-   /* Set task common settings */
-   task->destroy = 1;
-   task->detach = 1;
-   task->callback_func = task_release_callback;
-   task->callback_arg = event;
+	if (num_events != 0) {
+		cl_uint i;
 
-   return task;
-}
+		starpu_tag_t * tags = malloc(num_events * sizeof(starpu_tag_t));	
 
+		if (num_events != 0)
+			DEBUG_MSG("Tag %d depends on %u tags:", task->tag_id, num_events);
 
-void task_dependency_add(starpu_task * task, cl_uint num, const cl_event *events) {
-   unsigned int i;
+		for (i=0; i<num_events; i++) {
+			tags[i] = events[i]->id;
+			DEBUG_MSG_NOHEAD(" %u", events[i]->id);
+		}
+		DEBUG_MSG_NOHEAD("\n");
 
-   for (i=0; i<num; i++) {
-      starpu_tag_t tag = events[i]->id;
-      DEBUG_MSG("Event %d depends on event %d\n", task->tag_id, events[i]->id);
-      starpu_tag_declare_deps_array(task->tag_id, 1, &tag);
-   }
+		starpu_tag_declare_deps_array(task->tag_id, num_events, tags);
+
+		free(tags);
+	}
 }
 
-cl_int task_submit(starpu_task * task, cl_int num_events, cl_event * events) {
+cl_int task_submit_ex(starpu_task task, cl_command cmd) {
+
+	/* Associate the task with the command */
+	cmd->task = task;
+
+	task_depends_on(task, command_num_events_get(cmd), command_events_get(cmd));
 
-	task_dependency_add(task, num_events, events);
+	task->callback_func = task_release_callback;
+	task->callback_arg = cmd;
 
 	/* Submit task */
 	int ret = starpu_task_submit(task);
-	gc_entity_retain(task_event(task));
 	if (ret != 0)
 		DEBUG_ERROR("Unable to submit a task. Error %d\n", ret);
 
@@ -126,14 +132,14 @@ static starpu_codelet cputask_codelet = {
    .cpu_func = &cputask_task
 };
 
-starpu_task * task_create_cpu(cl_command_type type, void (*callback)(void*), void *arg, int free_arg) {
+starpu_task task_create_cpu(void (*callback)(void*), void *arg, int free_arg) {
   
   struct cputask_arg * a = malloc(sizeof(struct cputask_arg));
   a->callback = callback;
   a->arg = arg;
   a->free_arg = free_arg;
 
-  starpu_task *task = task_create(type);
+  starpu_task task = task_create();
   task->cl = &cputask_codelet;
   task->cl_arg = a;
 

+ 12 - 9
socl/src/task.h

@@ -19,19 +19,22 @@
 
 #include "socl.h"
 
-starpu_task * task_create(cl_command_type type);
-starpu_task * task_create_with_event(cl_command_type type, cl_event event);
-void task_dependency_add(starpu_task * task, cl_uint num, const cl_event *events);
-starpu_task * task_create_cpu(cl_command_type type, void (*callback)(void*), void *arg, int free_arg);
+starpu_task task_create();
+void task_dependency_add(starpu_task task, cl_uint num_events, cl_event *events);
 
-/** 
- * Return event associated to a task
+starpu_task task_create_cpu(void (*callback)(void*), void *arg, int free_arg);
+
+/**
+ * Associate a StarPU task to a command and submit it
+ *
+ * When the task terminates, the command is set as terminated too
  */
-cl_event task_event(starpu_task *task);
+cl_int task_submit_ex(starpu_task task, cl_command cmd);
+#define task_submit(task,cmd) task_submit_ex(task, (cl_command)cmd)
 
 /**
- * Submit "task" with "events" dependencies
+ * Add task dependencies
  */
-cl_int task_submit(starpu_task * task, cl_int num_events, cl_event * events);
+void task_depends_on(starpu_task task, cl_uint num_events, cl_event *events);
 
 #endif /* SOCL_TASK_H */