Browse Source

SOCL: fix performance model allocation

While codelets have to be allocated each time a kernel is enqueued (parameter access modes are specific to the buffers used as parameters, not to the kernel... yes, it is stupid...), we only need to allocate a single performance model for each kernel.
Sylvain Henry 12 years ago
parent
commit
2b14755849
5 changed files with 26 additions and 23 deletions
  1. 9 1
      socl/src/cl_createkernel.c
  2. 3 5
      socl/src/cl_enqueuendrangekernel.c
  3. 10 16
      socl/src/command.c
  4. 1 1
      socl/src/command.h
  5. 3 0
      socl/src/socl.h

+ 9 - 1
socl/src/cl_createkernel.c

@@ -16,7 +16,6 @@
 
 #include "socl.h"
 
-
 static void soclCreateKernel_task(void *data) {
    struct _cl_kernel *k = (struct _cl_kernel *)data;
 
@@ -92,6 +91,9 @@ static void release_callback_kernel(void * e) {
   //Release real kernels...
   starpu_execute_on_each_worker(rk_task, kernel, STARPU_OPENCL);
 
+  //Release perfmodel
+  free(kernel->perfmodel);
+
   gc_entity_unstore(&kernel->program);
 
   free(kernel->kernel_name);
@@ -125,6 +127,12 @@ soclCreateKernel(cl_program    program,
    
    gc_entity_store(&k->program, program);
    k->kernel_name = strdup(kernel_name);
+
+   k->perfmodel = malloc(sizeof(struct starpu_perfmodel));
+   memset(k->perfmodel, 0, sizeof(struct starpu_perfmodel));
+   k->perfmodel->type = STARPU_HISTORY_BASED;
+   k->perfmodel->symbol = k->kernel_name;
+
    k->num_args = 0;
    k->arg_value = NULL;
    k->arg_size = NULL;

+ 3 - 5
socl/src/cl_enqueuendrangekernel.c

@@ -97,9 +97,6 @@ static void cleaning_task_callback(void *args) {
 
 	free(cmd->buffers);
 
-	free(cmd->codelet);
-	cmd->codelet = NULL;
-
 	if (cmd->global_work_offset != NULL) {
 	  free((void*)cmd->global_work_offset);
 	  cmd->global_work_offset = NULL;
@@ -123,7 +120,8 @@ static void cleaning_task_callback(void *args) {
 cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd) {
 
 	starpu_task task = task_create();
-	task->cl = cmd->codelet;
+	task->cl = &cmd->codelet;
+	task->cl->model = cmd->kernel->perfmodel;
 	task->cl_arg = cmd;
 	task->cl_arg_size = sizeof(cmd);
 
@@ -133,7 +131,7 @@ cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd) {
 	  task->workerid = cmd->_command.cq->device->worker_id;
 	}
 
-	struct starpu_codelet * codelet = cmd->codelet;
+	struct starpu_codelet * codelet = task->cl;
 
 	/* We need to detect which parameters are OpenCL's memory objects and
 	 * we retrieve their corresponding StarPU buffers */

+ 10 - 16
socl/src/command.c

@@ -17,6 +17,9 @@
 #include "socl.h"
 #include <string.h>
 
+/* Forward extern declaration */
+extern void soclEnqueueNDRangeKernel_task(void *descr[], void *args);
+
 void command_init_ex(cl_command cmd, cl_command_type typ) {
 	cmd->typ = typ;
 	cmd->num_events = 0;
@@ -88,8 +91,6 @@ void command_graph_dump_ex(cl_command cmd) {
 #define dup(name) cmd->name = name
 #define dupEntity(name) do { cmd->name = name; gc_entity_retain(name); } while (0);
 
-void soclEnqueueNDRangeKernel_task(void *descr[], void *args);
-
 command_ndrange_kernel command_ndrange_kernel_create (
 		cl_kernel        kernel,
 		cl_uint          work_dim,
@@ -106,20 +107,13 @@ command_ndrange_kernel command_ndrange_kernel_create (
 	nullOrDup(global_work_size, work_dim*sizeof(size_t));
 	nullOrDup(local_work_size, work_dim*sizeof(size_t));
 
-   	/* Codelet */
-   	cmd->codelet = (struct starpu_codelet*)malloc(sizeof(struct starpu_codelet));
-	starpu_codelet_init(cmd->codelet);
-	struct starpu_codelet * codelet = cmd->codelet;
-	codelet->where = STARPU_OPENCL;
-	codelet->power_model = NULL;
-	codelet->opencl_funcs[0] = &soclEnqueueNDRangeKernel_task;
-	codelet->opencl_funcs[1] = NULL;
-	codelet->model = malloc(sizeof(struct starpu_perfmodel));
-   memset(codelet->model, 0, sizeof(struct starpu_perfmodel));
-   codelet->model->type = STARPU_HISTORY_BASED;
-   codelet->model->symbol = kernel->kernel_name;
-
-   	/* Kernel is mutable, so we duplicate its parameters... */
+	starpu_codelet_init(&cmd->codelet);
+	cmd->codelet.where = STARPU_OPENCL;
+	cmd->codelet.power_model = NULL;
+	cmd->codelet.opencl_funcs[0] = &soclEnqueueNDRangeKernel_task;
+	cmd->codelet.opencl_funcs[1] = NULL;
+
+	/* Kernel is mutable, so we duplicate its parameters... */
 	cmd->num_args = kernel->num_args;
 	cmd->arg_sizes = memdup(kernel->arg_size, sizeof(size_t) * kernel->num_args);
 	cmd->arg_types = memdup(kernel->arg_type, sizeof(enum kernel_arg_type) * kernel->num_args);

+ 1 - 1
socl/src/command.h

@@ -67,6 +67,7 @@ typedef struct command_ndrange_kernel_t {
 	CL_COMMAND
 
 	cl_kernel        kernel;
+	struct starpu_codelet codelet;
 	cl_uint          work_dim;
 	const size_t *   global_work_offset;
 	const size_t *   global_work_size;
@@ -75,7 +76,6 @@ typedef struct command_ndrange_kernel_t {
 	size_t *	 arg_sizes;
 	enum kernel_arg_type * arg_types;
 	void **		 args;
-	struct starpu_codelet * codelet;
 	cl_uint		 num_buffers;
 	cl_mem *	 buffers;
 } * command_ndrange_kernel;

+ 3 - 0
socl/src/socl.h

@@ -248,6 +248,9 @@ struct _cl_kernel {
   /* Associated program */
   cl_program program;
 
+  /* StarPU codelet */
+  struct starpu_perfmodel * perfmodel;
+
   /* Kernel name */
   char * kernel_name;