Explorar o código

- merge trunk

Olivier Aumage hai 11 anos
pai
achega
18cb0e9e8c
Modificáronse 48 ficheiros con 78 adicións e 166 borrados
  1. 1 0
      examples/axpy/axpy.c
  2. 1 6
      examples/axpy/axpy_opencl.c
  3. 1 0
      examples/basic_examples/multiformat_conversion_codelets.c
  4. 1 6
      examples/basic_examples/multiformat_conversion_codelets_opencl.c
  5. 1 0
      examples/basic_examples/vector_scal.c
  6. 2 7
      examples/basic_examples/vector_scal_opencl.c
  7. 1 6
      examples/filters/custom_mf/conversion_opencl.c
  8. 1 0
      examples/filters/custom_mf/custom_conversion_codelets.c
  9. 1 0
      examples/filters/custom_mf/custom_mf_filter.c
  10. 1 6
      examples/filters/custom_mf/custom_opencl.c
  11. 1 0
      examples/filters/fblock.c
  12. 2 7
      examples/filters/fblock_opencl.c
  13. 1 0
      examples/interface/complex.c
  14. 1 6
      examples/interface/complex_kernels_opencl.c
  15. 4 5
      examples/mandelbrot/mandelbrot.c
  16. 3 7
      examples/matvecmult/matvecmult.c
  17. 4 12
      examples/reductions/dot_product.c
  18. 1 0
      examples/spmv/spmv.c
  19. 2 7
      examples/spmv/spmv_kernels.c
  20. 2 5
      examples/stencil/life_opencl.c
  21. 2 5
      examples/stencil/shadow_opencl.c
  22. 5 24
      examples/stencil/stencil-kernels.c
  23. 1 0
      tests/datawizard/acquire_release.c
  24. 1 0
      tests/datawizard/acquire_release2.c
  25. 1 6
      tests/datawizard/acquire_release_opencl.c
  26. 1 1
      tests/datawizard/data_invalidation.c
  27. 1 1
      tests/datawizard/handle_to_pointer.c
  28. 3 3
      tests/datawizard/increment_redux.c
  29. 3 3
      tests/datawizard/increment_redux_lazy.c
  30. 4 3
      tests/datawizard/increment_redux_v2.c
  31. 1 1
      tests/datawizard/lazy_allocation.c
  32. 1 0
      tests/datawizard/mpi_like.c
  33. 1 0
      tests/datawizard/mpi_like_async.c
  34. 1 6
      tests/datawizard/opencl_codelet_unsigned_inc.c
  35. 1 0
      tests/datawizard/partition_lazy.c
  36. 2 6
      tests/datawizard/scal.c
  37. 1 0
      tests/datawizard/scratch.c
  38. 1 6
      tests/datawizard/scratch_opencl.c
  39. 1 0
      tests/datawizard/specific_node.c
  40. 2 0
      tests/datawizard/sync_and_notify_data.c
  41. 2 0
      tests/datawizard/sync_and_notify_data_implicit.c
  42. 3 12
      tests/datawizard/sync_and_notify_data_opencl.c
  43. 1 1
      tests/datawizard/write_only_tmp_buffer.c
  44. 1 1
      tests/datawizard/wt_broadcast.c
  45. 1 1
      tests/datawizard/wt_host.c
  46. 1 0
      tests/perfmodels/non_linear_regression_based.c
  47. 1 6
      tests/perfmodels/opencl_memset.c
  48. 2 0
      tests/perfmodels/regression_based.c

+ 1 - 0
examples/axpy/axpy.c

@@ -87,6 +87,7 @@ static struct starpu_codelet axpy_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {axpy_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_RW},

+ 1 - 6
examples/axpy/axpy_opencl.c

@@ -60,14 +60,9 @@ void axpy_opencl(void *buffers[], void *_args)
                 if (local > global)
 			local=global;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 0
examples/basic_examples/multiformat_conversion_codelets.c

@@ -69,6 +69,7 @@ extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
 struct starpu_codelet cpu_to_opencl_cl =
 {
 	.opencl_funcs = {cpu_to_opencl_opencl_func, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 	.nbuffers = 1
 };
 

+ 1 - 6
examples/basic_examples/multiformat_conversion_codelets_opencl.c

@@ -84,15 +84,10 @@ void cpu_to_opencl_opencl_func(void *buffers[], void *args)
 					&local,
 					0,
 					NULL,
-					&event);
+					NULL);
 
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 0
examples/basic_examples/vector_scal.c

@@ -88,6 +88,7 @@ static struct starpu_codelet cl =
 #ifdef STARPU_USE_OPENCL
 	/* OpenCL implementation of the codelet */
 	.opencl_funcs = {scal_opencl_func, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.nbuffers = 1,
 	.modes = {STARPU_RW},

+ 2 - 7
examples/basic_examples/vector_scal_opencl.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Institut National de Recherche en Informatique et Automatique
- * Copyright (C) 2011  Université de Bordeaux 1
+ * Copyright (C) 2011, 2014  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -61,13 +61,8 @@ void scal_opencl_func(void *buffers[], void *_args)
                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
                 if (local > global) local=global;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 6
examples/filters/custom_mf/conversion_opencl.c

@@ -87,15 +87,10 @@ void cpu_to_opencl_opencl_func(void *buffers[], void *args)
 				&local,  /* local_work_size */
 				0,       /* num_events_in_wait_list */
 				NULL,    /* event_wait_list */
-				&event);
+				NULL);
 
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 0
examples/filters/custom_mf/custom_conversion_codelets.c

@@ -77,6 +77,7 @@ extern void cpu_to_opencl_opencl_func(void *buffers[], void *arg);
 struct starpu_codelet cpu_to_opencl_cl =
 {
 	.opencl_funcs = { cpu_to_opencl_opencl_func, NULL },
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 	.modes = { STARPU_RW },
 	.nbuffers = 1,
 	.name = "codelet_cpu_to_opencl"

+ 1 - 0
examples/filters/custom_mf/custom_mf_filter.c

@@ -171,6 +171,7 @@ extern void custom_scal_opencl_func(void *buffers[], void *args);
 static struct starpu_codelet opencl_cl =
 {
 	.opencl_funcs = { custom_scal_opencl_func, NULL },
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 	.nbuffers = 1,
 	.modes = { STARPU_RW },
 	.name = "opencl_codelet"

+ 1 - 6
examples/filters/custom_mf/custom_opencl.c

@@ -86,15 +86,10 @@ void custom_scal_opencl_func(void *buffers[], void *args)
 				&local,  /* local_work_size */
 				0,       /* num_events_in_wait_list */
 				NULL,    /* event_wait_list */
-				&event);
+				NULL);
 
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 0
examples/filters/fblock.c

@@ -99,6 +99,7 @@ int main(int argc, char **argv)
 #endif
 #ifdef STARPU_USE_OPENCL
                 .opencl_funcs = {opencl_func, NULL},
+		.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 		.nbuffers = 1,
                 .modes = {STARPU_RW},

+ 2 - 7
examples/filters/fblock_opencl.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
- * Copyright (C) 2011  Université de Bordeaux 1
+ * Copyright (C) 2011, 2014  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -61,14 +61,9 @@ void opencl_func(void *buffers[], void *cl_arg)
 
 	{
 		size_t global=nx*ny*nz;
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }
 

+ 1 - 0
examples/interface/complex.c

@@ -57,6 +57,7 @@ struct starpu_codelet cl_copy =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {copy_complex_codelet_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_W},

+ 1 - 6
examples/interface/complex_kernels_opencl.c

@@ -66,14 +66,9 @@ void copy_complex_codelet_opencl(void *buffers[], void *_args)
                 if (local > global)
 			local=global;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 4 - 5
examples/mandelbrot/mandelbrot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -261,10 +261,7 @@ static void compute_block_opencl(void *descr[], void *cl_arg)
 	unsigned dim = 16;
 	size_t local[2] = {dim, 1};
 	size_t global[2] = {width, block_size};
-	clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, &event);
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
+	clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
 	starpu_opencl_release_kernel(kernel);
 }
 #endif
@@ -378,6 +375,7 @@ static struct starpu_codelet spmd_mandelbrot_cl =
 	.cpu_funcs = {compute_block_spmd, NULL},
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {compute_block_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.nbuffers = 1
 };
@@ -388,6 +386,7 @@ static struct starpu_codelet mandelbrot_cl =
 	.cpu_funcs = {compute_block, NULL},
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {compute_block_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.nbuffers = 1
 };

+ 3 - 7
examples/matvecmult/matvecmult.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -50,14 +50,9 @@ void opencl_codelet(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 
 	{
 		size_t global=nx*ny;
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
 	starpu_opencl_release_kernel(kernel);
 }
 #endif
@@ -130,6 +125,7 @@ static struct starpu_codelet cl =
 {
 #ifdef STARPU_USE_OPENCL
         .opencl_funcs[0] = opencl_codelet,
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
         .nbuffers = 3,
 	.modes[0] = STARPU_R,

+ 4 - 12
examples/reductions/dot_product.c

@@ -174,15 +174,10 @@ void redux_opencl_func(void *buffers[], void *args)
                 if (local > global)
 			local=global;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }
 #endif
@@ -198,6 +193,7 @@ static struct starpu_codelet redux_codelet =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {redux_opencl_func, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.modes = {STARPU_RW, STARPU_R},
 	.nbuffers = 2,
@@ -293,15 +289,10 @@ void dot_opencl_func(void *buffers[], void *args)
                 if (local > global)
 			local=global;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }
 #endif
@@ -317,6 +308,7 @@ static struct starpu_codelet dot_codelet =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {dot_opencl_func, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.nbuffers = 3,
 	.modes = {STARPU_R, STARPU_R, STARPU_REDUX},

+ 1 - 0
examples/spmv/spmv.c

@@ -102,6 +102,7 @@ static struct starpu_codelet spmv_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
         .opencl_funcs = {spmv_kernel_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.nbuffers = 3,
 	.modes = {STARPU_R, STARPU_R, STARPU_W},

+ 2 - 7
examples/spmv/spmv_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010, 2011, 2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -90,14 +90,9 @@ void spmv_kernel_opencl(void *descr[], void *args)
 
 	{
                 size_t global=nrow;
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
         starpu_opencl_release_kernel(kernel);
 }
 

+ 2 - 5
examples/stencil/life_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -109,8 +109,5 @@ opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int
   clSetKernelArg(kernel, 8, sizeof(iter), &iter);
 
   cl_event ev;
-  clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, &ev);
-  clWaitForEvents(1, &ev);
-  starpu_opencl_collect_stats(ev);
-  clReleaseEvent(ev);
+  clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL);
 }

+ 2 - 5
examples/stencil/shadow_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -105,11 +105,8 @@ opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz,
         clSetKernelArg(kernel, 7, sizeof(i), &i);
 
         cl_event ev;
-        cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, &ev);
+        cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL);
         if (err != CL_SUCCESS)
                 STARPU_OPENCL_REPORT_ERROR(err);
-        clWaitForEvents(1, &ev);
-        starpu_opencl_collect_stats(ev);
-        clReleaseEvent(ev);
 }
 

+ 5 - 24
examples/stencil/stencil-kernels.c

@@ -289,11 +289,8 @@ static void load_subblock_from_buffer_opencl(struct starpu_block_interface *bloc
 
         cl_command_queue cq;
         starpu_opencl_get_current_queue(&cq);
-        cl_int ret = clEnqueueCopyBuffer(cq, boundary_data, block_data, 0, offset, boundary_size, 0, NULL, &event);
+        cl_int ret = clEnqueueCopyBuffer(cq, boundary_data, block_data, 0, offset, boundary_size, 0, NULL, NULL);
 	if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
-
-	clWaitForEvents(1, &event);
-	clReleaseEvent(event);
 }
 
 /*
@@ -358,17 +355,9 @@ static void update_func_opencl(void *descr[], void *arg)
                 cl_int ret = clEnqueueCopyBuffer(cq, old, newer, 0, 0, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), 0, NULL, &event);
 		if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
 
-		clWaitForEvents(1, &event);
-		clReleaseEvent(event);
 #endif /* LIFE */
 	}
 
-#ifndef LIFE
-	cl_int err;
-	if ((err = clFinish(cq)))
-		STARPU_OPENCL_REPORT_ERROR(err);
-#endif
-
 	if (block->bz == 0)
 		starpu_top_update_data_integer(starpu_top_achieved_loop, ++achieved_iter);
 }
@@ -465,6 +454,7 @@ struct starpu_codelet cl_update =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {update_func_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.model = &cl_update_model,
 	.nbuffers = 6,
@@ -532,11 +522,8 @@ static void load_subblock_into_buffer_opencl(struct starpu_block_interface *bloc
         starpu_opencl_get_current_queue(&cq);
 	cl_event event;
 
-        cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, &event);
+        cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, NULL);
 	if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
-
-	clWaitForEvents(1, &event);
-	clReleaseEvent(event);
 }
 #endif /* STARPU_USE_OPENCL */
 
@@ -619,10 +606,6 @@ static void dummy_func_top_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *a
 
 	load_subblock_into_buffer_opencl(descr[0], descr[2], block_size_z);
 	load_subblock_into_buffer_opencl(descr[1], descr[3], block_size_z);
-
-        cl_command_queue cq;
-        starpu_opencl_get_current_queue(&cq);
-        clFinish(cq);
 }
 
 /* bottom save, OPENCL version */
@@ -636,10 +619,6 @@ static void dummy_func_bottom_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void
 
 	load_subblock_into_buffer_opencl(descr[0], descr[2], K);
 	load_subblock_into_buffer_opencl(descr[1], descr[3], K);
-
-        cl_command_queue cq;
-        starpu_opencl_get_current_queue(&cq);
-        clFinish(cq);
 }
 #endif /* STARPU_USE_OPENCL */
 
@@ -666,6 +645,7 @@ struct starpu_codelet save_cl_bottom =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {dummy_func_bottom_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.model = &save_cl_bottom_model,
 	.nbuffers = 4,
@@ -682,6 +662,7 @@ struct starpu_codelet save_cl_top =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {dummy_func_top_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.model = &save_cl_top_model,
 	.nbuffers = 4,

+ 1 - 0
tests/datawizard/acquire_release.c

@@ -48,6 +48,7 @@ static struct starpu_codelet increment_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {increment_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"increment_cpu", NULL},
 	.nbuffers = 1

+ 1 - 0
tests/datawizard/acquire_release2.c

@@ -48,6 +48,7 @@ static struct starpu_codelet increment_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {increment_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"increment_cpu", NULL},
 	.nbuffers = 1

+ 1 - 6
tests/datawizard/acquire_release_opencl.c

@@ -43,14 +43,9 @@ void increment_opencl(void *buffers[], void *args)
 		size_t global=1;
 		size_t local=1;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 1
tests/datawizard/data_invalidation.c

@@ -73,7 +73,6 @@ static void opencl_memset_codelet(void *buffers[], void *args)
 			     0,      /* num_events_in_wait_list */
 			     NULL,   /* event_wait_list */
 			     NULL    /* event */);
-	clFinish(queue);
 }
 #endif /* !STARPU_USE_OPENCL */
 
@@ -96,6 +95,7 @@ static struct starpu_codelet memset_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_memset_codelet, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"cpu_memset_codelet", NULL},
 	.nbuffers = 1,

+ 1 - 1
tests/datawizard/handle_to_pointer.c

@@ -77,7 +77,6 @@ static void opencl_task(void *buffers[], void *args)
 				NULL,           /* event_wait_list */
 				NULL            /* event */);
 	}
-	clFinish(queue);
 }
 #endif
 
@@ -90,6 +89,7 @@ static struct starpu_codelet cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_task, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"cpu_task", NULL},
 	.nbuffers = 1,

+ 3 - 3
tests/datawizard/increment_redux.c

@@ -78,7 +78,6 @@ static void redux_opencl_kernel(void *descr[], void *arg)
 	h_dst += h_src;
 
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
-	clFinish(queue);
 }
 
 static void neutral_opencl_kernel(void *descr[], void *arg)
@@ -92,7 +91,6 @@ static void neutral_opencl_kernel(void *descr[], void *arg)
 	starpu_opencl_get_current_queue(&queue);
 
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -121,6 +119,7 @@ static struct starpu_codelet redux_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {redux_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {redux_cpu_kernel, NULL},
 	.cpu_funcs_name = {"redux_cpu_kernel", NULL},
@@ -136,6 +135,7 @@ static struct starpu_codelet neutral_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {neutral_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {neutral_cpu_kernel, NULL},
 	.cpu_funcs_name = {"neutral_cpu_kernel", NULL},
@@ -162,7 +162,6 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg STARPU_ATTRIBUTE
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -201,6 +200,7 @@ static struct starpu_codelet increment_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {increment_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {increment_cpu_kernel, NULL},
 	.cpu_funcs_name = {"increment_cpu_kernel", NULL},

+ 3 - 3
tests/datawizard/increment_redux_lazy.c

@@ -70,7 +70,6 @@ static void redux_opencl_kernel(void *descr[], void *arg)
 	h_dst += h_src;
 
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
-	clFinish(queue);
 }
 
 static void neutral_opencl_kernel(void *descr[], void *arg)
@@ -82,7 +81,6 @@ static void neutral_opencl_kernel(void *descr[], void *arg)
 	starpu_opencl_get_current_queue(&queue);
 
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -107,6 +105,7 @@ static struct starpu_codelet redux_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {redux_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {redux_cpu_kernel, NULL},
 	.cpu_funcs_name = {"redux_cpu_kernel", NULL},
@@ -122,6 +121,7 @@ static struct starpu_codelet neutral_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {neutral_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {neutral_cpu_kernel, NULL},
 	.cpu_funcs_name = {"neutral_cpu_kernel", NULL},
@@ -146,7 +146,6 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg STARPU_ATTRIBUTE
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -181,6 +180,7 @@ static struct starpu_codelet increment_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {increment_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {increment_cpu_kernel, NULL},
 	.cpu_funcs_name = {"increment_cpu_kernel", NULL},

+ 4 - 3
tests/datawizard/increment_redux_v2.c

@@ -77,7 +77,6 @@ static void redux_opencl_kernel(void *descr[], void *arg)
 	h_dst += h_src;
 
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
-	clFinish(queue);
 }
 
 static void neutral_opencl_kernel(void *descr[], void *arg)
@@ -91,7 +90,6 @@ static void neutral_opencl_kernel(void *descr[], void *arg)
 	starpu_opencl_get_current_queue(&queue);
 
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -120,6 +118,7 @@ static struct starpu_codelet redux_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {redux_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {redux_cpu_kernel, NULL},
 	.cpu_funcs_name = {"redux_cpu_kernel", NULL},
@@ -135,6 +134,7 @@ static struct starpu_codelet neutral_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {neutral_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {neutral_cpu_kernel, NULL},
 	.cpu_funcs_name = {"neutral_cpu_kernel", NULL},
@@ -161,7 +161,6 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg STARPU_ATTRIBUTE
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -200,6 +199,7 @@ static struct starpu_codelet increment_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {increment_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {increment_cpu_kernel, NULL},
 	.cpu_funcs_name = {"increment_cpu_kernel", NULL},
@@ -215,6 +215,7 @@ struct starpu_codelet increment_cl_redux =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {increment_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {increment_cpu_kernel, NULL},
 	.cpu_funcs_name = {"increment_cpu_kernel", NULL},

+ 1 - 1
tests/datawizard/lazy_allocation.c

@@ -68,7 +68,6 @@ static void opencl_memset_codelet(void *buffers[], void *args)
 			     0,      /* num_events_in_wait_list */
 			     NULL,   /* event_wait_list */
 			     NULL    /* event */);
-	clFinish(queue);
 }
 #endif
 
@@ -91,6 +90,7 @@ static struct starpu_codelet memset_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_memset_codelet, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"cpu_memset_codelet", NULL},
 	.nbuffers = 1,

+ 1 - 0
tests/datawizard/mpi_like.c

@@ -64,6 +64,7 @@ static struct starpu_codelet increment_handle_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_codelet_unsigned_inc, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"increment_handle_cpu_kernel", NULL},
 	.nbuffers = 1

+ 1 - 0
tests/datawizard/mpi_like_async.c

@@ -92,6 +92,7 @@ static struct starpu_codelet increment_handle_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = { opencl_codelet_unsigned_inc, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"increment_handle_cpu_kernel", NULL},
 	.nbuffers = 1

+ 1 - 6
tests/datawizard/opencl_codelet_unsigned_inc.c

@@ -45,14 +45,9 @@ void opencl_codelet_unsigned_inc(void *buffers[], void *args)
 		size_t global=1;
 		size_t local=1;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 0
tests/datawizard/partition_lazy.c

@@ -23,6 +23,7 @@ struct starpu_codelet mycodelet =
 	.cpu_funcs = { scal_func_cpu, NULL },
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = { scal_func_opencl, NULL },
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = { scal_func_cuda, NULL },

+ 2 - 6
tests/datawizard/scal.c

@@ -74,14 +74,9 @@ void scal_func_opencl(void *buffers[], void *_args)
                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
                 if (local > global) local=global;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }
 #endif
@@ -92,6 +87,7 @@ struct starpu_codelet scal_codelet =
 	.cpu_funcs = { scal_func_cpu, NULL },
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = { scal_func_opencl, NULL },
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = { scal_func_cuda, NULL },

+ 1 - 0
tests/datawizard/scratch.c

@@ -71,6 +71,7 @@ static struct starpu_codelet cl_f =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_f, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"cpu_f", NULL},
 	.nbuffers = 2,

+ 1 - 6
tests/datawizard/scratch_opencl.c

@@ -71,14 +71,9 @@ void opencl_f(void *buffers[], void *args)
                 if (local > global)
 			local=global;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 0
tests/datawizard/specific_node.c

@@ -64,6 +64,7 @@ static struct starpu_codelet cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_codelet_unsigned_inc, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.nbuffers = 1,
 	.modes = {STARPU_RW},

+ 2 - 0
tests/datawizard/sync_and_notify_data.c

@@ -109,6 +109,7 @@ int main(int argc, char **argv)
 #endif
 #ifdef STARPU_USE_OPENCL
 				.opencl_funcs = {opencl_codelet_incA, NULL},
+				.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 				.cpu_funcs_name = {"cpu_codelet_incA", NULL},
 				.nbuffers = 1,
@@ -147,6 +148,7 @@ int main(int argc, char **argv)
 #endif
 #ifdef STARPU_USE_OPENCL
 				.opencl_funcs = {opencl_codelet_incC, NULL},
+				.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 				.cpu_funcs_name = {"cpu_codelet_incC", NULL},
 				.nbuffers = 1,

+ 2 - 0
tests/datawizard/sync_and_notify_data_implicit.c

@@ -82,6 +82,7 @@ static struct starpu_codelet cl_inc_a =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_codelet_incA, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"cpu_codelet_incA", NULL},
 	.nbuffers = 1,
@@ -98,6 +99,7 @@ struct starpu_codelet cl_inc_c =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_codelet_incC, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"cpu_codelet_incC", NULL},
 	.nbuffers = 1,

+ 3 - 12
tests/datawizard/sync_and_notify_data_opencl.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
- * Copyright (C) 2011  Université de Bordeaux 1
+ * Copyright (C) 2011, 2014  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -43,13 +43,9 @@ void opencl_codelet_incA(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	{
 		size_t global=100;
 		size_t local=100;
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
 	starpu_opencl_release_kernel(kernel);
 }
 
@@ -75,13 +71,8 @@ void opencl_codelet_incC(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	{
 		size_t global=100;
 		size_t local=100;
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 1 - 1
tests/datawizard/write_only_tmp_buffer.c

@@ -40,7 +40,6 @@ static void opencl_codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_ar
 
         starpu_opencl_get_queue(devid, &queue);
         clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, sizeof(char), &ptr, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -83,6 +82,7 @@ static struct starpu_codelet cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {opencl_codelet_null, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs_name = {"cpu_codelet_null", NULL},
 	.nbuffers = 1,

+ 1 - 1
tests/datawizard/wt_broadcast.c

@@ -37,7 +37,6 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg STARPU_ATTRIBUTE
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -72,6 +71,7 @@ static struct starpu_codelet increment_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {increment_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {increment_cpu_kernel, NULL},
 	.nbuffers = 1,

+ 1 - 1
tests/datawizard/wt_host.c

@@ -39,7 +39,6 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg STARPU_ATTRIBUTE
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
-	clFinish(queue);
 }
 #endif
 
@@ -78,6 +77,7 @@ static struct starpu_codelet increment_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {increment_opencl_kernel, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {increment_cpu_kernel, NULL},
 	.nbuffers = 1,

+ 1 - 0
tests/perfmodels/non_linear_regression_based.c

@@ -60,6 +60,7 @@ static struct starpu_codelet memset_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {memset_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {memset_cpu, NULL},
 	.cpu_funcs_name = {"memset_cpu", NULL},

+ 1 - 6
tests/perfmodels/opencl_memset.c

@@ -56,14 +56,9 @@ void memset_opencl(void *buffers[], void *args)
                 if (local > global)
 			local=global;
 
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
 		if (err != CL_SUCCESS)
 			STARPU_OPENCL_REPORT_ERROR(err);
 	}
-
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
-
 	starpu_opencl_release_kernel(kernel);
 }

+ 2 - 0
tests/perfmodels/regression_based.c

@@ -67,6 +67,7 @@ static struct starpu_codelet memset_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {memset_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {memset_cpu, NULL},
 	.cpu_funcs_name = {"memset_cpu", NULL},
@@ -82,6 +83,7 @@ static struct starpu_codelet nl_memset_cl =
 #endif
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {memset_opencl, NULL},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
 #endif
 	.cpu_funcs = {memset_cpu, NULL},
 	.cpu_funcs_name = {"memset_cpu", NULL},