13 years ago · 9bb769dbf0
--- a/doc/chapters/basic-api.texi
+++ b/doc/chapters/basic-api.texi
@@ -118,7 +118,7 @@ This can also be specified with the @code{STARPU_SINGLE_COMBINED_WORKER} environ
 
																 @item @code{int disable_asynchronous_copy} (default = 0)
															
 
																 This flag should be set to 1 to disable asynchronous copies between
															
 
																 CPUs and accelerators. This can also be specified with the
															
 
																-@code{STARPU_DISABLE_ASYNCHRONOUS_COPY} environment variable.
															
 
																+@code{DISABLE_STARPU_ASYNCHRONOUS_COPY} environment variable.
															
 
																 The AMD implementation of OpenCL is known to
															
 
																 fail when copying data asynchronously. When using this implementation,
															
 
																 it is therefore necessary to disable asynchronous data transfers.
															
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -38,6 +38,7 @@ EXTRA_DIST = 					\
 
																 	basic_examples/multiformat_opencl_kernel.cl  \
															
 
																 	basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \
															
 
																 	common/blas_model.c			\
															
 
																+	spmd/vector_scal_spmd.c			\
															
 
																 	spmv/spmv_cuda.cu			\
															
 
																 	spmv/spmv_opencl.cl			\
															
 
																 	gordon/null_kernel_gordon.c		\
															
@@ -184,6 +185,7 @@ examplebin_PROGRAMS +=				\
 
																 	tag_example/tag_example3		\
															
 
																 	tag_example/tag_example4		\
															
 
																 	tag_example/tag_restartable		\
															
 
																+	spmd/vector_scal_spmd			\
															
 
																 	spmv/spmv				\
															
 
																 	callback/callback			\
															
 
																 	incrementer/incrementer			\
															
@@ -248,6 +250,7 @@ STARPU_EXAMPLES +=				\
 
																 	tag_example/tag_example3		\
															
 
																 	tag_example/tag_example4		\
															
 
																 	tag_example/tag_restartable		\
															
 
																+	spmd/vector_scal_spmd			\
															
 
																 	spmv/spmv				\
															
 
																 	callback/callback			\
															
 
																 	incrementer/incrementer			\
															
@@ -666,6 +669,13 @@ cg_cg_LDADD =					\
 
																 endif
															
 
																 ################
															
 
																+# SPMD example #
															
 
																+################
															
 
																+
															
 
																+spmd_vector_scal_spmd_SOURCES =			\
															
 
																+	spmd/vector_scal_spmd.c
															
 
																+
															
 
																+################
															
 
																 # SpMV example #
															
 
																 ################
															
--- a/examples/axpy/axpy.c
+++ b/examples/axpy/axpy.c
@@ -21,6 +21,7 @@
 
																 #include <stdio.h>
															
 
																 #include <assert.h>
															
 
																 #include <sys/time.h>
															
 
																+#include <math.h>
															
 
																 #include <common/blas.h>
															
--- a/examples/basic_examples/block_opencl.c
+++ b/examples/basic_examples/block_opencl.c
@@ -36,11 +36,11 @@ void opencl_codelet(void *descr[], void *_args)
 
																 	cl_event event;
															
 
																 	int id, devid, err;
															
 
																 	cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(descr[0]);
															
 
																-	uint32_t nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
															
 
																-	uint32_t ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
															
 
																-	uint32_t nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
															
 
																-        uint32_t ldy = STARPU_BLOCK_GET_LDY(descr[0]);
															
 
																-        uint32_t ldz = STARPU_BLOCK_GET_LDZ(descr[0]);
															
 
																+	int nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
															
 
																+	int ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
															
 
																+	int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
															
 
																+        int ldy = (int)STARPU_BLOCK_GET_LDY(descr[0]);
															
 
																+        int ldz = (int) STARPU_BLOCK_GET_LDZ(descr[0]);
															
 
																         float *multiplier = (float *)_args;
															
 
																         id = starpu_worker_get_id();
															
--- a/examples/basic_examples/block_opencl_kernel.cl
+++ b/examples/basic_examples/block_opencl_kernel.cl
@@ -14,9 +14,7 @@
 
																  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																  */
															
 
																-#include <stdint.h>
															
 
																-
															
 
																-__kernel void block(__global float *b, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t ldy, uint32_t ldz, float multiplier)
															
 
																+__kernel void block(__global float *b, int nx, int ny, int nz, int ldy, int ldz, float multiplier)
															
 
																 {
															
 
																      const int i = get_global_id(0);
															
 
																      if (i < (nz*ldz)+(ny*ldy)+nx)
															
--- a/examples/spmd/vector_scal_spmd.c
+++ b/examples/spmd/vector_scal_spmd.c
@@ -0,0 +1,146 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+/* gcc build:
															
 
																+
															
 
																+   gcc -O2 -g vector_scal.c -o vector_scal $(pkg-config --cflags starpu-1.0) $(pkg-config --libs starpu-1.0)
															
 
																+
															
 
																+ */
															
 
																+
															
 
																+#include <starpu.h>
															
 
																+#include <stdio.h>
															
 
																+#include <limits.h>
															
 
																+
															
 
																+#define MIN(a,b)        ((a)<(b)?(a):(b))
															
 
																+
															
 
																+#define	NX	2048000
															
 
																+#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
															
 
																+
															
 
																+static int get_first_element_rank(int nel, int rank, int nb_workers)
															
 
																+{
															
 
																+	if(rank == 0)
															
 
																+		return 0;
															
 
																+
															
 
																+	/* We get the  number of bigger parts which stand before the part */
															
 
																+	int nb_big_parts = MIN(nel % nb_workers, rank);
															
 
																+
															
 
																+	return nb_big_parts * (nel / nb_workers + 1) + (rank - nb_big_parts) * (nel / nb_workers);
															
 
																+}
															
 
																+
															
 
																+void scal_cpu_func(void *buffers[], void *_args)
															
 
																+{
															
 
																+	unsigned i;
															
 
																+	float *factor = _args, f = *factor;
															
 
																+	struct starpu_vector_interface *vector = buffers[0];
															
 
																+	unsigned n = STARPU_VECTOR_GET_NX(vector);
															
 
																+	float *val = (float *)STARPU_VECTOR_GET_PTR(vector);
															
 
																+
															
 
																+	int nb_workers = starpu_combined_worker_get_size();
															
 
																+	int rank = starpu_combined_worker_get_rank();
															
 
																+
															
 
																+	if (rank == 0)
															
 
																+		FPRINTF(stderr, "running task with %d CPUs.\n", starpu_combined_worker_get_size());
															
 
																+
															
 
																+	/* We add 1 to the (nel_total % nb_workers) first workers, thus we get an evenly splitted data. */
															
 
																+	int nel_worker = (n / nb_workers) + ((rank < (n % nb_workers)) ? 1 : 0);
															
 
																+
															
 
																+	int begin = get_first_element_rank(n, rank, nb_workers);
															
 
																+
															
 
																+
															
 
																+	for (i = 0; i < nel_worker; i++) {
															
 
																+		int rank = i + begin;
															
 
																+
															
 
																+		float v = val[rank];
															
 
																+		int j;
															
 
																+		for (j = 0; j < 100; j++)
															
 
																+			v = v * f;
															
 
																+		val[rank] = v;
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																+static struct starpu_perfmodel vector_scal_model =
															
 
																+{
															
 
																+	.type = STARPU_HISTORY_BASED,
															
 
																+	.symbol = "vector_scale_parallel"
															
 
																+};
															
 
																+
															
 
																+static struct starpu_codelet cl =
															
 
																+{
															
 
																+	.modes = { STARPU_RW },
															
 
																+	.where = STARPU_CPU,
															
 
																+	.type = STARPU_SPMD,
															
 
																+	.max_parallelism = INT_MAX,
															
 
																+	.cpu_funcs = {scal_cpu_func, NULL},
															
 
																+	.nbuffers = 1,
															
 
																+	.model = &vector_scal_model,
															
 
																+};
															
 
																+
															
 
																+int main(int argc, char **argv)
															
 
																+{
															
 
																+	struct starpu_conf conf;
															
 
																+	float *vector;
															
 
																+	unsigned i;
															
 
																+	int ret;
															
 
																+
															
 
																+	vector = malloc(NX*sizeof(*vector));
															
 
																+
															
 
																+	for (i = 0; i < NX; i++)
															
 
																+		vector[i] = (i+1.0f);
															
 
																+
															
 
																+	FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]);
															
 
																+	FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]);
															
 
																+
															
 
																+	starpu_conf_init(&conf);
															
 
																+
															
 
																+	conf.single_combined_worker = 1;
															
 
																+	conf.sched_policy_name = "pheft";
															
 
																+
															
 
																+	ret = starpu_init(&conf);
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																+
															
 
																+	starpu_data_handle_t vector_handle;
															
 
																+	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
															
 
																+
															
 
																+	float factor = 1.001;
															
 
																+
															
 
																+	for (i = 0; i < 100; i++) {
															
 
																+		struct starpu_task *task = starpu_task_create();
															
 
																+
															
 
																+		task->cl = &cl;
															
 
																+
															
 
																+		task->handles[0] = vector_handle;
															
 
																+		task->cl_arg = &factor;
															
 
																+		task->cl_arg_size = sizeof(factor);
															
 
																+
															
 
																+		ret = starpu_task_submit(task);
															
 
																+		if (ret == -ENODEV) {
															
 
																+		     ret = 77;
															
 
																+		     break;
															
 
																+		}
															
 
																+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
 
																+	}
															
 
																+
															
 
																+	starpu_data_unregister(vector_handle);
															
 
																+
															
 
																+	/* terminate StarPU, no task can be submitted after */
															
 
																+	starpu_shutdown();
															
 
																+
															
 
																+	FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]);
															
 
																+	FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]);
															
 
																+
															
 
																+	return ret;
															
 
																+}
															
--- a/examples/spmv/spmv_kernels.c
+++ b/examples/spmv/spmv_kernels.c
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
															
 
																- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -28,18 +28,18 @@ void spmv_kernel_opencl(void *descr[], void *args)
 
																 	cl_event event;
															
 
																 	int id, devid, err, n;
															
 
																-	uint32_t nnz = STARPU_CSR_GET_NNZ(descr[0]);
															
 
																-	uint32_t nrow = STARPU_CSR_GET_NROW(descr[0]);
															
 
																+	int nnz = (int) STARPU_CSR_GET_NNZ(descr[0]);
															
 
																+	int nrow = (int) STARPU_CSR_GET_NROW(descr[0]);
															
 
																 	cl_mem nzval = (cl_mem)STARPU_CSR_GET_NZVAL(descr[0]);
															
 
																-	uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]);
															
 
																-	uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]);
															
 
																-	uint32_t firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]);
															
 
																+	cl_mem colind = (cl_mem)STARPU_CSR_GET_COLIND(descr[0]);
															
 
																+	cl_mem rowptr = (cl_mem)STARPU_CSR_GET_ROWPTR(descr[0]);
															
 
																+	int firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]);
															
 
																 	cl_mem vecin = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[1]);
															
 
																-	uint32_t nx_in = STARPU_VECTOR_GET_NX(descr[1]);
															
 
																+	int nx_in = (int)STARPU_VECTOR_GET_NX(descr[1]);
															
 
																 	cl_mem vecout = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]);
															
 
																-	uint32_t nx_out = STARPU_VECTOR_GET_NX(descr[2]);
															
 
																+	int nx_out = (int)STARPU_VECTOR_GET_NX(descr[2]);
															
 
																         id = starpu_worker_get_id();
															
 
																         devid = starpu_worker_get_devid(id);
															
--- a/examples/spmv/spmv_opencl.cl
+++ b/examples/spmv/spmv_opencl.cl
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -14,15 +14,13 @@
 
																  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																  */
															
 
																-__kernel void spmv(unsigned nnz, unsigned nrow,
															
 
																+__kernel void spmv(int nnz, int nrow,
															
 
																                    __global float* nzval, __global unsigned* colind,
															
 
																-                   __global unsigned* rowptr, unsigned firstentry,
															
 
																-                   __global float *vecin, unsigned nx_in,
															
 
																-                   __global float *vecout, unsigned nx_out)
															
 
																+                   __global unsigned* rowptr, int firstentry,
															
 
																+                   __global float *vecin, int nx_in,
															
 
																+                   __global float *vecout, int nx_out)
															
 
																 {
															
 
																-	unsigned row;
															
 
																-	// for (row = 0; row < nrow; row++)
															
 
																-	row = get_global_id(0);
															
 
																+	const int row = get_global_id(0);
															
 
																 	if (row < nrow)
															
 
																 	{
															
 
																 		float tmp = 0.0f;
															
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -107,7 +107,7 @@ unsigned starpu_cuda_worker_get_count(void);
 
																 unsigned starpu_spu_worker_get_count(void);
															
 
																 unsigned starpu_opencl_worker_get_count(void);
															
 
																-int starpu_disable_asynchronous_copy();
															
 
																+int starpu_asynchronous_copy_disabled();
															
 
																 /* Return the identifier of the thread in case this is associated to a worker.
															
 
																  * This will return -1 if this function is called directly from the application
															
--- a/socl/examples/clinfo/clinfo.c
+++ b/socl/examples/clinfo/clinfo.c
@@ -20,7 +20,7 @@
 
																 #include <CL/cl.h>
															
 
																-static inline void 
															
 
																+static inline void
															
 
																 checkErr(cl_int err, const char * name) {
															
 
																     if (err != CL_SUCCESS) {
															
 
																         fprintf(stderr, "ERROR: %s (%d)\n", name, err);
															
@@ -45,8 +45,8 @@ main(void) {
 
																    platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platforms);
															
 
																    err = clGetPlatformIDs(num_platforms, platforms, NULL);
															
 
																    checkErr(err, "Unable to get platform list");
															
 
																-   
															
 
																-   
															
 
																+
															
 
																+
															
 
																    // Iteratate over platforms
															
 
																    printf("Number of platforms:\t\t\t\t %d\n", num_platforms);
															
@@ -54,25 +54,25 @@ main(void) {
 
																       unsigned int i;
															
 
																       for (i=0; i<num_platforms; i++) {
															
 
																          char str[256];
															
 
																-         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, sizeof(str), &str, NULL);
															
 
																+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, sizeof(str), str, NULL);
															
 
																          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_PROFILE)");
															
 
																-         printf("  Plaform Profile:\t\t\t\t %s\n", str);    
															
 
																+         printf("  Plaform Profile:\t\t\t\t %s\n", str);
															
 
																-         err= clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(str), &str, NULL);
															
 
																+         err= clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(str), str, NULL);
															
 
																          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_VERSION)");
															
 
																-         printf("  Plaform Version:\t\t\t\t %s\n", str);    
															
 
																+         printf("  Plaform Version:\t\t\t\t %s\n", str);
															
 
																-         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), &str, NULL);
															
 
																+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), str, NULL);
															
 
																          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_NAME)");
															
 
																-         printf("  Plaform Name:\t\t\t\t\t %s\n", str);    
															
 
																+         printf("  Plaform Name:\t\t\t\t\t %s\n", str);
															
 
																-         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(str), &str, NULL);
															
 
																+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(str), str, NULL);
															
 
																          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_VENDOR)");
															
 
																-         printf("  Plaform Vendor:\t\t\t\t %s\n", str);    
															
 
																+         printf("  Plaform Vendor:\t\t\t\t %s\n", str);
															
 
																-         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(str), &str, NULL);
															
 
																+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(str), str, NULL);
															
 
																          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_EXTENSIONS)");
															
 
																-         printf("  Plaform Extensions:\t\t\t %s\n", str);    
															
 
																+         printf("  Plaform Extensions:\t\t\t %s\n", str);
															
 
																       }
															
 
																    }
															
@@ -88,12 +88,12 @@ main(void) {
 
																          err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), &str, NULL);
															
 
																          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_NAME)");
															
 
																-         printf("  Plaform Name:\t\t\t\t\t %s\n", str);    
															
 
																+         printf("  Plaform Name:\t\t\t\t\t %s\n", str);
															
 
																          err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
															
 
																          checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
															
 
																          devices = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices);
															
 
																-         
															
 
																+
															
 
																          err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
															
 
																          checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
															
@@ -103,7 +103,7 @@ main(void) {
 
																             for (j=0; j<num_devices; j++) {
															
 
																                cl_device_type dev_type;
															
 
																                printf("\n  DEVICE %d\n", j);
															
 
																-               
															
 
																+
															
 
																                err = clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
															
 
																                checkErr(err, "clGetDeviceInfo(CL_DEVICE_TYPE)");
															
@@ -123,13 +123,13 @@ main(void) {
 
																                   cl_uint vendor_id;
															
 
																                   err = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL);
															
 
																                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_VENDOR_ID)");
															
 
																-                  printf("  Device ID:\t\t\t\t\t %d\n", vendor_id); 
															
 
																+                  printf("  Device ID:\t\t\t\t\t %d\n", vendor_id);
															
 
																                }
															
 
																                {
															
 
																                   cl_uint units;
															
 
																                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(units), &units, NULL);
															
 
																                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS)");
															
 
																-                  printf("  Max compute units:\t\t\t\t %d\n", units); 
															
 
																+                  printf("  Max compute units:\t\t\t\t %d\n", units);
															
 
																                }
															
 
																                {
															
@@ -137,12 +137,12 @@ main(void) {
 
																                   size_t *sizes;
															
 
																                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(dims), &dims, NULL);
															
 
																                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)");
															
 
																-                  printf("  Max work item dimensions:\t\t\t %d\n", dims); 
															
 
																+                  printf("  Max work item dimensions:\t\t\t %d\n", dims);
															
 
																                   sizes = (size_t*)malloc(dims * sizeof(size_t));
															
 
																                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*dims, sizes, NULL);
															
 
																                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES)");
															
 
																-                  printf("  Max work item dimensions:\t\t\t %d\n", dims); 
															
 
																+                  printf("  Max work item dimensions:\t\t\t %d\n", dims);
															
 
																                   {
															
 
																                      unsigned int k;
															
@@ -165,7 +165,7 @@ main(void) {
 
																 #define GET_STRING(CL_D,str,size) { \
															
 
																    char val[size]; \
															
 
																-   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), &val, NULL); \
															
 
																+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), val, NULL);	\
															
 
																    checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
															
 
																    printf(str, val); \
															
 
																 }
															
@@ -204,9 +204,9 @@ main(void) {
 
																    checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
															
 
																    printf(str, ((val & test) == CL_TRUE ? "Yes" : "No")); \
															
 
																 }
															
 
																-      
															
 
																+
															
 
																                GET_SIZET(CL_DEVICE_MAX_WORK_GROUP_SIZE, "  Max work group size:\t\t\t\t %u\n")
															
 
																-               
															
 
																+
															
 
																                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, "  Preferred vector width char:\t\t\t %u\n")
															
 
																                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, "  Preferred vector width short:\t\t\t %u\n")
															
 
																                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, "  Preferred vector width int:\t\t\t %u\n")
															
@@ -291,7 +291,7 @@ main(void) {
 
																                GET_STRING(CL_DEVICE_PROFILE, "  Profile:\t\t\t\t\t %s\n", 30);
															
 
																                GET_STRING(CL_DEVICE_VERSION, "  Version:\t\t\t\t\t %s\n", 50);
															
 
																                GET_STRING(CL_DEVICE_EXTENSIONS, "  Extensions:\t\t\t\t\t %s\n", 4096);
															
 
																-            
															
 
																+
															
 
																                printf("\n");
															
 
																             }
															
 
																          }
															
--- a/socl/src/cl_getdeviceinfo.c
+++ b/socl/src/cl_getdeviceinfo.c
@@ -79,12 +79,12 @@ soclGetDeviceInfo(cl_device_id    device,
 
																          cl_platform_id p = &socl_platform;
															
 
																          INFO_CASE_EX2(p);
															
 
																       }
															
 
																-      INFO_CASE(CL_DEVICE_NAME, device->name)
															
 
																-      INFO_CASE(CL_DEVICE_VENDOR, SOCL_VENDOR)
															
 
																-      INFO_CASE(CL_DRIVER_VERSION, SOCL_DRIVER_VERSION)
															
 
																-      INFO_CASE(CL_DEVICE_PROFILE, SOCL_PROFILE)
															
 
																-      INFO_CASE(CL_DEVICE_VERSION, SOCL_VERSION)
															
 
																-      INFO_CASE(CL_DEVICE_EXTENSIONS, device->extensions)
															
 
																+      INFO_CASE_STRING(CL_DEVICE_NAME, device->name)
															
 
																+      INFO_CASE_STRING(CL_DEVICE_VENDOR, SOCL_VENDOR)
															
 
																+      INFO_CASE_STRING(CL_DRIVER_VERSION, SOCL_DRIVER_VERSION)
															
 
																+      INFO_CASE_STRING(CL_DEVICE_PROFILE, SOCL_PROFILE)
															
 
																+      INFO_CASE_STRING(CL_DEVICE_VERSION, SOCL_VERSION)
															
 
																+      INFO_CASE_STRING(CL_DEVICE_EXTENSIONS, device->extensions)
															
 
																       default:
															
 
																          return CL_INVALID_VALUE;
															
 
																    }
															
--- a/socl/src/cl_getplatforminfo.c
+++ b/socl/src/cl_getplatforminfo.c
@@ -30,15 +30,16 @@ soclGetPlatformInfo(cl_platform_id   platform,
 
																                   size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
															
 
																 {
															
 
																    if (platform != NULL && platform != &socl_platform)
															
 
																-      return CL_INVALID_PLATFORM;
															
 
																+	return CL_INVALID_PLATFORM;
															
 
																-   switch (param_name) {
															
 
																-      INFO_CASE(CL_PLATFORM_PROFILE, SOCL_PROFILE)
															
 
																-      INFO_CASE(CL_PLATFORM_VERSION, SOCL_VERSION)
															
 
																-      INFO_CASE(CL_PLATFORM_NAME,    SOCL_PLATFORM_NAME)
															
 
																-      INFO_CASE(CL_PLATFORM_VENDOR,  SOCL_VENDOR)
															
 
																-      INFO_CASE(CL_PLATFORM_EXTENSIONS, SOCL_PLATFORM_EXTENSIONS)
															
 
																-      default:
															
 
																+   switch (param_name)
															
 
																+   {
															
 
																+	INFO_CASE_STRING(CL_PLATFORM_PROFILE, SOCL_PROFILE);
															
 
																+	INFO_CASE_STRING(CL_PLATFORM_VERSION, SOCL_VERSION);
															
 
																+	INFO_CASE_STRING(CL_PLATFORM_NAME,    SOCL_PLATFORM_NAME);
															
 
																+	INFO_CASE_STRING(CL_PLATFORM_VENDOR,  SOCL_VENDOR);
															
 
																+	INFO_CASE_STRING(CL_PLATFORM_EXTENSIONS, SOCL_PLATFORM_EXTENSIONS);
															
 
																+   default:
															
 
																          return CL_INVALID_VALUE;
															
 
																    }
															
--- a/socl/src/getinfo.h
+++ b/socl/src/getinfo.h
@@ -29,6 +29,18 @@
 
																 #define INFO_CASE(param, var) case param: \
															
 
																    INFO_CASE_EX2(var)
															
 
																+#define INFO_CASE_STRING_EX2(var) if (param_value != NULL) { \
															
 
																+      if (param_value_size < strlen(var)) \
															
 
																+         return CL_INVALID_VALUE; \
															
 
																+      strcpy(param_value, var); \
															
 
																+   } \
															
 
																+   if (param_value_size_ret != NULL) \
															
 
																+      *param_value_size_ret = strlen(var); \
															
 
																+   break;
															
 
																+
															
 
																+#define INFO_CASE_STRING(param, var) case param: \
															
 
																+   INFO_CASE_STRING_EX2(var)
															
 
																+
															
 
																 #define INFO_CASE_VALUE(param, type, value) case param: {\
															
 
																       type tmp = (value);\
															
 
																       INFO_CASE_EX2(tmp);\
															
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -204,8 +204,11 @@ void _starpu_handle_job_termination(struct _starpu_job *j, int workerid)
 
																 			_starpu_clock_gettime(&task->profiling_info->callback_end_time);
															
 
																 	}
															
 
																+	/* If the job was executed on a combined worker there is no need for the
															
 
																+	 * scheduler to process it : the task structure doesn't contain any valuable
															
 
																+	 * data as it's not linked to an actual worker */
															
 
																 	/* control task should not execute post_exec_hook */
															
 
																-	if(task->cl != NULL && !task->control_task)
															
 
																+	if(j->task_size == 1 && task->cl != NULL && !task->control_task)
															
 
																 	{
															
 
																 	  _starpu_sched_post_exec_hook(task);
															
 
																 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -458,16 +458,16 @@ int starpu_init(struct starpu_conf *user_conf)
 
																 	if (user_conf)
															
 
																 	{
															
 
																-	     int disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
															
 
																-	     if (disable_asynchronous_copy == 1)
															
 
																+	     int asynchronous_copy_disabled = starpu_get_env_number("DISABLE_STARPU_ASYNCHRONOUS_COPY");
															
 
																+	     if (asynchronous_copy_disabled == 1)
															
 
																 		  config.disable_asynchronous_copy = 1;
															
 
																 	     else
															
 
																 		  config.disable_asynchronous_copy = (user_conf->disable_asynchronous_copy == 1);
															
 
																 	}
															
 
																 	else
															
 
																 	{
															
 
																-	     int disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
															
 
																-	     config.disable_asynchronous_copy = (disable_asynchronous_copy == 1);
															
 
																+	     int asynchronous_copy_disabled = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
															
 
																+	     config.disable_asynchronous_copy = (asynchronous_copy_disabled == 1);
															
 
																 	}
															
 
																 	_starpu_init_all_sched_ctxs(&config);
															
@@ -721,7 +721,7 @@ unsigned starpu_spu_worker_get_count(void)
 
																 	return config.topology.ngordon_spus;
															
 
																 }
															
 
																-int starpu_disable_asynchronous_copy()
															
 
																+int starpu_asynchronous_copy_disabled()
															
 
																 {
															
 
																 	return config.disable_asynchronous_copy;
															
 
																 }
															
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -288,8 +288,8 @@ void starpu_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
 
																 	*handleptr = handle;
															
 
																 	handle->mf_node = home_node;
															
 
																-	int disable_asynchronous_copy = starpu_disable_asynchronous_copy();
															
 
																-	if (STARPU_UNLIKELY(disable_asynchronous_copy))
															
 
																+	int asynchronous_copy_disabled = starpu_asynchronous_copy_disabled();
															
 
																+	if (STARPU_UNLIKELY(asynchronous_copy_disabled))
															
 
																 	{
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	     if (ops->copy_methods->ram_to_cuda_async)
															
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -28,9 +28,14 @@
 
																 #include <core/sched_policy.h>
															
 
																 #include <core/sched_ctx.h>
															
 
																-static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_args, int is_parallel_task, int rank, enum starpu_perf_archtype perf_arch)
															
 
																+/* Actually launch the job on a cpu worker.
															
 
																+ * Handle binding CPUs on cores.
															
 
																+ * In the case of a combined worker WORKER_TASK != J->TASK */
															
 
																+
															
 
																+static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *cpu_args, int rank, enum starpu_perf_archtype perf_arch)
															
 
																 {
															
 
																 	int ret;
															
 
																+	int is_parallel_task = (j->task_size > 1);
															
 
																 	int profiling = starpu_profiling_status_get();
															
 
																 	struct timespec codelet_start, codelet_end;
															
@@ -51,8 +56,14 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 
																 	}
															
 
																 	if (is_parallel_task)
															
 
																+	{
															
 
																 		_STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
															
 
																+		/* In the case of a combined worker, the scheduler needs to know
															
 
																+		 * when each actual worker begins the execution */
															
 
																+		_starpu_sched_pre_exec_hook(worker_task);
															
 
																+	}
															
 
																+
															
 
																 	/* Give profiling variable */
															
 
																 	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank, profiling);
															
@@ -167,10 +178,6 @@ void *_starpu_cpu_worker(void *arg)
 
																 		/* Get the rank in case it is a parallel task */
															
 
																 		if (is_parallel_task)
															
 
																 		{
															
 
																-			/* We can release the fake task */
															
 
																-			STARPU_ASSERT(task != j->task);
															
 
																-			free(task);
															
 
																-
															
 
																 			_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																 			rank = j->active_task_alias_count++;
															
 
																 			_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
@@ -194,7 +201,7 @@ void *_starpu_cpu_worker(void *arg)
 
																 		_starpu_set_current_task(j->task);
															
 
																 		cpu_arg->current_task = j->task;
															
 
																-                res = execute_job_on_cpu(j, cpu_arg, is_parallel_task, rank, perf_arch);
															
 
																+                res = execute_job_on_cpu(j, task, cpu_arg, rank, perf_arch);
															
 
																 		_starpu_set_current_task(NULL);
															
 
																 		cpu_arg->current_task = NULL;
															
@@ -211,6 +218,15 @@ void *_starpu_cpu_worker(void *arg)
 
																 			}
															
 
																 		}
															
 
																+		/* In the case of combined workers, we need to inform the
															
 
																+		 * scheduler each worker's execution is over.
															
 
																+		 * Then we free the workers' task alias */
															
 
																+		if (is_parallel_task)
															
 
																+		{
															
 
																+			_starpu_sched_post_exec_hook(task);
															
 
																+			free(task);
															
 
																+		}
															
 
																+
															
 
																 		if (rank == 0)
															
 
																 			_starpu_handle_job_termination(j, workerid);
															
 
																         }
															
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -39,7 +39,10 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 
																 	if (cl->model && cl->model->benchmarking)
															
 
																 		calibrate_model = 1;
															
 
																-	if (rank == 0)
															
 
																+	/* If the job is executed on a combined worker there is no need for the
															
 
																+	 * scheduler to process it : it doesn't contain any valuable data
															
 
																+	 * as it's not linked to an actual worker */
															
 
																+	if (j->task_size == 1)
															
 
																 		_starpu_sched_pre_exec_hook(task);
															
 
																 	args->status = STATUS_EXECUTING;
															
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -58,7 +58,9 @@ static void limit_gpu_mem_if_needed(int devid)
 
																 	/* Request the size of the current device's memory */
															
 
																 	cl_ulong totalGlobalMem;
															
 
																-	clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(totalGlobalMem), &totalGlobalMem, NULL);
															
 
																+	err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(totalGlobalMem), &totalGlobalMem, NULL);
															
 
																+	if (err != CL_SUCCESS)
															
 
																+		STARPU_OPENCL_REPORT_ERROR(err);
															
 
																 	/* How much memory to waste ? */
															
 
																 	size_t to_waste = (size_t)totalGlobalMem - (size_t)limit*1024*1024;
															
@@ -76,7 +78,9 @@ static void unlimit_gpu_mem_if_needed(int devid)
 
																 {
															
 
																 	if (wasted_memory[devid])
															
 
																 	{
															
 
																-		clReleaseMemObject(wasted_memory[devid]);
															
 
																+		cl_int err = clReleaseMemObject(wasted_memory[devid]);
															
 
																+		if (err != CL_SUCCESS)
															
 
																+			STARPU_OPENCL_REPORT_ERROR(err);
															
 
																 		wasted_memory[devid] = NULL;
															
 
																 	}
															
 
																 }
															
--- a/src/sched_policies/heft.c
+++ b/src/sched_policies/heft.c
@@ -353,6 +353,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 			{
															
 
																 				ntasks_best_end = ntasks_end;
															
 
																 				ntasks_best = worker;
															
 
																+				nimpl_best = nimpl;
															
 
																 			}
															
 
																 			if (isnan(local_task_length[worker_ctx][nimpl]))
															
@@ -417,13 +418,11 @@ static int push_conversion_tasks(struct starpu_task *task, unsigned int workerid
 
																 		conversion_task->execute_on_a_specific_worker = 1;
															
 
																 		conversion_task->workerid = workerid;
															
 
																 		conversion_task->mf_skip = 1;
															
 
																+		handle->mf_node = node;
															
 
																 		ret = _starpu_task_submit_conversion_task(conversion_task, workerid);
															
 
																 		STARPU_ASSERT(ret == 0);
															
 
																 	}
															
 
																-	for (i = 0; i < task->cl->nbuffers; i++)
															
 
																-		task->handles[i]->mf_node = node;
															
 
																-
															
 
																 	task->execute_on_a_specific_worker = 1;
															
 
																 	task->workerid = workerid;
															
 
																 	task->mf_skip= 1;
															
--- a/src/sched_policies/parallel_heft.c
+++ b/src/sched_policies/parallel_heft.c
@@ -130,6 +130,11 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
																 		j->task_size = worker_size;
															
 
																 		j->combined_workerid = best_workerid;
															
 
																 		j->active_task_alias_count = 0;
															
 
																+
															
 
																+		/* This task doesn't belong to an actual worker, it belongs
															
 
																+		 * to a combined worker and thus the scheduler doesn't care
															
 
																+		 * of its predicted values which are insignificant */
															
 
																+		task->predicted = 0;
															
 
																 		task->predicted_transfer = 0;
															
 
																 		_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
															
@@ -368,16 +373,11 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 
																 		{
															
 
																 			worker = workers->has_next(workers) ? workers->get_next(workers) : worker_ctx;
															
 
																-			unsigned incremented = 0;
															
 
																 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
															
 
																 			{
															
 
																 				if (skip_worker[worker_ctx][nimpl])
															
 
																 				{
															
 
																 					/* no one on that queue may execute this task */
															
 
																-					if(!incremented)
															
 
																-						worker_ctx++;
															
 
																-
															
 
																-					incremented = 1;
															
 
																 					continue;
															
 
																 				}
															
@@ -402,7 +402,6 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 
																 			//	fprintf(stderr, "FITNESS worker %d -> %e local_exp_end %e - local_data_penalty %e\n", worker, fitness[worker][nimpl], local_exp_end[worker][nimpl] - best_exp_end, local_data_penalty[worker][nimpl]);
															
 
																 			}
															
 
																-			if(!incremented)
															
 
																 				worker_ctx++;
															
 
																 		}
															
 
																 	}
															
--- a/tests/datawizard/handle_to_pointer.c
+++ b/tests/datawizard/handle_to_pointer.c
@@ -80,7 +80,7 @@ static void opencl_task(void *buffers[], void *args)
 
																 				NULL,           /* event_wait_list */
															
 
																 				NULL            /* event */);
															
 
																 	}
															
 
																-			
															
 
																+	clFinish(queue);
															
 
																 }
															
 
																 #endif
															
@@ -146,12 +146,24 @@ int main(int argc, char *argv[])
 
																 	starpu_data_acquire(handle, STARPU_R);
															
 
																 	/* Make sure we have a local pointer to it.  */
															
 
																+	ret = EXIT_SUCCESS;
															
 
																 	pointer = (int *) starpu_handle_to_pointer(handle, 0);
															
 
																-	STARPU_ASSERT(pointer != NULL);
															
 
																-	for(i = 0; i < count; i++)
															
 
																+	if (pointer == NULL)
															
 
																 	{
															
 
																-		int *numbers = (int *)pointer;
															
 
																-		STARPU_ASSERT(numbers[i] == i);
															
 
																+	     FPRINTF(stderr, "pointer should be non NULL\n");
															
 
																+	     ret = EXIT_FAILURE;
															
 
																+	}
															
 
																+	else
															
 
																+	{
															
 
																+	     for(i = 0; i < count; i++)
															
 
																+	     {
															
 
																+		  int *numbers = (int *)pointer;
															
 
																+		  if (numbers[i] != i)
															
 
																+		  {
															
 
																+		       FPRINTF(stderr, "Incorrect value numbers[%d] == %d should be %d\n", i, numbers[i], i);
															
 
																+		       ret = EXIT_FAILURE;
															
 
																+		  }
															
 
																+	     }
															
 
																 	}
															
 
																 	starpu_data_release(handle);
															
@@ -159,5 +171,5 @@ int main(int argc, char *argv[])
 
																 	starpu_shutdown();
															
 
																-	return EXIT_SUCCESS;
															
 
																+	return ret;
															
 
																 }
															
--- a/tests/datawizard/increment_redux_lazy.c
+++ b/tests/datawizard/increment_redux_lazy.c
@@ -79,6 +79,7 @@ static void redux_opencl_kernel(void *descr[], void *arg)
 
																 	h_dst += h_src;
															
 
																 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
															
 
																+	clFinish(queue);
															
 
																 }
															
 
																 static void neutral_opencl_kernel(void *descr[], void *arg)
															
@@ -90,6 +91,7 @@ static void neutral_opencl_kernel(void *descr[], void *arg)
 
																 	starpu_opencl_get_current_queue(&queue);
															
 
																 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
															
 
																+	clFinish(queue);
															
 
																 }
															
 
																 #endif
															
@@ -151,6 +153,7 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 
																 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
															
 
																 	h_token++;
															
 
																 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
															
 
																+	clFinish(queue);
															
 
																 }
															
 
																 #endif
															
--- a/tests/datawizard/interfaces/test_interfaces.c
+++ b/tests/datawizard/interfaces/test_interfaces.c
@@ -589,7 +589,7 @@ out:
 
																 static void
															
 
																 run_async(void)
															
 
																 {
															
 
																-	int async = starpu_disable_asynchronous_copy();
															
 
																+	int async = starpu_asynchronous_copy_disabled();
															
 
																 	if (async == 1) {
															
 
																 		FPRINTF(stderr, "Asynchronous copies have been disabled\n");
															
 
																 		return;
															
--- a/tests/datawizard/readonly.c
+++ b/tests/datawizard/readonly.c
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 
																      if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
															
 
																      STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																-     int copy = starpu_disable_asynchronous_copy();
															
 
																+     int copy = starpu_asynchronous_copy_disabled();
															
 
																      FPRINTF(stderr, "copy %d\n", copy);
															
 
																      starpu_variable_data_register(&handle, 0, (uintptr_t)&var, sizeof(var));
															
--- a/tests/datawizard/wt_broadcast.c
+++ b/tests/datawizard/wt_broadcast.c
@@ -46,6 +46,7 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 
																 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
															
 
																 	h_token++;
															
 
																 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
															
 
																+	clFinish(queue);
															
 
																 }
															
 
																 #endif
															
--- a/tests/datawizard/wt_host.c
+++ b/tests/datawizard/wt_host.c
@@ -46,6 +46,7 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 
																 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
															
 
																 	h_token++;
															
 
																 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
															
 
																+	clFinish(queue);
															
 
																 }
															
 
																 #endif