Browse Source

new merge

Andra Hugo 13 years ago
parent
commit
9bb769dbf0

+ 1 - 1
doc/chapters/basic-api.texi

@@ -118,7 +118,7 @@ This can also be specified with the @code{STARPU_SINGLE_COMBINED_WORKER} environ
 @item @code{int disable_asynchronous_copy} (default = 0)
 @item @code{int disable_asynchronous_copy} (default = 0)
 This flag should be set to 1 to disable asynchronous copies between
 This flag should be set to 1 to disable asynchronous copies between
 CPUs and accelerators. This can also be specified with the
 CPUs and accelerators. This can also be specified with the
-@code{STARPU_DISABLE_ASYNCHRONOUS_COPY} environment variable.
+@code{DISABLE_STARPU_ASYNCHRONOUS_COPY} environment variable.
 The AMD implementation of OpenCL is known to
 The AMD implementation of OpenCL is known to
 fail when copying data asynchronously. When using this implementation,
 fail when copying data asynchronously. When using this implementation,
 it is therefore necessary to disable asynchronous data transfers.
 it is therefore necessary to disable asynchronous data transfers.

+ 10 - 0
examples/Makefile.am

@@ -38,6 +38,7 @@ EXTRA_DIST = 					\
 	basic_examples/multiformat_opencl_kernel.cl  \
 	basic_examples/multiformat_opencl_kernel.cl  \
 	basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \
 	basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \
 	common/blas_model.c			\
 	common/blas_model.c			\
+	spmd/vector_scal_spmd.c			\
 	spmv/spmv_cuda.cu			\
 	spmv/spmv_cuda.cu			\
 	spmv/spmv_opencl.cl			\
 	spmv/spmv_opencl.cl			\
 	gordon/null_kernel_gordon.c		\
 	gordon/null_kernel_gordon.c		\
@@ -184,6 +185,7 @@ examplebin_PROGRAMS +=				\
 	tag_example/tag_example3		\
 	tag_example/tag_example3		\
 	tag_example/tag_example4		\
 	tag_example/tag_example4		\
 	tag_example/tag_restartable		\
 	tag_example/tag_restartable		\
+	spmd/vector_scal_spmd			\
 	spmv/spmv				\
 	spmv/spmv				\
 	callback/callback			\
 	callback/callback			\
 	incrementer/incrementer			\
 	incrementer/incrementer			\
@@ -248,6 +250,7 @@ STARPU_EXAMPLES +=				\
 	tag_example/tag_example3		\
 	tag_example/tag_example3		\
 	tag_example/tag_example4		\
 	tag_example/tag_example4		\
 	tag_example/tag_restartable		\
 	tag_example/tag_restartable		\
+	spmd/vector_scal_spmd			\
 	spmv/spmv				\
 	spmv/spmv				\
 	callback/callback			\
 	callback/callback			\
 	incrementer/incrementer			\
 	incrementer/incrementer			\
@@ -666,6 +669,13 @@ cg_cg_LDADD =					\
 endif
 endif
 
 
 ################
 ################
+# SPMD example #
+################
+
+spmd_vector_scal_spmd_SOURCES =			\
+	spmd/vector_scal_spmd.c
+
+################
 # SpMV example #
 # SpMV example #
 ################
 ################
 
 

+ 1 - 0
examples/axpy/axpy.c

@@ -21,6 +21,7 @@
 #include <stdio.h>
 #include <stdio.h>
 #include <assert.h>
 #include <assert.h>
 #include <sys/time.h>
 #include <sys/time.h>
+#include <math.h>
 
 
 #include <common/blas.h>
 #include <common/blas.h>
 
 

+ 5 - 5
examples/basic_examples/block_opencl.c

@@ -36,11 +36,11 @@ void opencl_codelet(void *descr[], void *_args)
 	cl_event event;
 	cl_event event;
 	int id, devid, err;
 	int id, devid, err;
 	cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(descr[0]);
 	cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(descr[0]);
-	uint32_t nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
-	uint32_t ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
-	uint32_t nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
-        uint32_t ldy = STARPU_BLOCK_GET_LDY(descr[0]);
-        uint32_t ldz = STARPU_BLOCK_GET_LDZ(descr[0]);
+	int nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
+	int ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
+	int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
+        int ldy = (int)STARPU_BLOCK_GET_LDY(descr[0]);
+        int ldz = (int) STARPU_BLOCK_GET_LDZ(descr[0]);
         float *multiplier = (float *)_args;
         float *multiplier = (float *)_args;
 
 
         id = starpu_worker_get_id();
         id = starpu_worker_get_id();

+ 1 - 3
examples/basic_examples/block_opencl_kernel.cl

@@ -14,9 +14,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include <stdint.h>
-
-__kernel void block(__global float *b, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t ldy, uint32_t ldz, float multiplier)
+__kernel void block(__global float *b, int nx, int ny, int nz, int ldy, int ldz, float multiplier)
 {
 {
      const int i = get_global_id(0);
      const int i = get_global_id(0);
      if (i < (nz*ldz)+(ny*ldy)+nx)
      if (i < (nz*ldz)+(ny*ldy)+nx)

+ 146 - 0
examples/spmd/vector_scal_spmd.c

@@ -0,0 +1,146 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* gcc build:
+
+   gcc -O2 -g vector_scal.c -o vector_scal $(pkg-config --cflags starpu-1.0) $(pkg-config --libs starpu-1.0)
+
+ */
+
+#include <starpu.h>
+#include <stdio.h>
+#include <limits.h>
+
+#define MIN(a,b)        ((a)<(b)?(a):(b))
+
+#define	NX	2048000
+#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+
+static int get_first_element_rank(int nel, int rank, int nb_workers)
+{
+	if(rank == 0)
+		return 0;
+
+	/* We get the  number of bigger parts which stand before the part */
+	int nb_big_parts = MIN(nel % nb_workers, rank);
+
+	return nb_big_parts * (nel / nb_workers + 1) + (rank - nb_big_parts) * (nel / nb_workers);
+}
+
+void scal_cpu_func(void *buffers[], void *_args)
+{
+	unsigned i;
+	float *factor = _args, f = *factor;
+	struct starpu_vector_interface *vector = buffers[0];
+	unsigned n = STARPU_VECTOR_GET_NX(vector);
+	float *val = (float *)STARPU_VECTOR_GET_PTR(vector);
+
+	int nb_workers = starpu_combined_worker_get_size();
+	int rank = starpu_combined_worker_get_rank();
+
+	if (rank == 0)
+		FPRINTF(stderr, "running task with %d CPUs.\n", starpu_combined_worker_get_size());
+
+	/* We add 1 to the (nel_total % nb_workers) first workers, thus we get an evenly splitted data. */
+	int nel_worker = (n / nb_workers) + ((rank < (n % nb_workers)) ? 1 : 0);
+
+	int begin = get_first_element_rank(n, rank, nb_workers);
+
+
+	for (i = 0; i < nel_worker; i++) {
+		int rank = i + begin;
+
+		float v = val[rank];
+		int j;
+		for (j = 0; j < 100; j++)
+			v = v * f;
+		val[rank] = v;
+	}
+}
+
+static struct starpu_perfmodel vector_scal_model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "vector_scale_parallel"
+};
+
+static struct starpu_codelet cl =
+{
+	.modes = { STARPU_RW },
+	.where = STARPU_CPU,
+	.type = STARPU_SPMD,
+	.max_parallelism = INT_MAX,
+	.cpu_funcs = {scal_cpu_func, NULL},
+	.nbuffers = 1,
+	.model = &vector_scal_model,
+};
+
+int main(int argc, char **argv)
+{
+	struct starpu_conf conf;
+	float *vector;
+	unsigned i;
+	int ret;
+
+	vector = malloc(NX*sizeof(*vector));
+
+	for (i = 0; i < NX; i++)
+		vector[i] = (i+1.0f);
+
+	FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]);
+	FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]);
+
+	starpu_conf_init(&conf);
+
+	conf.single_combined_worker = 1;
+	conf.sched_policy_name = "pheft";
+
+	ret = starpu_init(&conf);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_data_handle_t vector_handle;
+	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+
+	float factor = 1.001;
+
+	for (i = 0; i < 100; i++) {
+		struct starpu_task *task = starpu_task_create();
+
+		task->cl = &cl;
+
+		task->handles[0] = vector_handle;
+		task->cl_arg = &factor;
+		task->cl_arg_size = sizeof(factor);
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) {
+		     ret = 77;
+		     break;
+		}
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	starpu_data_unregister(vector_handle);
+
+	/* terminate StarPU, no task can be submitted after */
+	starpu_shutdown();
+
+	FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]);
+	FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]);
+
+	return ret;
+}

+ 8 - 8
examples/spmv/spmv_kernels.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,18 +28,18 @@ void spmv_kernel_opencl(void *descr[], void *args)
 	cl_event event;
 	cl_event event;
 	int id, devid, err, n;
 	int id, devid, err, n;
 
 
-	uint32_t nnz = STARPU_CSR_GET_NNZ(descr[0]);
-	uint32_t nrow = STARPU_CSR_GET_NROW(descr[0]);
+	int nnz = (int) STARPU_CSR_GET_NNZ(descr[0]);
+	int nrow = (int) STARPU_CSR_GET_NROW(descr[0]);
 	cl_mem nzval = (cl_mem)STARPU_CSR_GET_NZVAL(descr[0]);
 	cl_mem nzval = (cl_mem)STARPU_CSR_GET_NZVAL(descr[0]);
-	uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]);
-	uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]);
-	uint32_t firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]);
+	cl_mem colind = (cl_mem)STARPU_CSR_GET_COLIND(descr[0]);
+	cl_mem rowptr = (cl_mem)STARPU_CSR_GET_ROWPTR(descr[0]);
+	int firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]);
 
 
 	cl_mem vecin = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[1]);
 	cl_mem vecin = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[1]);
-	uint32_t nx_in = STARPU_VECTOR_GET_NX(descr[1]);
+	int nx_in = (int)STARPU_VECTOR_GET_NX(descr[1]);
 
 
 	cl_mem vecout = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]);
 	cl_mem vecout = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]);
-	uint32_t nx_out = STARPU_VECTOR_GET_NX(descr[2]);
+	int nx_out = (int)STARPU_VECTOR_GET_NX(descr[2]);
 
 
         id = starpu_worker_get_id();
         id = starpu_worker_get_id();
         devid = starpu_worker_get_devid(id);
         devid = starpu_worker_get_devid(id);

+ 6 - 8
examples/spmv/spmv_opencl.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,15 +14,13 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-__kernel void spmv(unsigned nnz, unsigned nrow,
+__kernel void spmv(int nnz, int nrow,
                    __global float* nzval, __global unsigned* colind,
                    __global float* nzval, __global unsigned* colind,
-                   __global unsigned* rowptr, unsigned firstentry,
-                   __global float *vecin, unsigned nx_in,
-                   __global float *vecout, unsigned nx_out)
+                   __global unsigned* rowptr, int firstentry,
+                   __global float *vecin, int nx_in,
+                   __global float *vecout, int nx_out)
 {
 {
-	unsigned row;
-	// for (row = 0; row < nrow; row++)
-	row = get_global_id(0);
+	const int row = get_global_id(0);
 	if (row < nrow)
 	if (row < nrow)
 	{
 	{
 		float tmp = 0.0f;
 		float tmp = 0.0f;

+ 1 - 1
include/starpu.h

@@ -107,7 +107,7 @@ unsigned starpu_cuda_worker_get_count(void);
 unsigned starpu_spu_worker_get_count(void);
 unsigned starpu_spu_worker_get_count(void);
 unsigned starpu_opencl_worker_get_count(void);
 unsigned starpu_opencl_worker_get_count(void);
 
 
-int starpu_disable_asynchronous_copy();
+int starpu_asynchronous_copy_disabled();
 
 
 /* Return the identifier of the thread in case this is associated to a worker.
 /* Return the identifier of the thread in case this is associated to a worker.
  * This will return -1 if this function is called directly from the application
  * This will return -1 if this function is called directly from the application

+ 24 - 24
socl/examples/clinfo/clinfo.c

@@ -20,7 +20,7 @@
 
 
 #include <CL/cl.h>
 #include <CL/cl.h>
 
 
-static inline void 
+static inline void
 checkErr(cl_int err, const char * name) {
 checkErr(cl_int err, const char * name) {
     if (err != CL_SUCCESS) {
     if (err != CL_SUCCESS) {
         fprintf(stderr, "ERROR: %s (%d)\n", name, err);
         fprintf(stderr, "ERROR: %s (%d)\n", name, err);
@@ -45,8 +45,8 @@ main(void) {
    platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platforms);
    platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platforms);
    err = clGetPlatformIDs(num_platforms, platforms, NULL);
    err = clGetPlatformIDs(num_platforms, platforms, NULL);
    checkErr(err, "Unable to get platform list");
    checkErr(err, "Unable to get platform list");
-   
-   
+
+
    // Iteratate over platforms
    // Iteratate over platforms
    printf("Number of platforms:\t\t\t\t %d\n", num_platforms);
    printf("Number of platforms:\t\t\t\t %d\n", num_platforms);
 
 
@@ -54,25 +54,25 @@ main(void) {
       unsigned int i;
       unsigned int i;
       for (i=0; i<num_platforms; i++) {
       for (i=0; i<num_platforms; i++) {
          char str[256];
          char str[256];
-         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, sizeof(str), &str, NULL);
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, sizeof(str), str, NULL);
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_PROFILE)");
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_PROFILE)");
-         printf("  Plaform Profile:\t\t\t\t %s\n", str);    
+         printf("  Plaform Profile:\t\t\t\t %s\n", str);
 
 
-         err= clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(str), &str, NULL);
+         err= clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(str), str, NULL);
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_VERSION)");
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_VERSION)");
-         printf("  Plaform Version:\t\t\t\t %s\n", str);    
+         printf("  Plaform Version:\t\t\t\t %s\n", str);
 
 
-         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), &str, NULL);
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), str, NULL);
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_NAME)");
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_NAME)");
-         printf("  Plaform Name:\t\t\t\t\t %s\n", str);    
+         printf("  Plaform Name:\t\t\t\t\t %s\n", str);
 
 
-         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(str), &str, NULL);
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(str), str, NULL);
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_VENDOR)");
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_VENDOR)");
-         printf("  Plaform Vendor:\t\t\t\t %s\n", str);    
+         printf("  Plaform Vendor:\t\t\t\t %s\n", str);
 
 
-         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(str), &str, NULL);
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(str), str, NULL);
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_EXTENSIONS)");
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_EXTENSIONS)");
-         printf("  Plaform Extensions:\t\t\t %s\n", str);    
+         printf("  Plaform Extensions:\t\t\t %s\n", str);
       }
       }
    }
    }
 
 
@@ -88,12 +88,12 @@ main(void) {
 
 
          err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), &str, NULL);
          err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), &str, NULL);
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_NAME)");
          checkErr(err, "clGetPlatformInfo(CL_PLATFORM_NAME)");
-         printf("  Plaform Name:\t\t\t\t\t %s\n", str);    
+         printf("  Plaform Name:\t\t\t\t\t %s\n", str);
 
 
          err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
          err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
          checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
          checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
          devices = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices);
          devices = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices);
-         
+
          err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
          err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
          checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
          checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
 
 
@@ -103,7 +103,7 @@ main(void) {
             for (j=0; j<num_devices; j++) {
             for (j=0; j<num_devices; j++) {
                cl_device_type dev_type;
                cl_device_type dev_type;
                printf("\n  DEVICE %d\n", j);
                printf("\n  DEVICE %d\n", j);
-               
+
                err = clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
                err = clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
                checkErr(err, "clGetDeviceInfo(CL_DEVICE_TYPE)");
                checkErr(err, "clGetDeviceInfo(CL_DEVICE_TYPE)");
 
 
@@ -123,13 +123,13 @@ main(void) {
                   cl_uint vendor_id;
                   cl_uint vendor_id;
                   err = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL);
                   err = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL);
                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_VENDOR_ID)");
                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_VENDOR_ID)");
-                  printf("  Device ID:\t\t\t\t\t %d\n", vendor_id); 
+                  printf("  Device ID:\t\t\t\t\t %d\n", vendor_id);
                }
                }
                {
                {
                   cl_uint units;
                   cl_uint units;
                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(units), &units, NULL);
                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(units), &units, NULL);
                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS)");
                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS)");
-                  printf("  Max compute units:\t\t\t\t %d\n", units); 
+                  printf("  Max compute units:\t\t\t\t %d\n", units);
                }
                }
 
 
                {
                {
@@ -137,12 +137,12 @@ main(void) {
                   size_t *sizes;
                   size_t *sizes;
                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(dims), &dims, NULL);
                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(dims), &dims, NULL);
                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)");
                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)");
-                  printf("  Max work item dimensions:\t\t\t %d\n", dims); 
+                  printf("  Max work item dimensions:\t\t\t %d\n", dims);
 
 
                   sizes = (size_t*)malloc(dims * sizeof(size_t));
                   sizes = (size_t*)malloc(dims * sizeof(size_t));
                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*dims, sizes, NULL);
                   err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*dims, sizes, NULL);
                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES)");
                   checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES)");
-                  printf("  Max work item dimensions:\t\t\t %d\n", dims); 
+                  printf("  Max work item dimensions:\t\t\t %d\n", dims);
 
 
                   {
                   {
                      unsigned int k;
                      unsigned int k;
@@ -165,7 +165,7 @@ main(void) {
 
 
 #define GET_STRING(CL_D,str,size) { \
 #define GET_STRING(CL_D,str,size) { \
    char val[size]; \
    char val[size]; \
-   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), &val, NULL); \
+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), val, NULL);	\
    checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
    checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
    printf(str, val); \
    printf(str, val); \
 }
 }
@@ -204,9 +204,9 @@ main(void) {
    checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
    checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
    printf(str, ((val & test) == CL_TRUE ? "Yes" : "No")); \
    printf(str, ((val & test) == CL_TRUE ? "Yes" : "No")); \
 }
 }
-      
+
                GET_SIZET(CL_DEVICE_MAX_WORK_GROUP_SIZE, "  Max work group size:\t\t\t\t %u\n")
                GET_SIZET(CL_DEVICE_MAX_WORK_GROUP_SIZE, "  Max work group size:\t\t\t\t %u\n")
-               
+
                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, "  Preferred vector width char:\t\t\t %u\n")
                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, "  Preferred vector width char:\t\t\t %u\n")
                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, "  Preferred vector width short:\t\t\t %u\n")
                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, "  Preferred vector width short:\t\t\t %u\n")
                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, "  Preferred vector width int:\t\t\t %u\n")
                GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, "  Preferred vector width int:\t\t\t %u\n")
@@ -291,7 +291,7 @@ main(void) {
                GET_STRING(CL_DEVICE_PROFILE, "  Profile:\t\t\t\t\t %s\n", 30);
                GET_STRING(CL_DEVICE_PROFILE, "  Profile:\t\t\t\t\t %s\n", 30);
                GET_STRING(CL_DEVICE_VERSION, "  Version:\t\t\t\t\t %s\n", 50);
                GET_STRING(CL_DEVICE_VERSION, "  Version:\t\t\t\t\t %s\n", 50);
                GET_STRING(CL_DEVICE_EXTENSIONS, "  Extensions:\t\t\t\t\t %s\n", 4096);
                GET_STRING(CL_DEVICE_EXTENSIONS, "  Extensions:\t\t\t\t\t %s\n", 4096);
-            
+
                printf("\n");
                printf("\n");
             }
             }
          }
          }

+ 6 - 6
socl/src/cl_getdeviceinfo.c

@@ -79,12 +79,12 @@ soclGetDeviceInfo(cl_device_id    device,
          cl_platform_id p = &socl_platform;
          cl_platform_id p = &socl_platform;
          INFO_CASE_EX2(p);
          INFO_CASE_EX2(p);
       }
       }
-      INFO_CASE(CL_DEVICE_NAME, device->name)
-      INFO_CASE(CL_DEVICE_VENDOR, SOCL_VENDOR)
-      INFO_CASE(CL_DRIVER_VERSION, SOCL_DRIVER_VERSION)
-      INFO_CASE(CL_DEVICE_PROFILE, SOCL_PROFILE)
-      INFO_CASE(CL_DEVICE_VERSION, SOCL_VERSION)
-      INFO_CASE(CL_DEVICE_EXTENSIONS, device->extensions)
+      INFO_CASE_STRING(CL_DEVICE_NAME, device->name)
+      INFO_CASE_STRING(CL_DEVICE_VENDOR, SOCL_VENDOR)
+      INFO_CASE_STRING(CL_DRIVER_VERSION, SOCL_DRIVER_VERSION)
+      INFO_CASE_STRING(CL_DEVICE_PROFILE, SOCL_PROFILE)
+      INFO_CASE_STRING(CL_DEVICE_VERSION, SOCL_VERSION)
+      INFO_CASE_STRING(CL_DEVICE_EXTENSIONS, device->extensions)
       default:
       default:
          return CL_INVALID_VALUE;
          return CL_INVALID_VALUE;
    }
    }

+ 9 - 8
socl/src/cl_getplatforminfo.c

@@ -30,15 +30,16 @@ soclGetPlatformInfo(cl_platform_id   platform,
                   size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
                   size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
 {
 {
    if (platform != NULL && platform != &socl_platform)
    if (platform != NULL && platform != &socl_platform)
-      return CL_INVALID_PLATFORM;
+	return CL_INVALID_PLATFORM;
 
 
-   switch (param_name) {
-      INFO_CASE(CL_PLATFORM_PROFILE, SOCL_PROFILE)
-      INFO_CASE(CL_PLATFORM_VERSION, SOCL_VERSION)
-      INFO_CASE(CL_PLATFORM_NAME,    SOCL_PLATFORM_NAME)
-      INFO_CASE(CL_PLATFORM_VENDOR,  SOCL_VENDOR)
-      INFO_CASE(CL_PLATFORM_EXTENSIONS, SOCL_PLATFORM_EXTENSIONS)
-      default:
+   switch (param_name)
+   {
+	INFO_CASE_STRING(CL_PLATFORM_PROFILE, SOCL_PROFILE);
+	INFO_CASE_STRING(CL_PLATFORM_VERSION, SOCL_VERSION);
+	INFO_CASE_STRING(CL_PLATFORM_NAME,    SOCL_PLATFORM_NAME);
+	INFO_CASE_STRING(CL_PLATFORM_VENDOR,  SOCL_VENDOR);
+	INFO_CASE_STRING(CL_PLATFORM_EXTENSIONS, SOCL_PLATFORM_EXTENSIONS);
+   default:
          return CL_INVALID_VALUE;
          return CL_INVALID_VALUE;
    }
    }
 
 

+ 12 - 0
socl/src/getinfo.h

@@ -29,6 +29,18 @@
 #define INFO_CASE(param, var) case param: \
 #define INFO_CASE(param, var) case param: \
    INFO_CASE_EX2(var)
    INFO_CASE_EX2(var)
 
 
+#define INFO_CASE_STRING_EX2(var) if (param_value != NULL) { \
+      if (param_value_size < strlen(var)) \
+         return CL_INVALID_VALUE; \
+      strcpy(param_value, var); \
+   } \
+   if (param_value_size_ret != NULL) \
+      *param_value_size_ret = strlen(var); \
+   break;
+
+#define INFO_CASE_STRING(param, var) case param: \
+   INFO_CASE_STRING_EX2(var)
+
 #define INFO_CASE_VALUE(param, type, value) case param: {\
 #define INFO_CASE_VALUE(param, type, value) case param: {\
       type tmp = (value);\
       type tmp = (value);\
       INFO_CASE_EX2(tmp);\
       INFO_CASE_EX2(tmp);\

+ 4 - 1
src/core/jobs.c

@@ -204,8 +204,11 @@ void _starpu_handle_job_termination(struct _starpu_job *j, int workerid)
 			_starpu_clock_gettime(&task->profiling_info->callback_end_time);
 			_starpu_clock_gettime(&task->profiling_info->callback_end_time);
 	}
 	}
 
 
+	/* If the job was executed on a combined worker there is no need for the
+	 * scheduler to process it : the task structure doesn't contain any valuable
+	 * data as it's not linked to an actual worker */
 	/* control task should not execute post_exec_hook */
 	/* control task should not execute post_exec_hook */
-	if(task->cl != NULL && !task->control_task)
+	if(j->task_size == 1 && task->cl != NULL && !task->control_task)
 	{
 	{
 	  _starpu_sched_post_exec_hook(task);
 	  _starpu_sched_post_exec_hook(task);
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR

+ 5 - 5
src/core/workers.c

@@ -458,16 +458,16 @@ int starpu_init(struct starpu_conf *user_conf)
 
 
 	if (user_conf)
 	if (user_conf)
 	{
 	{
-	     int disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
-	     if (disable_asynchronous_copy == 1)
+	     int asynchronous_copy_disabled = starpu_get_env_number("DISABLE_STARPU_ASYNCHRONOUS_COPY");
+	     if (asynchronous_copy_disabled == 1)
 		  config.disable_asynchronous_copy = 1;
 		  config.disable_asynchronous_copy = 1;
 	     else
 	     else
 		  config.disable_asynchronous_copy = (user_conf->disable_asynchronous_copy == 1);
 		  config.disable_asynchronous_copy = (user_conf->disable_asynchronous_copy == 1);
 	}
 	}
 	else
 	else
 	{
 	{
-	     int disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
-	     config.disable_asynchronous_copy = (disable_asynchronous_copy == 1);
+	     int asynchronous_copy_disabled = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
+	     config.disable_asynchronous_copy = (asynchronous_copy_disabled == 1);
 	}
 	}
 
 
 	_starpu_init_all_sched_ctxs(&config);
 	_starpu_init_all_sched_ctxs(&config);
@@ -721,7 +721,7 @@ unsigned starpu_spu_worker_get_count(void)
 	return config.topology.ngordon_spus;
 	return config.topology.ngordon_spus;
 }
 }
 
 
-int starpu_disable_asynchronous_copy()
+int starpu_asynchronous_copy_disabled()
 {
 {
 	return config.disable_asynchronous_copy;
 	return config.disable_asynchronous_copy;
 }
 }

+ 2 - 2
src/datawizard/interfaces/data_interface.c

@@ -288,8 +288,8 @@ void starpu_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
 	*handleptr = handle;
 	*handleptr = handle;
 	handle->mf_node = home_node;
 	handle->mf_node = home_node;
 
 
-	int disable_asynchronous_copy = starpu_disable_asynchronous_copy();
-	if (STARPU_UNLIKELY(disable_asynchronous_copy))
+	int asynchronous_copy_disabled = starpu_asynchronous_copy_disabled();
+	if (STARPU_UNLIKELY(asynchronous_copy_disabled))
 	{
 	{
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	     if (ops->copy_methods->ram_to_cuda_async)
 	     if (ops->copy_methods->ram_to_cuda_async)

+ 22 - 6
src/drivers/cpu/driver_cpu.c

@@ -28,9 +28,14 @@
 #include <core/sched_policy.h>
 #include <core/sched_policy.h>
 #include <core/sched_ctx.h>
 #include <core/sched_ctx.h>
 
 
-static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_args, int is_parallel_task, int rank, enum starpu_perf_archtype perf_arch)
+/* Actually launch the job on a cpu worker.
+ * Handle binding CPUs on cores.
+ * In the case of a combined worker WORKER_TASK != J->TASK */
+
+static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *cpu_args, int rank, enum starpu_perf_archtype perf_arch)
 {
 {
 	int ret;
 	int ret;
+	int is_parallel_task = (j->task_size > 1);
 	int profiling = starpu_profiling_status_get();
 	int profiling = starpu_profiling_status_get();
 	struct timespec codelet_start, codelet_end;
 	struct timespec codelet_start, codelet_end;
 
 
@@ -51,8 +56,14 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 	}
 	}
 
 
 	if (is_parallel_task)
 	if (is_parallel_task)
+	{
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
 
 
+		/* In the case of a combined worker, the scheduler needs to know
+		 * when each actual worker begins the execution */
+		_starpu_sched_pre_exec_hook(worker_task);
+	}
+
 	/* Give profiling variable */
 	/* Give profiling variable */
 	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank, profiling);
 	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank, profiling);
 
 
@@ -167,10 +178,6 @@ void *_starpu_cpu_worker(void *arg)
 		/* Get the rank in case it is a parallel task */
 		/* Get the rank in case it is a parallel task */
 		if (is_parallel_task)
 		if (is_parallel_task)
 		{
 		{
-			/* We can release the fake task */
-			STARPU_ASSERT(task != j->task);
-			free(task);
-
 			_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 			_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 			rank = j->active_task_alias_count++;
 			rank = j->active_task_alias_count++;
 			_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 			_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
@@ -194,7 +201,7 @@ void *_starpu_cpu_worker(void *arg)
 		_starpu_set_current_task(j->task);
 		_starpu_set_current_task(j->task);
 		cpu_arg->current_task = j->task;
 		cpu_arg->current_task = j->task;
 
 
-                res = execute_job_on_cpu(j, cpu_arg, is_parallel_task, rank, perf_arch);
+                res = execute_job_on_cpu(j, task, cpu_arg, rank, perf_arch);
 
 
 		_starpu_set_current_task(NULL);
 		_starpu_set_current_task(NULL);
 		cpu_arg->current_task = NULL;
 		cpu_arg->current_task = NULL;
@@ -211,6 +218,15 @@ void *_starpu_cpu_worker(void *arg)
 			}
 			}
 		}
 		}
 
 
+		/* In the case of combined workers, we need to inform the
+		 * scheduler each worker's execution is over.
+		 * Then we free the workers' task alias */
+		if (is_parallel_task)
+		{
+			_starpu_sched_post_exec_hook(task);
+			free(task);
+		}
+
 		if (rank == 0)
 		if (rank == 0)
 			_starpu_handle_job_termination(j, workerid);
 			_starpu_handle_job_termination(j, workerid);
         }
         }

+ 4 - 1
src/drivers/driver_common/driver_common.c

@@ -39,7 +39,10 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 	if (cl->model && cl->model->benchmarking)
 	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
 		calibrate_model = 1;
 
 
-	if (rank == 0)
+	/* If the job is executed on a combined worker there is no need for the
+	 * scheduler to process it : it doesn't contain any valuable data
+	 * as it's not linked to an actual worker */
+	if (j->task_size == 1)
 		_starpu_sched_pre_exec_hook(task);
 		_starpu_sched_pre_exec_hook(task);
 
 
 	args->status = STATUS_EXECUTING;
 	args->status = STATUS_EXECUTING;

+ 6 - 2
src/drivers/opencl/driver_opencl.c

@@ -58,7 +58,9 @@ static void limit_gpu_mem_if_needed(int devid)
 
 
 	/* Request the size of the current device's memory */
 	/* Request the size of the current device's memory */
 	cl_ulong totalGlobalMem;
 	cl_ulong totalGlobalMem;
-	clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(totalGlobalMem), &totalGlobalMem, NULL);
+	err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(totalGlobalMem), &totalGlobalMem, NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
 
 
 	/* How much memory to waste ? */
 	/* How much memory to waste ? */
 	size_t to_waste = (size_t)totalGlobalMem - (size_t)limit*1024*1024;
 	size_t to_waste = (size_t)totalGlobalMem - (size_t)limit*1024*1024;
@@ -76,7 +78,9 @@ static void unlimit_gpu_mem_if_needed(int devid)
 {
 {
 	if (wasted_memory[devid])
 	if (wasted_memory[devid])
 	{
 	{
-		clReleaseMemObject(wasted_memory[devid]);
+		cl_int err = clReleaseMemObject(wasted_memory[devid]);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
 		wasted_memory[devid] = NULL;
 		wasted_memory[devid] = NULL;
 	}
 	}
 }
 }

+ 2 - 3
src/sched_policies/heft.c

@@ -353,6 +353,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 			{
 			{
 				ntasks_best_end = ntasks_end;
 				ntasks_best_end = ntasks_end;
 				ntasks_best = worker;
 				ntasks_best = worker;
+				nimpl_best = nimpl;
 			}
 			}
 
 
 			if (isnan(local_task_length[worker_ctx][nimpl]))
 			if (isnan(local_task_length[worker_ctx][nimpl]))
@@ -417,13 +418,11 @@ static int push_conversion_tasks(struct starpu_task *task, unsigned int workerid
 		conversion_task->execute_on_a_specific_worker = 1;
 		conversion_task->execute_on_a_specific_worker = 1;
 		conversion_task->workerid = workerid;
 		conversion_task->workerid = workerid;
 		conversion_task->mf_skip = 1;
 		conversion_task->mf_skip = 1;
+		handle->mf_node = node;
 		ret = _starpu_task_submit_conversion_task(conversion_task, workerid);
 		ret = _starpu_task_submit_conversion_task(conversion_task, workerid);
 		STARPU_ASSERT(ret == 0);
 		STARPU_ASSERT(ret == 0);
 	}
 	}
 
 
-	for (i = 0; i < task->cl->nbuffers; i++)
-		task->handles[i]->mf_node = node;
-
 	task->execute_on_a_specific_worker = 1;
 	task->execute_on_a_specific_worker = 1;
 	task->workerid = workerid;
 	task->workerid = workerid;
 	task->mf_skip= 1;
 	task->mf_skip= 1;

+ 5 - 6
src/sched_policies/parallel_heft.c

@@ -130,6 +130,11 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 		j->task_size = worker_size;
 		j->task_size = worker_size;
 		j->combined_workerid = best_workerid;
 		j->combined_workerid = best_workerid;
 		j->active_task_alias_count = 0;
 		j->active_task_alias_count = 0;
+
+		/* This task doesn't belong to an actual worker, it belongs
+		 * to a combined worker and thus the scheduler doesn't care
+		 * of its predicted values which are insignificant */
+		task->predicted = 0;
 		task->predicted_transfer = 0;
 		task->predicted_transfer = 0;
 
 
 		_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
 		_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
@@ -368,16 +373,11 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 		{
 		{
 			worker = workers->has_next(workers) ? workers->get_next(workers) : worker_ctx;
 			worker = workers->has_next(workers) ? workers->get_next(workers) : worker_ctx;
 
 
-			unsigned incremented = 0;
 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 			{
 			{
 				if (skip_worker[worker_ctx][nimpl])
 				if (skip_worker[worker_ctx][nimpl])
 				{
 				{
 					/* no one on that queue may execute this task */
 					/* no one on that queue may execute this task */
-					if(!incremented)
-						worker_ctx++;
-
-					incremented = 1;
 					continue;
 					continue;
 				}
 				}
 
 
@@ -402,7 +402,6 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 
 
 			//	fprintf(stderr, "FITNESS worker %d -> %e local_exp_end %e - local_data_penalty %e\n", worker, fitness[worker][nimpl], local_exp_end[worker][nimpl] - best_exp_end, local_data_penalty[worker][nimpl]);
 			//	fprintf(stderr, "FITNESS worker %d -> %e local_exp_end %e - local_data_penalty %e\n", worker, fitness[worker][nimpl], local_exp_end[worker][nimpl] - best_exp_end, local_data_penalty[worker][nimpl]);
 			}
 			}
-			if(!incremented)
 				worker_ctx++;
 				worker_ctx++;
 		}
 		}
 	}
 	}

+ 18 - 6
tests/datawizard/handle_to_pointer.c

@@ -80,7 +80,7 @@ static void opencl_task(void *buffers[], void *args)
 				NULL,           /* event_wait_list */
 				NULL,           /* event_wait_list */
 				NULL            /* event */);
 				NULL            /* event */);
 	}
 	}
-			
+	clFinish(queue);
 }
 }
 #endif
 #endif
 
 
@@ -146,12 +146,24 @@ int main(int argc, char *argv[])
 	starpu_data_acquire(handle, STARPU_R);
 	starpu_data_acquire(handle, STARPU_R);
 
 
 	/* Make sure we have a local pointer to it.  */
 	/* Make sure we have a local pointer to it.  */
+	ret = EXIT_SUCCESS;
 	pointer = (int *) starpu_handle_to_pointer(handle, 0);
 	pointer = (int *) starpu_handle_to_pointer(handle, 0);
-	STARPU_ASSERT(pointer != NULL);
-	for(i = 0; i < count; i++)
+	if (pointer == NULL)
 	{
 	{
-		int *numbers = (int *)pointer;
-		STARPU_ASSERT(numbers[i] == i);
+	     FPRINTF(stderr, "pointer should be non NULL\n");
+	     ret = EXIT_FAILURE;
+	}
+	else
+	{
+	     for(i = 0; i < count; i++)
+	     {
+		  int *numbers = (int *)pointer;
+		  if (numbers[i] != i)
+		  {
+		       FPRINTF(stderr, "Incorrect value numbers[%d] == %d should be %d\n", i, numbers[i], i);
+		       ret = EXIT_FAILURE;
+		  }
+	     }
 	}
 	}
 	starpu_data_release(handle);
 	starpu_data_release(handle);
 
 
@@ -159,5 +171,5 @@ int main(int argc, char *argv[])
 
 
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	return EXIT_SUCCESS;
+	return ret;
 }
 }

+ 3 - 0
tests/datawizard/increment_redux_lazy.c

@@ -79,6 +79,7 @@ static void redux_opencl_kernel(void *descr[], void *arg)
 	h_dst += h_src;
 	h_dst += h_src;
 
 
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
+	clFinish(queue);
 }
 }
 
 
 static void neutral_opencl_kernel(void *descr[], void *arg)
 static void neutral_opencl_kernel(void *descr[], void *arg)
@@ -90,6 +91,7 @@ static void neutral_opencl_kernel(void *descr[], void *arg)
 	starpu_opencl_get_current_queue(&queue);
 	starpu_opencl_get_current_queue(&queue);
 
 
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
 	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
+	clFinish(queue);
 }
 }
 #endif
 #endif
 
 
@@ -151,6 +153,7 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
 	h_token++;
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
+	clFinish(queue);
 }
 }
 #endif
 #endif
 
 

+ 1 - 1
tests/datawizard/interfaces/test_interfaces.c

@@ -589,7 +589,7 @@ out:
 static void
 static void
 run_async(void)
 run_async(void)
 {
 {
-	int async = starpu_disable_asynchronous_copy();
+	int async = starpu_asynchronous_copy_disabled();
 	if (async == 1) {
 	if (async == 1) {
 		FPRINTF(stderr, "Asynchronous copies have been disabled\n");
 		FPRINTF(stderr, "Asynchronous copies have been disabled\n");
 		return;
 		return;

+ 1 - 1
tests/datawizard/readonly.c

@@ -45,7 +45,7 @@ int main(int argc, char **argv)
      if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
      if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
      STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
      STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
-     int copy = starpu_disable_asynchronous_copy();
+     int copy = starpu_asynchronous_copy_disabled();
      FPRINTF(stderr, "copy %d\n", copy);
      FPRINTF(stderr, "copy %d\n", copy);
 
 
      starpu_variable_data_register(&handle, 0, (uintptr_t)&var, sizeof(var));
      starpu_variable_data_register(&handle, 0, (uintptr_t)&var, sizeof(var));

+ 1 - 0
tests/datawizard/wt_broadcast.c

@@ -46,6 +46,7 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
 	h_token++;
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
+	clFinish(queue);
 }
 }
 #endif
 #endif
 
 

+ 1 - 0
tests/datawizard/wt_host.c

@@ -46,6 +46,7 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
 	h_token++;
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
+	clFinish(queue);
 }
 }
 #endif
 #endif