Browse Source

merge trunk@6701:6750

Nathalie Furmento 12 years ago
parent
commit
6a27a71c66

+ 2 - 0
ChangeLog

@@ -19,6 +19,8 @@ StarPU 1.1.0 (svn revision xxxx)
 
 New features:
   * OpenGL interoperability support.
+  * Capability to store compiled OpenCL kernels on the file system
+  * Capability to load compiled OpenCL kernels
 
 Changes:
   * The FxT code can now be used on systems other than Linux.

+ 1 - 1
configure.ac

@@ -1013,7 +1013,7 @@ AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of worker
 AC_MSG_CHECKING(maximum number of implementations)
 AC_ARG_ENABLE(maximplementations, [AS_HELP_STRING([--enable-maximplementations=<number>],
 		[maximum number of implementations])],
-		maximplementations=$enableval, maximplementations=4)
+		maximplementations=$enableval, maximplementations=8)
 AC_MSG_RESULT($maximplementations)
 AC_DEFINE_UNQUOTED(STARPU_MAXIMPLEMENTATIONS, [$maximplementations],
 		[maximum number of implementations])

+ 1 - 1
doc/chapters/advanced-api.texi

@@ -161,7 +161,7 @@ struct starpu_complex_interface
 @end cartouche
 
 Registering such a data to StarPU is easily done using the function
-@code{starpu_data_register} (@pxref{Basic Data Library API}). The last
+@code{starpu_data_register} (@pxref{Basic Data Management API}). The last
 parameter of the function, @code{interface_complex_ops}, will be
 described below.
 

+ 1 - 1
doc/chapters/advanced-examples.texi

@@ -235,7 +235,7 @@ starpu_data_partition(handle, &f);
 @end smallexample
 @end cartouche
 
-The task submission then uses @code{starpu_data_get_sub_data} to retrive the
+The task submission then uses @code{starpu_data_get_sub_data} to retrieve the
 sub-handles to be passed as tasks parameters.
 
 @cartouche

+ 78 - 14
doc/chapters/basic-api.texi

@@ -9,7 +9,7 @@
 @menu
 * Initialization and Termination::  Initialization and Termination methods
 * Workers' Properties::         Methods to enumerate workers' properties
-* Data Library::                Methods to manipulate data
+* Data Management::                Methods to manipulate data
 * Data Interfaces::
 * Data Partition::
 * Codelets and Tasks::          Methods to construct tasks
@@ -298,12 +298,12 @@ this function should be used in the allocation function to determine
 on which device the memory needs to be allocated.
 @end deftypefun
 
-@node Data Library
-@section Data Library
+@node Data Management
+@section Data Management
 
 @menu
-* Introduction to Data Library::
-* Basic Data Library API::
+* Introduction to Data Management::
+* Basic Data Management API::
 * Access registered data from the application::
 @end menu
 
@@ -312,7 +312,7 @@ This section describes the data management facilities provided by StarPU.
 We show how to use existing data interfaces in @ref{Data Interfaces}, but developers can
 design their own data interfaces if required.
 
-@node Introduction to Data Library
+@node Introduction to Data Management
 @subsection Introduction
 Data management is done at a high-level in StarPU: rather than accessing a mere
 list of contiguous buffers, the tasks may manipulate data that are described by
@@ -340,8 +340,8 @@ to StarPU, the specified memory node indicates where the piece of data
 initially resides (we also call this memory node the home node of a piece of
 data).
 
-@node Basic Data Library API
-@subsection Basic Data Library API
+@node Basic Data Management API
+@subsection Basic Data Management API
 
 @deftypefun int starpu_malloc (void **@var{A}, size_t @var{dim})
 This function allocates data of the given size in main memory. It will also try to pin it in
@@ -612,12 +612,12 @@ starpu_block_data_register(&block_handle, 0, (uintptr_t)block,
 @deftypefun void starpu_bcsr_data_register (starpu_data_handle_t *@var{handle}, uint32_t @var{home_node}, uint32_t @var{nnz}, uint32_t @var{nrow}, uintptr_t @var{nzval}, uint32_t *@var{colind}, uint32_t *@var{rowptr}, uint32_t @var{firstentry}, uint32_t @var{r}, uint32_t @var{c}, size_t @var{elemsize})
 This variant of @code{starpu_data_register} uses the BCSR (Blocked
 Compressed Sparse Row Representation) sparse matrix interface.
-Register the sparse matrix made of @var{nnz} non-zero values of size
+Register the sparse matrix made of @var{nnz} non-zero blocks of elements of size
 @var{elemsize} stored in @var{nzval} and initializes @var{handle} to represent
 it. Blocks have size @var{r} * @var{c}. @var{nrow} is the number of rows (in
-terms of blocks), @var{colind} is the list of positions of the non-zero entries
-on the row, @var{rowptr} is the index (in nzval) of the first entry of the row.
-@var{fristentry} is the index of the first entry of the given arrays (usually 0
+terms of blocks), @var{colind[i]} is the block-column index for block @code{i}
+in @code{nzval}, @var{rowptr[i]} is the block-index (in nzval) of the first block of row @code{i}.
+@var{firstentry} is the index of the first entry of the given arrays (usually 0
 or 1).
 @end deftypefun
 
@@ -1148,6 +1148,18 @@ vector represented by @var{father_interface} once partitioned in
 @var{nparts} chunks of equal size.
 @end deftypefun
 
+@deftypefun void starpu_block_shadow_filter_func_vector (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
+Return in @code{*@var{child_interface}} the @var{id}th element of the
+vector represented by @var{father_interface} once partitioned in
+@var{nparts} chunks of equal size with a shadow border @code{filter_arg_ptr}
+
+The @code{filter_arg_ptr} field must be the shadow size casted into @code{void*}.
+
+IMPORTANT: This can only be used for read-only access, as no coherency is
+enforced for the shadowed parts.
+
+A usage example is available in examples/filters/shadow.c
+@end deftypefun
 
 @deftypefun void starpu_vector_list_filter_func (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
 Return in @code{*@var{child_interface}} the @var{id}th element of the
@@ -2245,8 +2257,23 @@ Return the computation kernel command queue of the current worker.
 Sets the arguments of a given kernel. The list of arguments must be given as
 (size_t @var{size_of_the_argument}, cl_mem * @var{pointer_to_the_argument}).
 The last argument must be 0. Returns the number of arguments that were
-successfully set. In case of failure, @var{err} is set to the error returned by
-OpenCL.
+successfully set. In case of failure, returns the id of the argument
+that could not be set and @var{err} is set to the error returned by
+OpenCL. Otherwise, returns the number of arguments that were set.
+
+@cartouche
+@smallexample
+int n;
+cl_int err;
+cl_kernel kernel;
+n = starpu_opencl_set_kernel_args(&err, 2, &kernel,
+                                  sizeof(foo), &foo,
+                                  sizeof(bar), &bar,
+                                  0);
+if (n != 2)
+   fprintf(stderr, "Error : %d\n", err);
+@end smallexample
+@end cartouche
 @end deftypefun
 
 @node Compiling OpenCL kernels
@@ -2282,6 +2309,43 @@ This function compiles an OpenCL source code stored in a string.
 This function unloads an OpenCL compiled code.
 @end deftypefun
 
+@deftypefun void starpu_opencl_load_program_source ({const char *}@var{source_file_name}, char *@var{located_file_name}, char *@var{located_dir_name}, char *@var{opencl_program_source})
+Store the contents of the file @var{source_file_name} in the buffer
+@var{opencl_program_source}. The file @var{source_file_name} can be
+located in the current directory, or in the directory specified by the
+environment variable @code{STARPU_OPENCL_PROGRAM_DIR}, or in the
+directory @code{share/starpu/opencl} of the installation directory of
+StarPU, or in the source directory of StarPU.
+When the file is found, @code{located_file_name} is the full name of
+the file as it has been located on the system, @code{located_dir_name}
+the directory where it has been located. Otherwise, they are both set
+to the empty string.
+@end deftypefun
+
+@deftypefun int starpu_opencl_compile_opencl_from_file ({const char *}@var{source_file_name}, {const char*} @var{build_options})
+Compile the OpenCL kernel stored in the file @code{source_file_name}
+with the given options @code{build_options} and stores the result in
+the directory @code{$STARPU_HOME/.starpu/opencl} with the same
+filename as @code{source_file_name}. The compilation is done for every
+OpenCL device, and the filename is suffixed with the vendor id and the
+device id of the OpenCL device.
+@end deftypefun
+
+@deftypefun int starpu_opencl_compile_opencl_from_string ({const char *}@var{opencl_program_source}, {const char *}@var{file_name}, {const char* }@var{build_options})
+Compile the OpenCL kernel in the string @code{opencl_program_source}
+with the given options @code{build_options} and stores the result in
+the directory @code{$STARPU_HOME/.starpu/opencl} with the filename
+@code{file_name}. The compilation is done for every
+OpenCL device, and the filename is suffixed with the vendor id and the
+device id of the OpenCL device.
+@end deftypefun
+
+@deftypefun int starpu_opencl_load_binary_opencl ({const char *}@var{kernel_id}, {struct starpu_opencl_program *}@var{opencl_programs})
+Compile the binary OpenCL kernel identified with @var{id}. For every
+OpenCL device, the binary OpenCL kernel will be loaded from the file
+@code{$STARPU_HOME/.starpu/opencl/<kernel_id>.<device_type>.vendor_id_<vendor_id>_device_id_<device_id>}.
+@end deftypefun
+
 @node Loading OpenCL kernels
 @subsection Loading OpenCL kernels
 

+ 1 - 1
doc/chapters/c-extensions.texi

@@ -321,7 +321,7 @@ simple way to allocate storage for arrays on the heap:
 This attributes applies to local variables with an array type.  Its
 effect is to automatically allocate the array's storage on
 the heap, using @code{starpu_malloc} under the hood (@pxref{Basic Data
-Library API, starpu_malloc}).  The heap-allocated array is automatically
+Management API, starpu_malloc}).  The heap-allocated array is automatically
 freed when the variable's scope is left, as with
 automatic variables.
 

+ 14 - 0
examples/Makefile.am

@@ -181,6 +181,7 @@ examplebin_PROGRAMS +=				\
 	filters/fvector				\
 	filters/fblock				\
 	filters/fmatrix				\
+	filters/shadow				\
 	tag_example/tag_example			\
 	tag_example/tag_example2		\
 	tag_example/tag_example3		\
@@ -190,6 +191,7 @@ examplebin_PROGRAMS +=				\
 	spmv/spmv				\
 	callback/callback			\
 	incrementer/incrementer			\
+	binary/binary				\
 	interface/complex			\
 	matvecmult/matvecmult			\
 	profiling/profiling			\
@@ -254,6 +256,7 @@ STARPU_EXAMPLES +=				\
 	spmv/spmv				\
 	callback/callback			\
 	incrementer/incrementer			\
+	binary/binary				\
 	interface/complex			\
 	matvecmult/matvecmult			\
 	profiling/profiling			\
@@ -732,6 +735,17 @@ nobase_STARPU_OPENCL_DATA_DATA += \
 	incrementer/incrementer_kernels_opencl_kernel.cl
 endif
 
+##################
+# Binary example #
+##################
+
+binary_binary_SOURCES =	\
+	binary/binary.c
+if STARPU_USE_OPENCL
+binary_binary_SOURCES +=	\
+	incrementer/incrementer_kernels_opencl.c
+endif
+
 #####################
 # interface example #
 #####################

+ 119 - 0
examples/binary/binary.c

@@ -0,0 +1,119 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <pthread.h>
+#include <sys/time.h>
+
+#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+extern void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args);
+struct starpu_opencl_program opencl_program;
+#endif
+
+struct starpu_codelet cl =
+{
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {opencl_codelet, NULL},
+#endif
+	.nbuffers = 1,
+	.modes = {STARPU_RW}
+};
+
+int compute(char *file_name, int load_as_file)
+{
+	float float_array[4] __attribute__ ((aligned (16))) = { 0.0f, 0.0f, 0.0f, 0.0f};
+	starpu_data_handle_t float_array_handle;
+	unsigned i;
+	int ret = 0;
+	unsigned niter = 500;
+
+	starpu_vector_data_register(&float_array_handle, 0, (uintptr_t)&float_array, 4, sizeof(float));
+
+#ifdef STARPU_USE_OPENCL
+	if (load_as_file)
+	{
+		ret = starpu_opencl_compile_opencl_from_file(file_name, NULL);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_compile_opencl_from_file");
+		ret = starpu_opencl_load_binary_opencl(file_name, &opencl_program);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_binary_opencl");
+	}
+	else
+	{
+		char located_file_name[1024];
+		char located_dir_name[1024];
+		char opencl_program_source[16384];
+		starpu_opencl_load_program_source(file_name, located_file_name, located_dir_name, opencl_program_source);
+		ret = starpu_opencl_compile_opencl_from_string(opencl_program_source, "incrementer", NULL);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_compile_opencl_from_file");
+		ret = starpu_opencl_load_binary_opencl("incrementer", &opencl_program);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_binary_opencl");
+	}
+#endif
+
+	for (i = 0; i < niter; i++)
+	{
+		ret = starpu_insert_task(&cl, STARPU_RW, float_array_handle, 0);
+		if (STARPU_UNLIKELY(ret == -ENODEV))
+		{
+			FPRINTF(stderr, "No worker may execute this task\n");
+			exit(0);
+		}
+	}
+
+	starpu_task_wait_for_all();
+
+	/* update the array in RAM */
+	starpu_data_unregister(float_array_handle);
+
+	FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0], float_array[1], float_array[2], float_array[3]);
+
+	if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3])
+	{
+		FPRINTF(stderr, "Incorrect result\n");
+		ret = 1;
+	}
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int ret = 0;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	conf.ncpus = 0;
+	conf.ncuda = 0;
+
+        ret = starpu_init(&conf);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+                FPRINTF(stderr, "This application requires an OpenCL worker.\n");
+		starpu_shutdown();
+		return 77;
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	ret = compute("examples/incrementer/incrementer_kernels_opencl_kernel.cl", 1);
+	if (ret == 0)
+		ret = compute("examples/incrementer/incrementer_kernels_opencl_kernel.cl", 0);
+
+	starpu_shutdown();
+	return ret;
+}

+ 172 - 0
examples/filters/shadow.c

@@ -0,0 +1,172 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This examplifies the use of the shadow filter: a source vector of NX
+ * elements (plus 2*SHADOW wrap-around elements) is copied into a destination
+ * vector of NX+NPARTS*2*SHADOW elements, thus showing how shadowing shows up.
+ */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+
+/* Shadow width */
+#define SHADOW 2
+#define NX    30
+#define PARTS 3
+
+#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+
+void cpu_func(void *buffers[], void *cl_arg)
+{
+        unsigned i;
+
+        /* length of the shadowed source vector */
+        unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+        /* local copy of the shadowed source vector pointer */
+        int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]);
+
+        /* length of the destination vector */
+        unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]);
+        /* local copy of the destination vector pointer */
+        int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]);
+
+	/* If things go right, sizes should match */
+	STARPU_ASSERT(n == n2);
+	for (i = 0; i < n; i++)
+		val2[i] = val[i];
+}
+
+#ifdef STARPU_USE_CUDA
+void cuda_func(void *buffers[], void *cl_arg)
+{
+        /* length of the shadowed source vector */
+        unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+        /* local copy of the shadowed source vector pointer */
+        int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]);
+
+        /* length of the destination vector */
+        unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]);
+        /* local copy of the destination vector pointer */
+        int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]);
+
+	/* If things go right, sizes should match */
+	STARPU_ASSERT(n == n2);
+	cudaMemcpy(val2, val, n*sizeof(*val), cudaMemcpyDeviceToDevice);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+#endif
+
+int main(int argc, char **argv)
+{
+	unsigned i, j;
+        int vector[NX + 2*SHADOW];
+        int vector2[NX + PARTS*2*SHADOW];
+	starpu_data_handle_t handle, handle2;
+        int factor=1;
+	int ret;
+
+        struct starpu_codelet cl =
+	{
+                .where = STARPU_CPU
+#ifdef STARPU_USE_CUDA
+			|STARPU_CUDA
+#endif
+			,
+                .cpu_funcs = {cpu_func, NULL},
+#ifdef STARPU_USE_CUDA
+                .cuda_funcs = {cuda_func, NULL},
+#endif
+                .nbuffers = 2,
+		.modes = {STARPU_R, STARPU_W}
+        };
+
+        for(i=0 ; i<NX ; i++) vector[SHADOW+i] = i;
+	for(i=0 ; i<SHADOW ; i++) vector[i] = vector[i+NX];
+	for(i=0 ; i<SHADOW ; i++) vector[SHADOW+NX+i] = vector[SHADOW+i];
+        FPRINTF(stderr,"IN  Vector: ");
+        for(i=0 ; i<NX + 2*SHADOW ; i++) FPRINTF(stderr, "%5d ", vector[i]);
+        FPRINTF(stderr,"\n");
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		exit(77);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Declare source vector to StarPU */
+	starpu_vector_data_register(&handle, 0, (uintptr_t)vector, NX + 2*SHADOW, sizeof(vector[0]));
+
+	/* Declare destination vector to StarPU */
+	starpu_vector_data_register(&handle2, 0, (uintptr_t)vector2, NX + PARTS*2*SHADOW, sizeof(vector[0]));
+
+        /* Partition the source vector in PARTS sub-vectors with shadows */
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_block_shadow_filter_func_vector,
+		.nchildren = PARTS,
+		.filter_arg_ptr = (void*)(uintptr_t) SHADOW /* Shadow width */
+	};
+	starpu_data_partition(handle, &f);
+
+        /* Partition the destination vector in PARTS sub-vectors */
+	struct starpu_data_filter f2 =
+	{
+		.filter_func = starpu_block_filter_func_vector,
+		.nchildren = PARTS,
+	};
+	starpu_data_partition(handle2, &f2);
+
+        /* Submit a task on each sub-vector */
+	for (i=0; i<starpu_data_get_nb_children(handle); i++)
+	{
+                starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 1, i);
+                starpu_data_handle_t sub_handle2 = starpu_data_get_sub_data(handle2, 1, i);
+                struct starpu_task *task = starpu_task_create();
+
+                factor *= 10;
+		task->handles[0] = sub_handle;
+		task->handles[1] = sub_handle2;
+                task->cl = &cl;
+                task->synchronous = 1;
+                task->cl_arg = &factor;
+                task->cl_arg_size = sizeof(factor);
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	starpu_data_unpartition(handle, 0);
+	starpu_data_unpartition(handle2, 0);
+        starpu_data_unregister(handle);
+        starpu_data_unregister(handle2);
+	starpu_shutdown();
+
+        FPRINTF(stderr,"OUT Vector: ");
+        for(i=0 ; i<NX + PARTS*2*SHADOW ; i++) FPRINTF(stderr, "%5d ", vector2[i]);
+        FPRINTF(stderr,"\n");
+	for(i=0 ; i<PARTS ; i++)
+		for (j=0 ; j<NX/PARTS ; j++)
+			STARPU_ASSERT(vector2[i*(NX/PARTS+2*SHADOW)+j] == vector[i*(NX/PARTS)+j]);
+
+	return 0;
+
+enodev:
+	FPRINTF(stderr, "WARNING: No one can execute this task\n");
+	starpu_shutdown();
+	return 77;
+}

+ 5 - 0
examples/scheduler/schedulers.sh

@@ -29,6 +29,11 @@ SCHEDULERS=`STARPU_SCHED="help" ./basic_examples/hello_world 2>&1 | awk '/->/ {p
 
 for sched in $SCHEDULERS
 do
+    # XXX pgreedy often hangs, we have to fix it.
+    # Let's just disable it for now.
+    if [ "$sched" == "pgreedy" ] ; then
+        continue
+    fi
     echo "cholesky.$sched"
     STARPU_SCHED=$sched ./cholesky/cholesky_tag
     check_success $?

+ 26 - 10
gcc-plugin/src/starpu.c

@@ -116,7 +116,8 @@ static const char plugin_name[] = "starpu";
 /* Whether to enable verbose output.  */
 static bool verbose_output_p = false;
 
-/* Search path for OpenCL source files, for the `opencl' pragma.  */
+/* Search path for OpenCL source files for the `opencl' pragma, as a
+   `TREE_LIST'.  */
 static tree opencl_include_dirs = NULL_TREE;
 
 /* Names of public attributes.  */
@@ -1069,7 +1070,7 @@ build_variable_from_file_contents (location_t loc,
 				   const_tree search_path)
 {
   gcc_assert (search_path != NULL_TREE
-	      && TREE_CODE (search_path) == STRING_CST);
+	      && TREE_CODE (search_path) == TREE_LIST);
 
   int err, dir_fd;
   struct stat st;
@@ -1078,11 +1079,14 @@ build_variable_from_file_contents (location_t loc,
 
   /* Look for FILE in each directory in SEARCH_PATH, and pick the first one
      that matches.  */
-  for (err = ENOENT, dir_fd = -1, dirs = opencl_include_dirs;
+  for (err = ENOENT, dir_fd = -1, dirs = search_path;
        (err != 0 || err == ENOENT) && dirs != NULL_TREE;
        dirs = TREE_CHAIN (dirs))
     {
-      dir_fd = open (TREE_STRING_POINTER (dirs),
+      gcc_assert (TREE_VALUE (dirs) != NULL_TREE
+		  && TREE_CODE (TREE_VALUE (dirs)) == STRING_CST);
+
+      dir_fd = open (TREE_STRING_POINTER (TREE_VALUE (dirs)),
 		     O_DIRECTORY | O_RDONLY);
       if (dir_fd < 0)
 	err = ENOENT;
@@ -1091,6 +1095,10 @@ build_variable_from_file_contents (location_t loc,
 	  err = fstatat (dir_fd, file, &st, 0);
 	  if (err != 0)
 	    close (dir_fd);
+
+	  /* Leave DIRS unchanged so it can be referred to in diagnostics
+	     below.  */
+	  break;
 	}
     }
 
@@ -1105,7 +1113,7 @@ build_variable_from_file_contents (location_t loc,
     {
       if (verbose_output_p)
 	inform (loc, "found file %qs in %qs",
-		file, TREE_STRING_POINTER (dirs));
+		file, TREE_STRING_POINTER (TREE_VALUE (dirs)));
 
       int fd;
 
@@ -1274,7 +1282,7 @@ build_opencl_set_kernel_arg_calls (location_t loc, tree task_impl,
 static void
 define_opencl_task_implementation (location_t loc, tree task_impl,
 				   const char *file, const_tree kernel,
-				   const_tree groupsize)
+				   tree groupsize)
 {
   gcc_assert (task_implementation_p (task_impl)
 	      && task_implementation_where (task_impl) == STARPU_OPENCL);
@@ -1449,11 +1457,14 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 
       /* TODO: Support user-provided values.  */
       append_to_statement_list (build2 (INIT_EXPR, TREE_TYPE (group_size_var),
-					group_size_var, (tree)groupsize),
+					group_size_var,
+					fold_convert (TREE_TYPE (group_size_var),
+						      groupsize)),
 				&stmts);
       append_to_statement_list (build2 (INIT_EXPR, TREE_TYPE (ngroups_var),
 					ngroups_var,
-					build_int_cst (integer_type_node, 1)),
+					build_int_cst (TREE_TYPE (ngroups_var),
+						       1)),
 				&stmts);
       append_to_statement_list (build4 (TARGET_EXPR, void_type_node,
 					error_var, enqueue_stmts,
@@ -3321,7 +3332,8 @@ plugin_init (struct plugin_name_args *plugin_info,
 		     NULL, &pass_info);
 
   include_dir = getenv ("STARPU_GCC_INCLUDE_DIR");
-  opencl_include_dirs = build_string (1, ".");
+  opencl_include_dirs = tree_cons (NULL_TREE, build_string (1, "."),
+				   NULL_TREE);
 
   int arg;
   for (arg = 0; arg < plugin_info->argc; arg++)
@@ -3344,7 +3356,8 @@ plugin_init (struct plugin_name_args *plugin_info,
 	    {
 	      tree dir = build_string (strlen (plugin_info->argv[arg].value),
 				       plugin_info->argv[arg].value);
-	      opencl_include_dirs = chainon (opencl_include_dirs, dir);
+	      opencl_include_dirs = tree_cons (NULL_TREE, dir,
+					       opencl_include_dirs);
 	    }
 	}
       else if (strcmp (plugin_info->argv[arg].key, "verbose") == 0)
@@ -3354,6 +3367,9 @@ plugin_init (struct plugin_name_args *plugin_info,
 		  plugin_info->argv[arg].key);
     }
 
+  /* Keep the directories in the order in which they appear.  */
+  opencl_include_dirs = nreverse (opencl_include_dirs);
+
   return 0;
 }
 

+ 1 - 1
gcc-plugin/tests/opencl-lacking.c

@@ -23,4 +23,4 @@ static void my_task_opencl (int x, float a[x])
   __attribute__ ((task_implementation ("opencl", my_task)));
 
 #pragma starpu opencl my_task_opencl  /* (note "not generated") */	\
-               "test.cl" "kern"
+               "test.cl" "kern" 8

+ 2 - 1
include/starpu_data_filters.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -66,6 +66,7 @@ void starpu_vertical_block_filter_func(void *father_interface, void *child_inter
 
 /* for vector */
 void starpu_block_filter_func_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+void starpu_block_shadow_filter_func_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_list_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 

+ 8 - 2
include/starpu_opencl.h

@@ -59,6 +59,12 @@ void starpu_opencl_get_queue(int devid, cl_command_queue *queue);
 void starpu_opencl_get_current_context(cl_context *context);
 void starpu_opencl_get_current_queue(cl_command_queue *queue);
 
+void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source);
+int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char* build_options);
+int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char* build_options);
+
+int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs);
+
 int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char* build_options);
 int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char* build_options);
 int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs);
@@ -71,13 +77,13 @@ int starpu_opencl_collect_stats(cl_event event);
 /*
  * Sets the arguments of an OpenCL kernel.
  * Arguments to pass to the kernel should be given as follows :
- * 
+ *
  * 	size of the argument,  pointer to the argument
  *
  * 0 must be passed to this function after the kernel arguments.
  *
  * In case of failure, returns the id of the argument that could not be set,
- * and sets "error" to the error returned. Otherwise, returns the number of 
+ * and sets "error" to the error returned. Otherwise, returns the number of
  * arguments that were set.
  *
  * Example :

+ 2 - 2
include/starpu_util.h

@@ -55,7 +55,7 @@ extern "C"
 } while(0)
 
 #if defined(STARPU_HAVE_STRERROR_R)
-#  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err < 0) { \
+#  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err != 0) { \
 			char xmessage[256]; strerror_r(-err, xmessage, 256); \
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d:%s>\n", message, err, xmessage); \
 			STARPU_ASSERT(0); }}
@@ -64,7 +64,7 @@ extern "C"
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d:%s>\n", message, err, xmessage); \
 			STARPU_ASSERT(0); }}
 #else
-#  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err < 0) {		\
+#  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err != 0) {		\
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d>\n", message, err); \
 			STARPU_ASSERT(0); }}
 #  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message) {if (err != value) { \

+ 3 - 3
mpi/starpu_mpi_fxt.h

@@ -28,11 +28,11 @@
 
 #ifdef STARPU_USE_FXT
 #define TRACE_MPI_BARRIER(rank, worldsize, key)	\
-	FUT_DO_PROBE4(FUT_MPI_BARRIER, (rank), (worldsize), (key), syscall(SYS_gettid));
+	FUT_DO_PROBE4(FUT_MPI_BARRIER, (rank), (worldsize), (key), _starpu_gettid());
 #define TRACE_MPI_ISEND(dest, mpi_tag, size)	\
-	FUT_DO_PROBE4(FUT_MPI_ISEND, (dest), (mpi_tag), (size), syscall(SYS_gettid));
+	FUT_DO_PROBE4(FUT_MPI_ISEND, (dest), (mpi_tag), (size), _starpu_gettid());
 #define TRACE_MPI_IRECV_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(FUT_MPI_IRECV_END, (src), (mpi_tag), syscall(SYS_gettid));
+	FUT_DO_PROBE3(FUT_MPI_IRECV_END, (src), (mpi_tag), _starpu_gettid());
 #define TRACE
 #else
 #define TRACE_MPI_BARRIER(a, b, c)	do {} while(0);

+ 1 - 0
socl/src/cl_createprogramwithsource.c

@@ -80,6 +80,7 @@ soclCreateProgramWithSource(cl_context      context,
       *errcode_ret = CL_SUCCESS;
 
    device_count = starpu_opencl_worker_get_count();
+   assert(device_count > 0);
    DEBUG_MSG("Worker count: %d\n", device_count);
 
    /* Check arguments */

+ 4 - 1
socl/src/command_queue.c

@@ -58,8 +58,11 @@ void command_queue_dependencies_implicit(
 	 * Return dependencies
 	 *********************/
 
-	cl_event * evs = malloc(ndeps * sizeof(cl_event));
 	int n = 0;
+	cl_event * evs = NULL;
+	if (ndeps > 0)
+		evs = malloc(ndeps * sizeof(cl_event));
+
 
 	/* Add dependency to last barrier if applicable */
 	if (cq->barrier != NULL)

+ 4 - 2
src/common/fxt.c

@@ -29,8 +29,10 @@
 #include <windows.h>
 #endif
 
-#ifdef __FreeBSD__
-#include <sys/thr.h> /* for thr_self() */
+#ifdef __linux__
+#include <sys/syscall.h>   /* for SYS_gettid */
+#elif defined(__FreeBSD__)
+#include <sys/thr.h>       /* for thr_self() */
 #endif
 
 #define _STARPU_PROF_BUFFER_SIZE  (8*1024*1024)

+ 0 - 1
src/common/fxt.h

@@ -103,7 +103,6 @@
 #define _STARPU_FUT_TASK_WAIT_FOR_ALL	0x513b
 
 #ifdef STARPU_USE_FXT
-#include <sys/syscall.h> /* pour les définitions de SYS_xxx */
 #include <fxt/fxt.h>
 #include <fxt/fut.h>
 

+ 41 - 1
src/common/utils.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -75,6 +75,32 @@ out:
 	return rv;
 }
 
+void _starpu_mkpath_and_check(const char *path, mode_t mode)
+{
+	int ret;
+
+	ret = _starpu_mkpath(path, mode);
+
+	if (ret == -1)
+	{
+		if (errno != EEXIST)
+		{
+			fprintf(stderr,"Error making StarPU directory %s:\n", path);
+			perror("mkdir");
+			STARPU_ASSERT(0);
+		}
+
+		/* make sure that it is actually a directory */
+		struct stat sb;
+		stat(path, &sb);
+		if (!S_ISDIR(sb.st_mode))
+		{
+			fprintf(stderr,"Error: %s is not a directory:\n", path);
+			STARPU_ASSERT(0);
+		}
+	}
+}
+
 int _starpu_check_mutex_deadlock(pthread_mutex_t *mutex)
 {
 	int ret;
@@ -92,3 +118,17 @@ int _starpu_check_mutex_deadlock(pthread_mutex_t *mutex)
 
 	return 1;
 }
+
+char *_starpu_get_home_path()
+{
+	char *path = getenv("XDG_CACHE_HOME");
+	if (!path)
+		path = getenv("STARPU_HOME");
+	if (!path)
+		path = getenv("HOME");
+	if (!path)
+		path = getenv("USERPROFILE");
+	if (!path)
+		_STARPU_ERROR("couldn't find a home place to put starpu data\n");
+	return path;
+}

+ 2 - 0
src/common/utils.h

@@ -54,7 +54,9 @@
 #define _STARPU_IS_ZERO(a) (fpclassify(a) == FP_ZERO)
 
 int _starpu_mkpath(const char *s, mode_t mode);
+void _starpu_mkpath_and_check(const char *s, mode_t mode);
 int _starpu_check_mutex_deadlock(pthread_mutex_t *mutex);
+char *_starpu_get_home_path();
 
 /* If FILE is currently on a comment line, eat it.  */
 void _starpu_drop_comments(FILE *f);

+ 5 - 84
src/core/perfmodel/perfmodel.c

@@ -462,16 +462,7 @@ void _starpu_get_perf_model_dir(char *path, size_t maxlen)
 	/* use the directory specified at configure time */
 	snprintf(path, maxlen, "%s", STARPU_PERF_MODEL_DIR);
 #else
-	const char *home_path = getenv("XDG_CACHE_HOME");
-	if (!home_path)
-		home_path = getenv("STARPU_HOME");
-	if (!home_path)
-		home_path = getenv("HOME");
-	if (!home_path)
-		home_path = getenv("USERPROFILE");
-	if (!home_path)
-		_STARPU_ERROR("couldn't find a home place to put starpu data\n");
-	snprintf(path, maxlen, "%s/.starpu/sampling/", home_path);
+	snprintf(path, maxlen, "%s/.starpu/sampling/", _starpu_get_home_path());
 #endif
 }
 
@@ -508,93 +499,23 @@ void _starpu_create_sampling_directory_if_needed(void)
 		   may not be safe: it is possible that the permission are
 		   changed in between. Instead, we create it and check if
 		   it already existed before */
-		int ret;
-		ret = _starpu_mkpath(perf_model_dir, S_IRWXU);
+		_starpu_mkpath_and_check(perf_model_dir, S_IRWXU);
 
-		if (ret == -1)
-		{
-			if (errno != EEXIST) {
-				fprintf(stderr,"Error making starpu directory %s:\n", perf_model_dir);
-				perror("mkdir");
-				STARPU_ASSERT(0);
-			}
-
-			/* make sure that it is actually a directory */
-			struct stat sb;
-			stat(perf_model_dir, &sb);
-			if (!S_ISDIR(sb.st_mode)) {
-				fprintf(stderr,"Error: %s is not a directory:\n", perf_model_dir);
-				STARPU_ASSERT(0);
-			}
-		}
 
 		/* Per-task performance models */
 		char perf_model_dir_codelets[256];
 		_starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
-
-		ret = _starpu_mkpath(perf_model_dir_codelets, S_IRWXU);
-		if (ret == -1)
-		{
-			if (errno != EEXIST) {
-				fprintf(stderr,"Error making starpu directory %s:\n", perf_model_dir);
-				perror("mkdir");
-				STARPU_ASSERT(0);
-			}
-
-
-			/* make sure that it is actually a directory */
-			struct stat sb;
-			stat(perf_model_dir_codelets, &sb);
-			if (!S_ISDIR(sb.st_mode)) {
-				fprintf(stderr,"Error: %s is not a directory:\n", perf_model_dir);
-				STARPU_ASSERT(0);
-			}
-		}
+		_starpu_mkpath_and_check(perf_model_dir_codelets, S_IRWXU);
 
 		/* Performance of the memory subsystem */
 		char perf_model_dir_bus[256];
 		_starpu_get_perf_model_dir_bus(perf_model_dir_bus, 256);
-
-		ret = _starpu_mkpath(perf_model_dir_bus, S_IRWXU);
-		if (ret == -1)
-		{
-			if (errno != EEXIST) {
-				fprintf(stderr,"Error making starpu directory %s:\n", perf_model_dir);
-				perror("mkdir");
-				STARPU_ASSERT(0);
-			}
-
-			/* make sure that it is actually a directory */
-			struct stat sb;
-			stat(perf_model_dir_bus, &sb);
-			if (!S_ISDIR(sb.st_mode)) {
-				fprintf(stderr,"Error: %s is not a directory:\n", perf_model_dir);
-				STARPU_ASSERT(0);
-			}
-		}
+		_starpu_mkpath_and_check(perf_model_dir_bus, S_IRWXU);
 
 		/* Performance debug measurements */
 		char perf_model_dir_debug[256];
 		_starpu_get_perf_model_dir_debug(perf_model_dir_debug, 256);
-
-		ret = _starpu_mkpath(perf_model_dir_debug, S_IRWXU);
-		if (ret == -1)
-		{
-			if (errno != EEXIST) {
-				fprintf(stderr,"Error making starpu directory %s:\n", perf_model_dir);
-				perror("mkdir");
-				STARPU_ASSERT(0);
-			}
-
-
-			/* make sure that it is actually a directory */
-			struct stat sb;
-			stat(perf_model_dir_debug, &sb);
-			if (!S_ISDIR(sb.st_mode)) {
-				fprintf(stderr,"Error: %s is not a directory:\n", perf_model_dir);
-				STARPU_ASSERT(0);
-			}
-		}
+		_starpu_mkpath_and_check(perf_model_dir_debug, S_IRWXU);
 
 		directory_existence_was_tested = 1;
 	}

+ 1 - 1
src/datawizard/coherency.h

@@ -42,7 +42,7 @@ enum _starpu_cache_state
 LIST_TYPE(_starpu_data_replicate,
 	starpu_data_handle_t handle;
 
-	/* describe the actual data layout */
+	/* describe the actual data layout, as manipulated by data interfaces in *_interface.c */
 	void *data_interface;
 
 	unsigned memory_node;

+ 2 - 5
src/datawizard/interfaces/bcsr_interface.c

@@ -544,16 +544,13 @@ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBU
 	uint32_t nrow = src_bcsr->nrow;
 	size_t elemsize = src_bcsr->elemsize;
 
-	uint32_t r = src_bcsr->r;
-	uint32_t c = src_bcsr->c;
-
-	memcpy((void *)dst_bcsr->nzval, (void *)src_bcsr->nzval, nnz*elemsize*r*c);
+	memcpy((void *)dst_bcsr->nzval, (void *)src_bcsr->nzval, nnz*elemsize);
 
 	memcpy((void *)dst_bcsr->colind, (void *)src_bcsr->colind, nnz*sizeof(uint32_t));
 
 	memcpy((void *)dst_bcsr->rowptr, (void *)src_bcsr->rowptr, (nrow+1)*sizeof(uint32_t));
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize*r*c + (nnz+nrow+1)*sizeof(uint32_t));
+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
 
 	return 0;
 }

+ 33 - 1
src/datawizard/interfaces/vector_filters.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
@@ -49,6 +49,38 @@ void starpu_block_filter_func_vector(void *father_interface, void *child_interfa
 }
 
 
+void starpu_block_shadow_filter_func_vector(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks)
+{
+        struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface;
+        struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface;
+
+        uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr;
+
+	/* actual number of elements */
+	uint32_t nx = vector_father->nx - 2 * shadow_size;
+	size_t elemsize = vector_father->elemsize;
+
+	STARPU_ASSERT(nchunks <= nx);
+
+	uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
+	size_t offset = id*chunk_size*elemsize;
+
+	uint32_t child_nx =
+	  STARPU_MIN(chunk_size, nx - id*chunk_size) + 2 * shadow_size;
+
+	vector_child->nx = child_nx;
+	vector_child->elemsize = elemsize;
+
+	if (vector_father->dev_handle)
+	{
+		if (vector_father->ptr)
+			vector_child->ptr = vector_father->ptr + offset;
+		vector_child->dev_handle = vector_father->dev_handle;
+		vector_child->offset = vector_father->offset + offset;
+	}
+}
+
+
 void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks)
 {
         /* there cannot be more than 2 chunks */

+ 221 - 16
src/drivers/opencl/driver_opencl_utils.c

@@ -26,6 +26,7 @@
 #include <starpu_opencl.h>
 #include <starpu_profiling.h>
 #include <core/workers.h>
+#include <common/utils.h>
 #include "driver_opencl_utils.h"
 #include "driver_opencl.h"
 
@@ -164,8 +165,83 @@ char *_starpu_opencl_load_program_source(const char *filename)
         return source;
 }
 
-int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs,
-					  const char* build_options)
+
+static
+char *_starpu_opencl_load_program_binary(const char *filename, size_t *len)
+{
+	struct stat statbuf;
+	FILE        *fh;
+	char        *binary;
+
+	fh = fopen(filename, "r");
+	if (fh == 0)
+		return NULL;
+
+	stat(filename, &statbuf);
+
+	binary = (char *) malloc(statbuf.st_size);
+	if (!binary)
+		return binary;
+
+	fread(binary, statbuf.st_size, 1, fh);
+
+	*len = statbuf.st_size;
+	return binary;
+}
+
+void _starpu_opencl_create_binary_directory(char *path, size_t maxlen)
+{
+	static int _directory_created = 0;
+
+	snprintf(path, maxlen, "%s/.starpu/opencl/", _starpu_get_home_path());
+
+	if (_directory_created == 0)
+	{
+		_STARPU_DEBUG("Creating directory %s\n", path);
+		_starpu_mkpath_and_check(path, S_IRWXU);
+		_directory_created = 1;
+	}
+}
+
+char *_starpu_opencl_get_device_type_as_string(int id)
+{
+	cl_device_type type;
+
+	type = _starpu_opencl_get_device_type(id);
+	switch (type)
+	{
+		case CL_DEVICE_TYPE_GPU: return "gpu";
+		case CL_DEVICE_TYPE_ACCELERATOR: return "acc";
+		case CL_DEVICE_TYPE_CPU: return "cpu";
+		default: return "unk";
+	}
+}
+
+int _starpu_opencl_get_binary_name(char *binary_file_name, size_t maxlen, const char *source_file_name, int dev, cl_device_id device)
+{
+	char binary_directory[1024];
+	char *p;
+	cl_int err;
+	cl_uint vendor_id;
+
+	_starpu_opencl_create_binary_directory(binary_directory, 1024);
+
+	p = strrchr(source_file_name, '/');
+	snprintf(binary_file_name, maxlen, "%s/%s", binary_directory, p?p:source_file_name);
+
+	p = strstr(binary_file_name, ".cl");
+	if (p == NULL) p=binary_file_name + strlen(binary_file_name);
+
+	err = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL);
+	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+	sprintf(p, ".%s.vendor_id_%d_device_id_%d", _starpu_opencl_get_device_type_as_string(dev), (int)vendor_id, dev);
+
+	return CL_SUCCESS;
+}
+
+int _starpu_opencl_compile_or_load_opencl_from_string(const char *opencl_program_source, const char* build_options,
+						      struct starpu_opencl_program *opencl_programs, const char* source_file_name)
 {
         unsigned int dev;
         unsigned int nb_devices;
@@ -179,7 +255,8 @@ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, str
                 cl_program   program;
                 cl_int       err;
 
-                opencl_programs->programs[dev] = NULL;
+		if (opencl_programs)
+			opencl_programs->programs[dev] = NULL;
 
                 starpu_opencl_get_device(dev, &device);
                 starpu_opencl_get_context(dev, &context);
@@ -220,32 +297,69 @@ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, str
 		}
 
                 // Store program
-                opencl_programs->programs[dev] = program;
+		if (opencl_programs)
+			opencl_programs->programs[dev] = program;
+		else
+		{
+			char binary_file_name[1024];
+			char *binary;
+			size_t binary_len;
+			FILE *fh;
+
+			err = _starpu_opencl_get_binary_name(binary_file_name, 1024, source_file_name, dev, device);
+			if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+			err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_len, NULL);
+			if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+			binary = malloc(binary_len);
+
+			err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL);
+			if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+			fh = fopen(binary_file_name, "w");
+			if (fh == NULL)
+			{
+				_STARPU_DISP("Error: Failed to open file <%s>\n", binary_file_name);
+				perror("fopen");
+				return EXIT_FAILURE;
+			}
+			fwrite(binary, binary_len, 1, fh);
+			fclose(fh);
+			free(binary);
+			_STARPU_DEBUG("File <%s> created\n", binary_file_name);
+		}
         }
         return EXIT_SUCCESS;
 }
 
-int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs,
-					const char* build_options)
+void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source)
+{
+        // Locate source file
+        _starpu_opencl_locate_file(source_file_name, located_file_name, located_dir_name);
+        _STARPU_DEBUG("Source file name : <%s>\n", located_file_name);
+        _STARPU_DEBUG("Source directory name : <%s>\n", located_dir_name);
+
+        // Load the compute program from disk into a char *
+        char *source = _starpu_opencl_load_program_source(located_file_name);
+        if(!source)
+                _STARPU_ERROR("Failed to load compute program from file <%s>!\n", located_file_name);
+
+	sprintf(opencl_program_source, "%s", source);
+}
+
+int _starpu_opencl_compile_or_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char* build_options)
 {
 	int nb_devices;
         char located_file_name[1024];
         char located_dir_name[1024];
 	char new_build_options[1024];
+	char opencl_program_source[16384];
 
 	// Do not try to load and compile the file if there is no devices
 	nb_devices = starpu_opencl_worker_get_count();
 	if (nb_devices == 0) return EXIT_SUCCESS;
 
-        // Locate source file
-        _starpu_opencl_locate_file(source_file_name, located_file_name, located_dir_name);
-        _STARPU_DEBUG("Source file name : <%s>\n", located_file_name);
-        _STARPU_DEBUG("Source directory name : <%s>\n", located_dir_name);
-
-        // Load the compute program from disk into a cstring buffer
-        char *opencl_program_source = _starpu_opencl_load_program_source(located_file_name);
-        if(!opencl_program_source)
-                _STARPU_ERROR("Failed to load compute program from file <%s>!\n", located_file_name);
+	starpu_opencl_load_program_source(source_file_name, located_file_name, located_dir_name, opencl_program_source);
 
 	if (!strcmp(located_dir_name, ""))
 		strcpy(new_build_options, build_options);
@@ -255,7 +369,98 @@ int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct sta
 		sprintf(new_build_options, "-I %s", located_dir_name);
 	_STARPU_DEBUG("Build options: <%s>\n", new_build_options);
 
-        return starpu_opencl_load_opencl_from_string(opencl_program_source, opencl_programs, new_build_options);
+        return _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, new_build_options, opencl_programs, source_file_name);
+}
+
+int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char* build_options)
+{
+	return _starpu_opencl_compile_or_load_opencl_from_file(source_file_name, NULL, build_options);
+}
+
+int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char* build_options)
+{
+	return _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, build_options, NULL, file_name);
+}
+
+int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs,
+					  const char* build_options)
+{
+	return _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, build_options, opencl_programs, NULL);
+}
+
+int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs,
+					const char* build_options)
+{
+	return _starpu_opencl_compile_or_load_opencl_from_file(source_file_name, opencl_programs, build_options);
+}
+
+int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs)
+{
+        unsigned int dev;
+        unsigned int nb_devices;
+
+        nb_devices = _starpu_opencl_get_device_count();
+        // Iterate over each device
+        for(dev = 0; dev < nb_devices; dev ++)
+	{
+                cl_device_id device;
+                cl_context   context;
+                cl_program   program;
+                cl_int       err;
+		char        *binary;
+		char         binary_file_name[1024];
+		size_t       length;
+		cl_int       binary_status;
+
+		opencl_programs->programs[dev] = NULL;
+
+                starpu_opencl_get_device(dev, &device);
+                starpu_opencl_get_context(dev, &context);
+                if (context == NULL)
+		{
+                        _STARPU_DEBUG("[%d] is not a valid OpenCL context\n", dev);
+                        continue;
+                }
+
+		// Load the binary buffer
+		err = _starpu_opencl_get_binary_name(binary_file_name, 1024, kernel_id, dev, device);
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+		binary = _starpu_opencl_load_program_binary(binary_file_name, &length);
+
+                // Create the compute program from the binary buffer
+                program = clCreateProgramWithBinary(context, 1, &device, &length, (const unsigned char **) &binary, &binary_status, &err);
+                if (!program || err != CL_SUCCESS) {
+			_STARPU_DISP("Error: Failed to load program binary!\n");
+			return EXIT_FAILURE;
+		}
+
+                // Build the program executable
+                err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
+
+		// Get the status
+		{
+		     cl_build_status status;
+		     size_t len;
+		     static char buffer[4096] = "";
+
+		     clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
+		     if (len > 2)
+			  _STARPU_DISP("Compilation output\n%s\n", buffer);
+
+		     clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL);
+		     if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS)
+		     {
+			  _STARPU_DISP("Error: Failed to build program executable!\n");
+			  _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status);
+			  return EXIT_FAILURE;
+		     }
+
+		}
+
+                // Store program
+		opencl_programs->programs[dev] = program;
+	}
+	return 0;
 }
 
 int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs)

+ 3 - 1
src/drivers/opencl/driver_opencl_utils.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +19,8 @@
 
 #include <config.h>
 
+char *_starpu_opencl_get_device_type_as_string(int id);
+
 #define _STARPU_OPENCL_PLATFORM_MAX 4
 
 #endif /* __STARPU_OPENCL_UTILS_H__ */

+ 6 - 2
tests/datawizard/lazy_unregister.c

@@ -56,6 +56,8 @@ int main(void)
 	starpu_task_declare_deps_array(t2, 1, &t1);
 
 	ret = starpu_task_submit(t2);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	starpu_data_unregister_lazy(handle);
 
@@ -63,10 +65,12 @@ int main(void)
 		return EXIT_FAILURE;
 
 	ret = starpu_task_submit(t1);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
-	ret = starpu_task_wait(t2);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	while (starpu_data_lookup(buffer) != NULL)
+		usleep(100000);
 
 	if (starpu_data_lookup(buffer) != NULL)
 		return EXIT_FAILURE;