瀏覽代碼

new merge

Andra Hugo 13 年之前
父節點
當前提交
79b8756aad
共有 58 個文件被更改,包括 1360 次插入249 次删除
  1. 15 8
      Makefile.am
  2. 3 23
      configure.ac
  3. 11 13
      gcc-plugin/examples/vector_scal/vector_scal.c
  4. 1 1
      gcc-plugin/examples/vector_scal/vector_scal_opencl_kernel.cl
  5. 168 28
      gcc-plugin/src/starpu.c
  6. 1 0
      gcc-plugin/tests/Makefile.am
  7. 7 7
      gcc-plugin/tests/base.c
  8. 24 0
      gcc-plugin/tests/mocks.h
  9. 127 0
      gcc-plugin/tests/opencl-types.c
  10. 26 16
      gcc-plugin/tests/output-pointer.c
  11. 28 13
      gcc-plugin/tests/pointers.c
  12. 2 1
      include/starpu.h
  13. 1 0
      include/starpu_config.h.in
  14. 39 0
      include/starpu_rand.h
  15. 9 3
      include/starpu_scheduler.h
  16. 2 2
      include/starpu_task.h
  17. 3 1
      socl/examples/basic/basic.c
  18. 7 7
      src/Makefile.am
  19. 2 1
      src/core/dependencies/implicit_data_deps.c
  20. 19 18
      src/core/sched_policy.c
  21. 2 8
      src/datawizard/data_request.c
  22. 2 1
      src/debug/traces/starpu_fxt.c
  23. 15 6
      src/drivers/opencl/driver_opencl_utils.c
  24. 1 1
      starpu-top/StarPU-Top-common.pri
  25. 1 1
      starpu-top/StarPU-Top.pro.in
  26. 29 0
      starpu-top/config.h.in
  27. 53 10
      starpu-top/main.cpp
  28. 61 1
      tests/Makefile.am
  29. 23 1
      tests/datawizard/acquire_release.c
  30. 25 1
      tests/datawizard/acquire_release2.c
  31. 67 0
      tests/datawizard/acquire_release_opencl.c
  32. 20 0
      tests/datawizard/acquire_release_opencl_kernel.cl
  33. 3 3
      tests/datawizard/data_implicit_deps.c
  34. 36 2
      tests/datawizard/data_invalidation.c
  35. 34 1
      tests/datawizard/handle_to_pointer.c
  36. 2 1
      tests/datawizard/interfaces/multiformat/advanced/multiformat_data_release.c
  37. 75 3
      tests/datawizard/lazy_allocation.c
  38. 21 1
      tests/datawizard/mpi_like.c
  39. 23 1
      tests/datawizard/mpi_like_async.c
  40. 69 0
      tests/datawizard/opencl_codelet_unsigned_inc.c
  41. 20 0
      tests/datawizard/opencl_codelet_unsigned_inc_kernel.cl
  42. 24 1
      tests/datawizard/scratch.c
  43. 72 0
      tests/datawizard/scratch_opencl.c
  44. 29 0
      tests/datawizard/scratch_opencl_kernel.cl
  45. 1 1
      tests/datawizard/unpartition.c
  46. 1 1
      tests/opt/datawizard/wt_broadcast.c
  47. 1 1
      tests/opt/datawizard/wt_host.c
  48. 1 1
      tests/main/declare_deps_after_submission.c
  49. 1 1
      tests/main/declare_deps_after_submission_synchronous.c
  50. 1 1
      tests/main/declare_deps_in_callback.c
  51. 1 1
      tests/main/get_current_task.c
  52. 1 1
      tests/main/wait_all_regenerable_tasks.c
  53. 0 48
      tests/opt/Makefile.am
  54. 27 3
      tests/perfmodels/non_linear_regression_based.c
  55. 69 0
      tests/perfmodels/opencl_memset.c
  56. 22 0
      tests/perfmodels/opencl_memset_kernel.cl
  57. 29 4
      tests/perfmodels/regression_based.c
  58. 3 1
      tools/starpu_machine_display.c

+ 15 - 8
Makefile.am

@@ -30,10 +30,6 @@ endif
 
 SUBDIRS += examples
 
-if COND_OPT
-SUBDIRS += tests/opt examples/opt
-endif
-
 if BUILD_GCC_PLUGIN
 SUBDIRS += gcc-plugin
 endif
@@ -69,24 +65,35 @@ versinclude_HEADERS = 				\
 	include/starpu_scheduler.h		\
 	include/starpu_top.h			\
 	include/starpu_deprecated_api.h         \
-	include/starpu_hash.h
+	include/starpu_hash.h			\
+	include/starpu_rand.h
 
 nodist_versinclude_HEADERS = 			\
 	include/starpu_config.h
 
 if BUILD_STARPU_TOP
+starpu-top/starpu_top$(EXEEXT):
+	cd starpu-top ; $(QMAKE) ; $(MAKE)
 all-local:
 	cd starpu-top ; $(QMAKE) ; $(MAKE)
 clean-local:
 	cd starpu-top ; $(QMAKE) ; $(MAKE) clean ; $(RM) Makefile
+	$(RM) starpu_top.1
 # TODO: resources
 install-exec-local:
 	$(MKDIR_P) $(DESTDIR)$(bindir)
-	$(INSTALL_STRIP_PROGRAM) starpu-top/StarPU-Top $(DESTDIR)$(bindir)
+	$(INSTALL_STRIP_PROGRAM) starpu-top/starpu_top $(DESTDIR)$(bindir)
 uninstall-local:
-	$(RM) $(DESTDIR)$(bindir)/StarPU-Top
-	$(RM) starpu-top/StarPU-Top
+	$(RM) $(DESTDIR)$(bindir)/starpu_top
+	$(RM) starpu-top/starpu_top
 	$(RM) starpu-top/Makefile
+
+if STARPU_HAVE_HELP2MAN
+starpu_top.1: starpu-top/starpu_top$(EXEEXT)
+	help2man --no-discard-stderr -N --output=$@ starpu-top/starpu_top$(EXEEXT)
+dist_man1_MANS =\
+	starpu_top.1
+endif
 endif
 
 if STARPU_HAVE_WINDOWS

+ 3 - 23
configure.ac

@@ -16,7 +16,7 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-AC_INIT([StarPU],1.0.0rc2, [starpu-devel@lists.gforge.inria.fr], starpu)
+AC_INIT([StarPU],1.0.0rc3, [starpu-devel@lists.gforge.inria.fr], starpu)
 AC_CONFIG_SRCDIR(include/starpu.h)
 AC_CONFIG_AUX_DIR([build-aux])
 
@@ -145,17 +145,7 @@ AC_ARG_ENABLE(default-drand48, [AS_HELP_STRING([--disable-default-drand48],
 				   [Do not use the default version of drand48])],
 				   enable_default_drand48=$enableval, enable_default_drand48=yes)
 if test x$have_drand48 = xyes -a x$enable_default_drand48 = xyes ; then
-  AC_DEFINE([starpu_srand48(seed)],[srand48(seed)],[srand48 equivalent function])
-  AC_DEFINE([starpu_drand48()],[drand48()],[drand48 equivalent function])
-  AC_DEFINE([starpu_erand48(xsubi)],[erand48(xsubi)],[erand48 equivalent function])
-  AC_DEFINE([starpu_srand48_r(seed, buffer)],[srand48_r(seed, buffer)],[srand48_r equivalent function])
-  AC_DEFINE([starpu_erand48_r(xsubi, buffer, result)],[erand48_r(xsubi, buffer, result)],[erand48_r equivalent function])
-else
-  AC_DEFINE([starpu_srand48(seed)],[srand(seed)],[srand48 equivalent function])
-  AC_DEFINE([starpu_drand48()],[((double)(rand()) / RAND_MAX)],[drand48 equivalent function])
-  AC_DEFINE([starpu_erand48(xsubi)],[starpu_drand48()],[erand48 equivalent function])
-  AC_DEFINE([starpu_srand48_r(seed, buffer)],[srand((unsigned int)seed)],[srand48_r equivalent function])
-  AC_DEFINE([starpu_erand48_r(xsubi, buffer, result)],[do {*(result) = ((double)(rand()) / RAND_MAX);} while (0);],[erand48_r equivalent function])
+   AC_DEFINE([STARPU_USE_DRAND48], [1], [Define to 1 if drandr48 is available and should be used])
 fi
 
 # Some systems do not define strerror_r
@@ -1562,14 +1552,6 @@ if test x$have_f77_h = xyes; then
         AC_DEFINE([STARPU_HAVE_F77_H], [1], [Define to 1 if you have the <f77.h> header file.])
 fi
 
-# Do we want to run optional tests
-AC_MSG_CHECKING(whether optional tests should be run)
-AC_ARG_ENABLE(optional_tests, [AS_HELP_STRING([--optional-tests],
-			[run optional tests])],
-			want_optional_tests=$enableval, want_optional_tests=no)
-AC_MSG_RESULT($want_optional_tests)
-AM_CONDITIONAL([COND_OPT], [test "$want_optional_tests" = yes])
-
 # Check if icc is available
 AC_CHECK_PROGS([ICC], [icc])
 
@@ -1623,7 +1605,7 @@ AC_CONFIG_COMMANDS([executable-scripts], [
 ])
 
 AC_CONFIG_FILES(tests/regression/regression.sh tests/regression/profiles tests/regression/profiles.build.only)
-AC_CONFIG_HEADER(src/common/config.h include/starpu_config.h gcc-plugin/src/starpu-gcc-config.h)
+AC_CONFIG_HEADER(src/common/config.h include/starpu_config.h gcc-plugin/src/starpu-gcc-config.h starpu-top/config.h)
 
 AC_OUTPUT([
 	Makefile
@@ -1642,10 +1624,8 @@ AC_OUTPUT([
 	starpufft/libstarpufft.pc
 	starpufft/starpufft-1.0.pc
 	examples/Makefile
-        examples/opt/Makefile
 	examples/stencil/Makefile
 	tests/Makefile
-        tests/opt/Makefile
 	doc/Makefile
 	mpi/Makefile
 	starpu-top/StarPU-Top.pro

+ 11 - 13
gcc-plugin/examples/vector_scal/vector_scal.c

@@ -29,16 +29,16 @@
 
 /* Declare and define the standard CPU implementation.  */
 
-static void vector_scal (size_t size, float vector[size], float factor)
+static void vector_scal (unsigned int size, float vector[size], float factor)
   __attribute__ ((task));
 
-static void vector_scal_cpu (size_t size, float vector[size], float factor)
+static void vector_scal_cpu (unsigned int size, float vector[size], float factor)
   __attribute__ ((task_implementation ("cpu", vector_scal)));
 
 static void
-vector_scal_cpu (size_t size, float vector[size], float factor)
+vector_scal_cpu (unsigned int size, float vector[size], float factor)
 {
-  size_t i;
+  unsigned int i;
   for (i = 0; i < size; i++)
     vector[i] *= factor;
 }
@@ -49,11 +49,11 @@ vector_scal_cpu (size_t size, float vector[size], float factor)
 
 #include <xmmintrin.h>
 
-static void vector_scal_sse (size_t size, float vector[size], float factor)
+static void vector_scal_sse (unsigned int size, float vector[size], float factor)
   __attribute__ ((task_implementation ("cpu", vector_scal)));
 
 static void
-vector_scal_sse (size_t size, float vector[size], float factor)
+vector_scal_sse (unsigned int size, float vector[size], float factor)
 {
   unsigned int n_iterations = size / 4;
 
@@ -85,11 +85,11 @@ vector_scal_sse (size_t size, float vector[size], float factor)
 /* The OpenCL programs, loaded from `main'.  */
 static struct starpu_opencl_program cl_programs;
 
-static void vector_scal_opencl (size_t size, float vector[size], float factor)
+static void vector_scal_opencl (unsigned int size, float vector[size], float factor)
   __attribute__ ((task_implementation ("opencl", vector_scal)));
 
 static void
-vector_scal_opencl (size_t size, float vector[size], float factor)
+vector_scal_opencl (unsigned int size, float vector[size], float factor)
 {
   int id, devid, err;
   cl_kernel kernel;
@@ -108,15 +108,13 @@ vector_scal_opencl (size_t size, float vector[size], float factor)
   if (err != CL_SUCCESS)
     STARPU_OPENCL_REPORT_ERROR (err);
 
-  /* XXX : clSetKernelArg will not work with a size_t ... */
-  int _size = size;
   err = clSetKernelArg (kernel, 0, sizeof (val), &val);
-  err |= clSetKernelArg (kernel, 1, sizeof (_size), &_size);
+  err |= clSetKernelArg (kernel, 1, sizeof (size), &size);
   err |= clSetKernelArg (kernel, 2, sizeof (factor), &factor);
   if (err)
     STARPU_OPENCL_REPORT_ERROR (err);
 
-  size_t global = _size, local = 1;
+  size_t global = 1, local = 1;
   err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &global, &local, 0,
 				NULL, &event);
   if (err != CL_SUCCESS)
@@ -137,7 +135,7 @@ vector_scal_opencl (size_t size, float vector[size], float factor)
 /* Declaration of the CUDA implementation.  The definition itself is in the
    `.cu' file itself.  */
 
-extern void vector_scal_cuda (size_t size, float vector[size], float factor)
+extern void vector_scal_cuda (unsigned int size, float vector[size], float factor)
   __attribute__ ((task_implementation ("cuda", vector_scal)));
 
 #endif

+ 1 - 1
gcc-plugin/examples/vector_scal/vector_scal_opencl_kernel.cl

@@ -28,7 +28,7 @@
  * OF  THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-__kernel void vector_mult_opencl(__global float* val, int nx, float factor)
+__kernel void vector_mult_opencl(__global float* val, unsigned int nx, float factor)
 {
         const int i = get_global_id(0);
         if (i < nx) {

+ 168 - 28
gcc-plugin/src/starpu.c

@@ -30,6 +30,7 @@ int plugin_is_GPL_compatible;
 #include <cpplib.h>
 #include <tree.h>
 #include <tree-iterator.h>
+#include <langhooks.h>
 
 #ifdef HAVE_C_FAMILY_C_COMMON_H
 # include <c-family/c-common.h>
@@ -86,7 +87,7 @@ static const char heap_allocated_orig_type_attribute_name[] =
 static const char codelet_struct_name[] = "starpu_codelet_gcc";
 
 /* Cached function declarations.  */
-static tree unpack_fn;
+static tree unpack_fn, data_lookup_fn;
 
 
 /* Forward declarations.  */
@@ -329,14 +330,9 @@ static tree build_error_statements (location_t, tree, const char *, ...)
 static tree
 build_error_statements (location_t loc, tree error_var, const char *fmt, ...)
 {
-  gcc_assert (TREE_CODE (error_var) == VAR_DECL
-	      && TREE_TYPE (error_var) == integer_type_node);
-
-  static tree strerror_fn;
-  LOOKUP_STARPU_FUNCTION (strerror_fn, "strerror");
-
   expanded_location xloc = expand_location (loc);
 
+  tree print;
   char *str, *fmt_long;
   va_list args;
 
@@ -346,18 +342,44 @@ build_error_statements (location_t loc, tree error_var, const char *fmt, ...)
      to be done in two steps.  */
 
   vasprintf (&str, fmt, args);
-  asprintf (&fmt_long, "%s:%d: error: %s: %%s\n",
-	    xloc.file, xloc.line, str);
 
-  tree error_code =
-    build1 (NEGATE_EXPR, TREE_TYPE (error_var), error_var);
-  tree print =
-    build_call_expr (built_in_decls[BUILT_IN_PRINTF], 2,
-		     build_string_literal (strlen (fmt_long) + 1, fmt_long),
-		     build_call_expr (strerror_fn, 1, error_code));
+  if (error_var != NULL_TREE)
+    {
+      /* ERROR_VAR is an error code.  */
+
+      static tree strerror_fn;
+      LOOKUP_STARPU_FUNCTION (strerror_fn, "strerror");
+
+      gcc_assert (TREE_CODE (error_var) == VAR_DECL
+		  && TREE_TYPE (error_var) == integer_type_node);
+
+      asprintf (&fmt_long, "%s:%d: error: %s: %%s\n",
+		xloc.file, xloc.line, str);
+
+      tree error_code =
+	build1 (NEGATE_EXPR, TREE_TYPE (error_var), error_var);
+      print =
+	build_call_expr (built_in_decls[BUILT_IN_PRINTF], 2,
+			 build_string_literal (strlen (fmt_long) + 1,
+					       fmt_long),
+			 build_call_expr (strerror_fn, 1, error_code));
+    }
+  else
+    {
+      /* No error code provided.  */
+
+      asprintf (&fmt_long, "%s:%d: error: %s\n",
+		xloc.file, xloc.line, str);
+
+      print =
+	build_call_expr (built_in_decls[BUILT_IN_PUTS], 1,
+			 build_string_literal (strlen (fmt_long) + 1,
+					       fmt_long));
+    }
 
   free (fmt_long);
   free (str);
+  va_end (args);
 
   tree stmts = NULL;
   append_to_statement_list (print, &stmts);
@@ -969,10 +991,95 @@ handle_task_attribute (tree *node, tree name, tree args,
 
   /* Lookup & cache function declarations for later reuse.  */
   LOOKUP_STARPU_FUNCTION (unpack_fn, "starpu_codelet_unpack_args");
+  LOOKUP_STARPU_FUNCTION (data_lookup_fn, "starpu_data_lookup");
 
   return NULL_TREE;
 }
 
+/* Diagnose use of C types that are either nonexistent or different in
+   OpenCL.  */
+
+static void
+validate_opencl_argument_type (location_t loc, const_tree type)
+{
+  /* When TYPE is a pointer type, get to the base element type.  */
+  for (; POINTER_TYPE_P (type); type = TREE_TYPE (type));
+
+  if (!RECORD_OR_UNION_TYPE_P (type) && !VOID_TYPE_P (type))
+    {
+      tree decl = TYPE_NAME (type);
+
+      if (DECL_P (decl))
+	{
+	  static const struct { const char *c; const char *cl; }
+	  type_map[] =
+	    {
+	      { "char", "cl_char" },
+	      { "unsigned char", "cl_uchar" },
+	      { "short int", "cl_short" },
+	      { "unsigned short", "cl_ushort" },
+	      { "int", "cl_int" },
+	      { "unsigned int", "cl_uint" },
+	      { "long int", "cl_long" },
+	      { "long unsigned int", "cl_ulong" },
+	      { "float", "cl_float" },
+	      { "double", "cl_double" },
+	      { NULL, NULL }
+	    };
+
+	  const char *c_name = IDENTIFIER_POINTER (DECL_NAME (decl));
+	  const char *cl_name =
+	    ({
+	      size_t i;
+	      for (i = 0; type_map[i].c != NULL; i++)
+		{
+		  if (strcmp (type_map[i].c, c_name) == 0)
+		    break;
+		}
+	      type_map[i].cl;
+	    });
+
+	  if (cl_name != NULL)
+	    {
+	      tree cl_type = lookup_name (get_identifier (cl_name));
+
+	      if (cl_type != NULL_TREE)
+		{
+		  if (DECL_P (cl_type))
+		    cl_type = TREE_TYPE (cl_type);
+
+		  if (!lang_hooks.types_compatible_p ((tree) type, cl_type))
+		    {
+		      tree st, sclt;
+
+		      st = c_common_signed_type ((tree) type);
+		      sclt = c_common_signed_type (cl_type);
+
+		      if (st == sclt)
+			warning_at (loc, 0, "C type %qE differs in signedness "
+				    "from the same-named OpenCL type",
+				    DECL_NAME (decl));
+		      else
+			/* TYPE should be avoided because the it differs from
+			   CL_TYPE, and thus cannot be used safely in
+			   `clSetKernelArg'.  */
+			warning_at (loc, 0, "C type %qE differs from the "
+				    "same-named OpenCL type",
+				    DECL_NAME (decl));
+		    }
+		}
+
+	      /* Otherwise we can't conclude.  It could be that <CL/cl.h>
+		 wasn't included in the program, for instance.  */
+	    }
+	  else
+	    /* Recommend against use of `size_t', etc.  */
+	    warning_at (loc, 0, "%qE does not correspond to a known "
+			"OpenCL type", DECL_NAME (decl));
+	}
+    }
+}
+
 /* Handle the `task_implementation (WHERE, TASK)' attribute.  WHERE is a
    string constant ("cpu", "cuda", etc.), and TASK is the identifier of a
    function declared with the `task' attribute.  */
@@ -1037,6 +1144,15 @@ handle_task_implementation_attribute (tree *node, tree name, tree args,
 	warning_at (loc, 0,
 		    "unsupported target %E; task implementation won't be used",
 		    where);
+      else if (task_implementation_target_to_int (where) == STARPU_OPENCL)
+	{
+	  void validate (tree t)
+	  {
+	    validate_opencl_argument_type (loc, t);
+	  }
+
+	  for_each (validate, TYPE_ARG_TYPES (TREE_TYPE (fn)));
+	}
 
       /* Keep the attribute.  */
       *no_add_attrs = false;
@@ -1930,7 +2046,9 @@ handle_pre_genericize (void *gcc_data, void *user_data)
 			      TYPE_ARG_TYPES (TREE_TYPE (task))));
 
 	  /* Build its body.  */
+	  current_function_decl = task;
 	  define_task (task);
+	  current_function_decl = fn;
 
 	  /* Compile TASK's body.  */
 	  rest_of_decl_compilation (task, true, 0);
@@ -1947,23 +2065,45 @@ handle_pre_genericize (void *gcc_data, void *user_data)
 static tree
 build_pointer_lookup (tree pointer)
 {
-#if 0
-  gimple emit_error_message (void)
-  {
-    static const char msg[] =
-      "starpu: task called with unregistered pointer, aborting\n";
+  /* Make sure DATA_LOOKUP_FN is valid.  */
+  LOOKUP_STARPU_FUNCTION (data_lookup_fn, "starpu_data_lookup");
 
-    return gimple_build_call (built_in_decls[BUILT_IN_PUTS], 1,
-			      build_string_literal (strlen (msg) + 1, msg));
-  }
-#endif
+  location_t loc;
 
-  static tree data_lookup_fn;
-  LOOKUP_STARPU_FUNCTION (data_lookup_fn, "starpu_data_lookup");
+  if (DECL_P (pointer))
+    loc = DECL_SOURCE_LOCATION (pointer);
+  else
+    loc = UNKNOWN_LOCATION;
+
+  /* Introduce a local variable to hold the handle.  */
+
+  tree result_var = build_decl (loc, VAR_DECL,
+  				create_tmp_var_name (".data_lookup_result"),
+  				ptr_type_node);
+  DECL_CONTEXT (result_var) = current_function_decl;
+  DECL_ARTIFICIAL (result_var) = true;
+  DECL_SOURCE_LOCATION (result_var) = loc;
+
+  tree call = build_call_expr (data_lookup_fn, 1, pointer);
+  tree assignment = build2 (INIT_EXPR, TREE_TYPE (result_var),
+  			    result_var, call);
 
-  return build_call_expr (data_lookup_fn, 1, pointer);
+  /* Build `if (RESULT_VAR == NULL) error ();'.  */
+
+  tree cond = build3 (COND_EXPR, void_type_node,
+		      build2 (EQ_EXPR, boolean_type_node,
+			      result_var, null_pointer_node),
+		      build_error_statements (loc, NULL_TREE,
+					      "attempt to use unregistered "
+					      "pointer"),
+		      NULL_TREE);
+
+  tree stmts = NULL;
+  append_to_statement_list (assignment, &stmts);
+  append_to_statement_list (cond, &stmts);
+  append_to_statement_list (result_var, &stmts);
 
-  /* FIXME: Add `if (VAR == NULL) abort ();'.  */
+  return build4 (TARGET_EXPR, ptr_type_node, result_var, stmts, NULL_TREE, NULL_TREE);
 }
 
 /* Build the body of TASK_DECL, which will call `starpu_insert_task'.  */

+ 1 - 0
gcc-plugin/tests/Makefile.am

@@ -36,6 +36,7 @@ gcc_tests =					\
   heap-allocated-errors.c			\
   verbose.c					\
   debug-tree.c					\
+  opencl-types.c				\
   shutdown-errors.c
 
 dist_noinst_HEADERS = mocks.h

+ 7 - 7
gcc-plugin/tests/base.c

@@ -1,5 +1,5 @@
 /* GCC-StarPU
-   Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
+   Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
 
    GCC-StarPU is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -21,21 +21,21 @@
 
 /* The task under test.  */
 
-static void my_scalar_task (int x, char y, int z) __attribute__ ((task));
+static void my_scalar_task (int x, unsigned char y, int z) __attribute__ ((task));
 
-static void my_scalar_task_cpu (int, char, int)
+static void my_scalar_task_cpu (int, unsigned char, int)
   __attribute__ ((task_implementation ("cpu", my_scalar_task)));
-static void my_scalar_task_opencl (int, char, int)
+static void my_scalar_task_opencl (int, unsigned char, int)
   __attribute__ ((task_implementation ("opencl", my_scalar_task)));
 
 static void
-my_scalar_task_cpu (int x, char y, int z)
+my_scalar_task_cpu (int x, unsigned char y, int z)
 {
   printf ("%s: x = %i, y = %i, z = %i\n", __func__, x, (int) y, z);
 }
 
 static void
-my_scalar_task_opencl (int x, char y, int z)
+my_scalar_task_opencl (int x, unsigned char y, int z)
 {
   printf ("%s: x = %i, y = %i, z = %i\n", __func__, x, (int) y, z);
 }
@@ -84,7 +84,7 @@ main (int argc, char *argv[])
 #pragma starpu hello
 
   int x = 42, z = 99;
-  char y = 77;
+  unsigned char y = 77;
   long y_as_long_int = 77;
 
   struct insert_task_argument expected[] =

+ 24 - 0
gcc-plugin/tests/mocks.h

@@ -28,9 +28,33 @@
 
 #include <stdlib.h>
 #include <stdarg.h>
+#include <stdint.h>
 #include <string.h>
 #include <assert.h>
 #include <common/uthash.h>
+#include <stdint.h>
+
+
+/* Typedefs as found in <CL/cl_platform.h>.  */
+
+typedef int8_t         cl_char;
+typedef uint8_t        cl_uchar;
+typedef int16_t        cl_short;
+typedef uint16_t       cl_ushort;
+typedef int32_t        cl_int;
+typedef uint32_t       cl_uint;
+#ifdef BREAK_CL_LONG
+/* Make `cl_long' different from `long' for test purposes.  */
+typedef int16_t        cl_long;
+typedef uint16_t       cl_ulong;
+#else
+typedef int64_t        cl_long;
+typedef uint64_t       cl_ulong;
+#endif
+
+typedef uint16_t       cl_half;
+typedef float          cl_float;
+typedef double         cl_double;
 
 
 /* Stub used for testing purposes.  */

+ 127 - 0
gcc-plugin/tests/opencl-types.c

@@ -0,0 +1,127 @@
+/* GCC-StarPU
+   Copyright (C) 2012 Institut National de Recherche en Informatique et Automatique
+
+   GCC-StarPU is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   GCC-StarPU is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC-StarPU.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Make sure use of `size_t' as a task argument type is flagged.  */
+
+/* (instructions compile) */
+
+#undef NDEBUG
+
+/* Please gimme a broken `cl_long'!  */
+#define BREAK_CL_LONG
+
+#include <mocks.h>
+#include <unistd.h>
+
+
+/* Make sure `size_t' is flagged.  */
+
+static void my_task (size_t size, int x[size])
+  __attribute__ ((task));
+
+static void my_task_cpu (size_t size, int x[size])
+  __attribute__ ((task_implementation ("cpu", my_task)));
+static void my_task_opencl (size_t size, int x[size]) /* (warning "size_t.*not.*known OpenCL type") */
+  __attribute__ ((task_implementation ("opencl", my_task)));
+
+static void
+my_task_cpu (size_t size, int x[size])
+{
+}
+
+static void
+my_task_opencl (size_t size, int x[size])
+{
+}
+
+
+/* Make sure types that have the same name in C and OpenCL but are actually
+   different are flagged.  We assume `sizeof (long) == 4' here.  */
+
+static void my_long_task (unsigned long size, int x[size])
+  __attribute__ ((task));
+
+static void my_long_task_cpu (unsigned long size, int x[size])
+  __attribute__ ((task_implementation ("cpu", my_long_task)));
+static void my_long_task_opencl (unsigned long size,  /* (warning "differs from the same-named OpenCL type") */
+				 int x[size])
+  __attribute__ ((task_implementation ("opencl", my_long_task)));
+
+static void
+my_long_task_cpu (unsigned long size, int x[size])
+{
+}
+
+static void
+my_long_task_opencl (unsigned long size, int x[size])
+{
+}
+
+
+/* Same with a pointer-to-long.  */
+
+static void my_long_ptr_task (unsigned long *p)
+  __attribute__ ((task));
+
+static void my_long_ptr_task_cpu (unsigned long *p)
+  __attribute__ ((task_implementation ("cpu", my_long_ptr_task)));
+static void my_long_ptr_task_opencl (unsigned long *p) /* (warning "differs from the same-named OpenCL type") */
+  __attribute__ ((task_implementation ("opencl", my_long_ptr_task)));
+
+static void
+my_long_ptr_task_cpu (unsigned long *p)
+{
+}
+
+static void
+my_long_ptr_task_opencl (unsigned long *p)
+{
+}
+
+
+/* Same with an array of unsigned chars.  */
+
+static void my_uchar_task (char c[])
+  __attribute__ ((task));
+
+static void my_uchar_task_cpu (char c[])
+  __attribute__ ((task_implementation ("cpu", my_uchar_task)));
+static void my_uchar_task_opencl (char c[]) /* (warning "differs in signedness from the same-named OpenCL type") */
+  __attribute__ ((task_implementation ("opencl", my_uchar_task)));
+
+static void
+my_uchar_task_cpu (char c[])
+{
+}
+
+static void
+my_uchar_task_opencl (char c[])
+{
+}
+
+
+/* No OpenCL, no problems.  */
+
+static void my_cool_task (size_t size, long long x[size])
+  __attribute__ ((task));
+
+static void my_cool_task_cpu (size_t size, long long x[size])
+  __attribute__ ((task_implementation ("cpu", my_cool_task)));
+
+static void
+my_cool_task_cpu (size_t size, long long x[size])
+{
+}

+ 26 - 16
gcc-plugin/tests/output-pointer.c

@@ -1,5 +1,5 @@
 /* GCC-StarPU
-   Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
+   Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
 
    GCC-StarPU is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -22,46 +22,46 @@
 
 /* The tasks under test.  */
 
-static void my_pointer_task (size_t size, __output int *x)
+static void my_pointer_task (int size, __output int *x)
   __attribute__ ((task));
 
-static void my_pointer_task_cpu (size_t size, __output int *x)
+static void my_pointer_task_cpu (int size, __output int *x)
   __attribute__ ((task_implementation ("cpu", my_pointer_task)));
-static void my_pointer_task_opencl (size_t size, __output int *x)
+static void my_pointer_task_opencl (int size, __output int *x)
   __attribute__ ((task_implementation ("opencl", my_pointer_task)));
 
 static void
-my_pointer_task_cpu (size_t size, __output int *x)
+my_pointer_task_cpu (int size, __output int *x)
 {
-  printf ("%s: x = %p, size = %zi\n", __func__, x, size);
+  printf ("%s: x = %p, size = %i\n", __func__, x, size);
 }
 
 static void
-my_pointer_task_opencl (size_t size, int *x)
+my_pointer_task_opencl (int size, int *x)
 {
-  printf ("%s: x = %p, size = %zi\n", __func__, x, size);
+  printf ("%s: x = %p, size = %i\n", __func__, x, size);
 }
 
 
 
-static void my_array_task (size_t size, __output int x[size])
+static void my_array_task (int size, __output int x[size])
   __attribute__ ((task));
 
-static void my_array_task_cpu (size_t size, __output int x[size])
+static void my_array_task_cpu (int size, __output int x[size])
   __attribute__ ((task_implementation ("cpu", my_array_task)));
-static void my_array_task_opencl (size_t size, __output int x[size])
+static void my_array_task_opencl (int size, __output int x[size])
   __attribute__ ((task_implementation ("opencl", my_array_task)));
 
 static void
-my_array_task_cpu (size_t size, __output int x[size])
+my_array_task_cpu (int size, __output int x[size])
 {
-  printf ("%s: x = %p, size = %zi\n", __func__, x, size);
+  printf ("%s: x = %p, size = %i\n", __func__, x, size);
 }
 
 static void
-my_array_task_opencl (size_t size, __output int x[size])
+my_array_task_opencl (int size, __output int x[size])
 {
-  printf ("%s: x = %p, size = %zi\n", __func__, x, size);
+  printf ("%s: x = %p, size = %i\n", __func__, x, size);
 }
 
 
@@ -71,9 +71,19 @@ main (int argc, char *argv[])
 {
 #pragma starpu initialize
 
-  size_t size = 42;
+  int size = 42;
   int x[size];
 
+  /* Register X (don't use the pragma, to avoid mixing concerns in this
+     test.)  */
+
+  starpu_data_handle_t handle;
+
+  expected_register_arguments.pointer = x;
+  expected_register_arguments.elements = 42;
+  expected_register_arguments.element_size = sizeof x[0];
+  starpu_vector_data_register (&handle, 0, (uintptr_t) x, 42, sizeof x[0]);
+
   struct insert_task_argument expected[] =
     {
       { STARPU_VALUE, &size, sizeof size },

+ 28 - 13
gcc-plugin/tests/pointers.c

@@ -1,5 +1,5 @@
 /* GCC-StarPU
-   Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
+   Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
 
    GCC-StarPU is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -21,42 +21,42 @@
 
 /* The tasks under test.  */
 
-static void my_pointer_task (const int *x, long long *y) __attribute__ ((task));
+static void my_pointer_task (const int *x, short *y) __attribute__ ((task));
 
-static void my_pointer_task_cpu (const int *, long long *)
+static void my_pointer_task_cpu (const int *, short *)
   __attribute__ ((task_implementation ("cpu", my_pointer_task)));
-static void my_pointer_task_opencl (const int *, long long *)
+static void my_pointer_task_opencl (const int *, short *)
   __attribute__ ((task_implementation ("opencl", my_pointer_task)));
 
 static void
-my_pointer_task_cpu (const int *x, long long *y)
+my_pointer_task_cpu (const int *x, short *y)
 {
   printf ("%s: x = %p, y = %p\n", __func__, x, y);
 }
 
 static void
-my_pointer_task_opencl (const int *x, long long *y)
+my_pointer_task_opencl (const int *x, short *y)
 {
   printf ("%s: x = %p, y = %p\n", __func__, x, y);
 }
 
 
 
-static void my_mixed_task (int *x, char z, const long long *y)
+static void my_mixed_task (int *x, unsigned char z, const short *y)
   __attribute__ ((task));
-static void my_mixed_task_cpu (int *, char, const long long *)
+static void my_mixed_task_cpu (int *, unsigned char, const short *)
   __attribute__ ((task_implementation ("cpu", my_mixed_task)));
-static void my_mixed_task_opencl (int *, char, const long long *)
+static void my_mixed_task_opencl (int *, unsigned char, const short *)
   __attribute__ ((task_implementation ("opencl", my_mixed_task)));
 
 static void
-my_mixed_task_cpu (int *x, char z, const long long *y)
+my_mixed_task_cpu (int *x, unsigned char z, const short *y)
 {
   printf ("%s: x = %p, y = %p, z = %i\n", __func__, x, y, (int) z);
 }
 
 static void
-my_mixed_task_opencl (int *x, char z, const long long *y)
+my_mixed_task_opencl (int *x, unsigned char z, const short *y)
 {
   printf ("%s: x = %p, y = %p, z = %i\n", __func__, x, y, (int) z);
 }
@@ -68,13 +68,28 @@ main (int argc, char *argv[])
 {
 #pragma starpu initialize
 
-  static const char z = 0x77;
+  static const unsigned char z = 0x77;
   int x[] = { 42 };
-  long long *y;
+  short *y;
 
   y = malloc (sizeof *y);
   *y = 77;
 
+  /* Register X and Y (don't use the pragma, to avoid mixing concerns in this
+     test.)  */
+
+  starpu_data_handle_t handle;
+
+  expected_register_arguments.pointer = x;
+  expected_register_arguments.elements = 1;
+  expected_register_arguments.element_size = sizeof x[0];
+  starpu_vector_data_register (&handle, 0, (uintptr_t) x, 1, sizeof x[0]);
+
+  expected_register_arguments.pointer = y;
+  expected_register_arguments.elements = 1;
+  expected_register_arguments.element_size = sizeof *y;
+  starpu_vector_data_register (&handle, 0, (uintptr_t) y, 1, sizeof *y);
+
   struct insert_task_argument expected_pointer_task[] =
     {
       { STARPU_R,  x },

+ 2 - 1
include/starpu.h

@@ -44,6 +44,7 @@ typedef unsigned long long uint64_t;
 #include <starpu_task_list.h>
 #include <starpu_scheduler.h>
 #include <starpu_expert.h>
+#include <starpu_rand.h>
 
 #ifdef __cplusplus
 extern "C"
@@ -87,7 +88,7 @@ int starpu_conf_init(struct starpu_conf *conf);
 /* Initialization method: it must be called prior to any other StarPU call
  * Default configuration is used if NULL is passed as argument.
  */
-int starpu_init(struct starpu_conf *conf);
+int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT;
 
 /* Shutdown method: note that statistics are only generated once StarPU is
  * shutdown */

+ 1 - 0
include/starpu_config.h.in

@@ -93,5 +93,6 @@ typedef ssize_t starpu_ssize_t;
 #endif
 
 #undef STARPU_SLOW_MACHINE
+#undef STARPU_USE_DRAND48
 
 #endif

+ 39 - 0
include/starpu_rand.h

@@ -0,0 +1,39 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_RAND_H__
+#define __STARPU_RAND_H__
+
+#include <stdlib.h>
+#include <starpu_config.h>
+
+#ifdef STARPU_USE_DRAND48
+typedef struct drand48_data starpu_drand48_data;
+#  define starpu_srand48(seed)				srand48(seed)
+#  define starpu_drand48()				drand48()
+#  define starpu_erand48(xsubi)				erand48(xsubi)
+#  define starpu_srand48_r(seed, buffer)		srand48_r(seed, buffer)
+#  define starpu_erand48_r(xsubi, buffer, result)	erand48_r(xsubi, buffer, result)
+#else
+typedef int starpu_drand48_data;
+#  define starpu_srand48(seed)				srand(seed)
+#  define starpu_drand48() 				(double)(rand()) / RAND_MAX
+#  define starpu_erand48(xsubi)				starpu_drand48()
+#  define starpu_srand48_r(seed, buffer) 		srand((unsigned int)seed)
+#  define starpu_erand48_r(xsubi, buffer, result)	do {*(result) = ((double)(rand()) / RAND_MAX);} while (0)
+#endif
+
+#endif /* __STARPU_RAND_H__ */

+ 9 - 3
include/starpu_scheduler.h

@@ -240,18 +240,24 @@ int starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node);
 
 /* Return the current date */
 double starpu_timing_now(void);
-/* Returns expected task duration in µs */
+/* Returns expected task duration in µs */
 double starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl);
 /* Returns an estimated speedup factor relative to CPU speed */
 double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtype);
-/* Returns expected data transfer time in µs */
+/* Returns expected data transfer time in µs */
 double starpu_task_expected_data_transfer_time(uint32_t memory_node, struct starpu_task *task);
-/* Predict the transfer time (in µs) to move a handle to a memory node */
+/* Predict the transfer time (in µs) to move a handle to a memory node */
 double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_access_mode mode);
 /* Returns expected power consumption in J */
 double starpu_task_expected_power(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl);
 /* Returns expected conversion time in ms (multiformat interface only) */
 double starpu_task_expected_conversion_time(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl);
+/* Return the expected duration of the entire task bundle in µs. */
+double starpu_task_bundle_expected_length(starpu_task_bundle_t bundle, enum starpu_perf_archtype arch, unsigned nimpl);
+/* Return the time (in µs) expected to transfer all data used within the bundle */
+double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundle, unsigned memory_node);
+/* Return the expected power consumption of the entire task bundle in J. */
+double starpu_task_bundle_expected_power(starpu_task_bundle_t bundle, enum starpu_perf_archtype arch, unsigned nimpl);
 
 #ifdef __cplusplus
 }

+ 2 - 2
include/starpu_task.h

@@ -300,14 +300,14 @@ struct starpu_task *starpu_task_create(void);
  * structure (default behaviour). Calling this function on a statically
  * allocated task results in an undefined behaviour. */
 void starpu_task_destroy(struct starpu_task *task);
-int starpu_task_submit(struct starpu_task *task);
+int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
 
 /* This function blocks until the task was executed. It is not possible to
  * synchronize with a task more than once. It is not possible to wait
  * synchronous or detached tasks.
  * Upon successful completion, this function returns 0. Otherwise, -EINVAL
  * indicates that the waited task was either synchronous or detached. */
-int starpu_task_wait(struct starpu_task *task);
+int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
 
 /* This function waits until all the tasks that were already submitted have
  * been executed. */

+ 3 - 1
socl/examples/basic/basic.c

@@ -149,8 +149,10 @@ int main(int UNUSED(argc), char** UNUSED(argv)) {
    err = clEnqueueReadBuffer(cq, dm, CL_FALSE, 0, REALSIZE, d, 0, NULL, &eventR);
    check(err, "clEnqueueReadBuffer");
 
+   printf("Finishing queue...\n");
    clFinish(cq);
 
+   printf("Data...\n");
    {
       int i;
       for (i=0; i<SIZE; i++) {
@@ -175,7 +177,7 @@ int main(int UNUSED(argc), char** UNUSED(argv)) {
    DURATION(eventR, "result buffer reading");
 #endif
 
-   
+
    printf("Releasing events...\n");
    err = clReleaseEvent(eventW1);
    err |= clReleaseEvent(eventW2);

+ 7 - 7
src/Makefile.am

@@ -152,18 +152,18 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 						\
 	core/sched_ctx.c					\
 	core/priorities.c					\
 	core/parallel_task.c					\
+	sched_policies/eager_central_policy.c			\
+	sched_policies/eager_central_priority_policy.c		\
+	sched_policies/work_stealing_policy.c			\
+	sched_policies/deque_modeling_policy_data_aware.c	\
 	sched_policies/heft.c					\
+	sched_policies/random_policy.c				\
 	sched_policies/stack_queues.c				\
 	sched_policies/deque_queues.c				\
 	sched_policies/fifo_queues.c				\
-	sched_policies/eager_central_policy.c			\
-	sched_policies/deque_modeling_policy_data_aware.c	\
-	sched_policies/eager_central_priority_policy.c		\
-	sched_policies/random_policy.c				\
-	sched_policies/work_stealing_policy.c			\
-	sched_policies/parallel_greedy.c			\
-	sched_policies/parallel_heft.c				\
 	sched_policies/detect_combined_workers.c		\
+	sched_policies/parallel_heft.c				\
+	sched_policies/parallel_greedy.c			\
 	drivers/driver_common/driver_common.c			\
 	datawizard/memory_nodes.c				\
 	datawizard/write_back.c					\

+ 2 - 1
src/core/dependencies/implicit_data_deps.c

@@ -494,7 +494,8 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_a
 		/* TODO detect if this is superflous */
 		int ret = _starpu_task_submit_internally(sync_task);
 		STARPU_ASSERT(!ret);
-		starpu_task_wait(sync_task);
+		ret = starpu_task_wait(sync_task);
+		STARPU_ASSERT(ret == 0);
 	}
 	else
 	{

+ 19 - 18
src/core/sched_policy.c

@@ -36,30 +36,31 @@ int starpu_get_prefetch_flag(void)
  *	Predefined policies
  */
 
-/* extern struct starpu_sched_policy _starpu_sched_ws_policy; */
-/* extern struct starpu_sched_policy _starpu_sched_prio_policy; */
-/* extern struct starpu_sched_policy _starpu_sched_random_policy; */
+extern struct starpu_sched_policy _starpu_sched_ws_policy;
+extern struct starpu_sched_policy _starpu_sched_prio_policy;
+extern struct starpu_sched_policy _starpu_sched_random_policy;
 extern struct starpu_sched_policy _starpu_sched_dm_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy;
-/* extern struct starpu_sched_policy _starpu_sched_eager_policy; */
-/* extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy; */
-/* extern struct starpu_sched_policy _starpu_sched_pgreedy_policy; */
+extern struct starpu_sched_policy _starpu_sched_eager_policy;
+extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy;
+extern struct starpu_sched_policy _starpu_sched_pgreedy_policy;
 extern struct starpu_sched_policy heft_policy;
 
-static struct starpu_sched_policy *predefined_policies[] = {
-	/* &_starpu_sched_ws_policy, */
-	/* &_starpu_sched_prio_policy, */
-	/* &_starpu_sched_dm_policy, */
-	/* &_starpu_sched_dmda_policy, */
-	&heft_policy
-	/* &_starpu_sched_dmda_ready_policy, */
-	/* &_starpu_sched_dmda_sorted_policy, */
-	/* &_starpu_sched_random_policy, */
-	/* &_starpu_sched_eager_policy, */
-	/* &_starpu_sched_parallel_heft_policy, */
-	/* &_starpu_sched_pgreedy_policy */
+static struct starpu_sched_policy *predefined_policies[] =
+{
+	&_starpu_sched_ws_policy,
+	&_starpu_sched_prio_policy,
+	&_starpu_sched_dm_policy,
+	&_starpu_sched_dmda_policy,
+	&heft_policy,
+	&_starpu_sched_dmda_ready_policy,
+	&_starpu_sched_dmda_sorted_policy,
+	&_starpu_sched_random_policy,
+	&_starpu_sched_eager_policy,
+	&_starpu_sched_parallel_heft_policy,
+	&_starpu_sched_pgreedy_policy
 };
 
 struct starpu_sched_policy *_starpu_get_sched_policy(struct _starpu_sched_ctx *sched_ctx)

+ 2 - 8
src/datawizard/data_request.c

@@ -28,8 +28,6 @@ static pthread_mutex_t data_requests_list_mutex[STARPU_MAXNODES];
 static struct _starpu_data_request_list *data_requests_pending[STARPU_MAXNODES];
 static pthread_mutex_t data_requests_pending_list_mutex[STARPU_MAXNODES];
 
-int starpu_memstrategy_drop_prefetch[STARPU_MAXNODES];
-
 void _starpu_init_data_request_lists(void)
 {
 	unsigned i;
@@ -41,8 +39,6 @@ void _starpu_init_data_request_lists(void)
 
 		data_requests_pending[i] = _starpu_data_request_list_new();
 		_STARPU_PTHREAD_MUTEX_INIT(&data_requests_pending_list_mutex[i], NULL);
-
-		starpu_memstrategy_drop_prefetch[i]=0;
 	}
 }
 
@@ -434,8 +430,6 @@ void _starpu_handle_node_data_requests(uint32_t src_node, unsigned may_alloc)
 
 void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc)
 {
-	starpu_memstrategy_drop_prefetch[src_node]=0;
-
 	struct _starpu_data_request *r;
 	struct _starpu_data_request_list *new_data_requests;
 	struct _starpu_data_request_list *new_prefetch_requests;
@@ -472,7 +466,6 @@ void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc
 		res = starpu_handle_data_request(r, may_alloc);
 		if (res == -ENOMEM )
 		{
-			starpu_memstrategy_drop_prefetch[src_node]=1;
 			if (r->prefetch)
 				_starpu_data_request_list_push_back(new_prefetch_requests, r);
 			else
@@ -484,12 +477,13 @@ void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc
 		}
 	}
 
-	while(!_starpu_data_request_list_empty(local_list) && starpu_memstrategy_drop_prefetch[src_node])
+	while(!_starpu_data_request_list_empty(local_list))
 	{
 		r = _starpu_data_request_list_pop_front(local_list);
 		if (r->prefetch)
 			_starpu_data_request_list_push_back(new_prefetch_requests, r);
 		else
+			/* Prefetch request promoted while in tmp list*/
 			_starpu_data_request_list_push_back(new_data_requests, r);
 	}
 

+ 2 - 1
src/debug/traces/starpu_fxt.c

@@ -1104,7 +1104,8 @@ void starpu_fxt_distrib_file_init(struct starpu_fxt_options *options)
 	{
 		distrib_time = fopen(options->distrib_time_path, "w+");
 	}
-	else {
+	else
+	{
 		distrib_time = NULL;
 	}
 }

+ 15 - 6
src/drivers/opencl/driver_opencl_utils.c

@@ -54,21 +54,24 @@ int _starpu_opencl_locate_file(const char *source_file_name, char *located_file_
 	{
 		sprintf(located_file_name, "%s/%s", _starpu_opencl_program_dir, source_file_name);
 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
-		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
+		if (access(located_file_name, R_OK) == 0)
+			ret = EXIT_SUCCESS;
 	}
 
 	if (ret == EXIT_FAILURE)
 	{
 		sprintf(located_file_name, "%s/%s", _STARPU_STRINGIFY(STARPU_OPENCL_DATADIR), source_file_name);
 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
-		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
+		if (access(located_file_name, R_OK) == 0)
+			ret = EXIT_SUCCESS;
 	}
 
 	if (ret == EXIT_FAILURE)
 	{
 		sprintf(located_file_name, "%s/%s", STARPU_SRC_DIR, source_file_name);
 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
-		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
+		if (access(located_file_name, R_OK) == 0)
+			ret = EXIT_SUCCESS;
 	}
 
 	if (ret == EXIT_FAILURE)
@@ -80,7 +83,11 @@ int _starpu_opencl_locate_file(const char *source_file_name, char *located_file_
 	else
 	{
 		char *last = strrchr(located_file_name, '/');
-		if (!last) strcpy(located_dir_name, "");
+
+		if (!last)
+		{
+			strcpy(located_dir_name, "");
+		}
 		else
 		{
 			sprintf(located_dir_name, "%s", located_file_name);
@@ -112,7 +119,8 @@ cl_int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, str
 
         // Create the compute kernel in the program we wish to run
         *kernel = clCreateKernel(program, kernel_name, &err);
-	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
 
 	return CL_SUCCESS;
 }
@@ -122,7 +130,8 @@ cl_int starpu_opencl_release_kernel(cl_kernel kernel)
 	cl_int err;
 
 	err = clReleaseKernel(kernel);
-	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
 
         return CL_SUCCESS;
 }

+ 1 - 1
starpu-top/StarPU-Top-common.pri

@@ -5,7 +5,7 @@ QT += network
 QT += opengl
 QT += sql
 
-TARGET = StarPU-Top
+TARGET = starpu_top
 TEMPLATE = app
 SOURCES += $$SRCDIR/main.cpp \
 #STARPU-TOP

+ 1 - 1
starpu-top/StarPU-Top.pro.in

@@ -1,3 +1,3 @@
 SRCDIR=@srcdir@
 include ($$SRCDIR/StarPU-Top-common.pri)
-include ($$SRCDIR/StarPU-Top-qwt-@QWT_PRI@.pri)
+include (StarPU-Top-qwt-@QWT_PRI@.pri)

+ 29 - 0
starpu-top/config.h.in

@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_TOP_CONFIG_H__
+#define __STARPU_TOP_CONFIG_H__
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Major version number of StarPU. */
+#undef STARPU_MAJOR_VERSION
+
+/* Minor version number of StarPU. */
+#undef STARPU_MINOR_VERSION
+
+#endif /* __STARPU_TOP_CONFIG_H__ */

+ 53 - 10
starpu-top/main.cpp

@@ -1,7 +1,7 @@
 /*
 = StarPU-Top for StarPU =
 
-Copyright (C) 2011 
+Copyright (C) 2011
 William Braik
 Yann Courtois
 Jean-Marie Couteyen
@@ -25,19 +25,62 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include <QtGui/QApplication>
 #include "mainwindow.h"
+#include <string.h>
+#include <config.h>
+
+#define PROGNAME "starpu_top"
+
+static void parse_args(int argc, char **argv)
+{
+	if (argc == 1)
+		return;
+
+	if (argc > 2 || /* Argc should be either 1 or 2 */
+	    strncmp(argv[1], "--help", 6) == 0 ||
+	    strncmp(argv[1], "-h", 2) == 0)
+	{
+		(void) fprintf(stderr, "\
+starpu-top is an interface which remotely displays the        \n\
+on-line state of a StarPU application and permits the user    \n\
+to change parameters on the fly.                              \n\
+                                                              \n\
+Usage: %s [OPTION]                                            \n\
+                                                              \n\
+Options:                                                      \n\
+	-h, --help       display this help and exit           \n\
+	-v, --version    output version information and exit  \n\
+                                                              \n\
+Report bugs to <" PACKAGE_BUGREPORT ">.",
+PROGNAME);
+	}
+	else if (strncmp(argv[1], "--version", 9) == 0 ||
+		 strncmp(argv[1], "-v", 2) == 0)
+	{
+		(void) fprintf(stderr, "%s %d.%d\n",
+			PROGNAME, STARPU_MAJOR_VERSION, STARPU_MINOR_VERSION);
+	}
+	else
+	{
+		fprintf(stderr, "Unknown arg %s\n", argv[1]);
+	}
+
+	exit(EXIT_FAILURE);
+}
 
 int main(int argc, char *argv[])
 {
-    QApplication a(argc, argv);
+	parse_args(argc, argv);
+
+	QApplication a(argc, argv);
 
-    // Application description
-    QCoreApplication::setOrganizationName("INRIA-Bordeaux");
-    QCoreApplication::setOrganizationDomain("runtime.bordeaux.inria.fr");
-    QCoreApplication::setApplicationName("StarPU-Top");
-    QCoreApplication::setApplicationVersion("0.1");
+	// Application description
+	QCoreApplication::setOrganizationName("INRIA Bordeaux Sud-Ouest");
+	QCoreApplication::setOrganizationDomain("runtime.bordeaux.inria.fr");
+	QCoreApplication::setApplicationName("StarPU-Top");
+	QCoreApplication::setApplicationVersion("0.1");
 
-    MainWindow w;
-    w.show();
+	MainWindow w;
+	w.show();
 
-    return a.exec();
+	return a.exec();
 }

+ 61 - 1
tests/Makefile.am

@@ -24,9 +24,12 @@ EXTRA_DIST =					\
 	helper.h				\
 	datawizard/scal.h			\
 	microbenchs/null_kernel_gordon.c	\
+	datawizard/scratch_opencl_kernel.cl     \
 	datawizard/sync_and_notify_data_gordon_kernels.c \
 	datawizard/sync_and_notify_data_opencl_codelet.cl\
+	datawizard/opencl_codelet_unsigned_inc_kernel.cl \
 	coverage/coverage.sh			\
+	datawizard/acquire_release_opencl_kernel.cl \
 	datawizard/interfaces/test_interfaces.h	\
 	datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \
 	datawizard/interfaces/matrix/matrix_opencl_kernel.cl \
@@ -37,7 +40,8 @@ EXTRA_DIST =					\
 	datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl \
 	datawizard/interfaces/multiformat/advanced/generic.h \
 	datawizard/interfaces/csr/csr_opencl_kernel.cl \
-	datawizard/interfaces/block/block_opencl_kernel.cl
+	datawizard/interfaces/block/block_opencl_kernel.cl \
+	perfmodels/opencl_memset_kernel.cl
 
 CLEANFILES = 					\
 	*.gcno *.gcda *.linkinfo		\
@@ -186,6 +190,8 @@ noinst_PROGRAMS =				\
 	datawizard/in_place_partition   	\
 	datawizard/partition_lazy		\
 	datawizard/gpu_register   		\
+	datawizard/wt_host			\
+	datawizard/wt_broadcast			\
 	errorcheck/starpu_init_noworker		\
 	errorcheck/invalid_blocking_calls	\
 	errorcheck/invalid_tasks		\
@@ -223,6 +229,12 @@ if STARPU_USE_CUDA
 datawizard_acquire_release_SOURCES +=		\
 	datawizard/acquire_release_cuda.cu
 endif
+if STARPU_USE_OPENCL
+datawizard_acquire_release_SOURCES +=		\
+	datawizard/acquire_release_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	datawizard/acquire_release_opencl_kernel.cl
+endif
 
 datawizard_acquire_release2_SOURCES =		\
 	datawizard/acquire_release2.c
@@ -230,6 +242,10 @@ if STARPU_USE_CUDA
 datawizard_acquire_release2_SOURCES +=		\
 	datawizard/acquire_release_cuda.cu
 endif
+if STARPU_USE_OPENCL
+datawizard_acquire_release2_SOURCES +=		\
+	datawizard/acquire_release_opencl.c
+endif
 
 datawizard_scratch_SOURCES =			\
 	datawizard/scratch.c
@@ -237,6 +253,12 @@ if STARPU_USE_CUDA
 datawizard_scratch_SOURCES +=		\
 	datawizard/scratch_cuda.cu
 endif
+if STARPU_USE_OPENCL
+datawizard_scratch_SOURCES += \
+	datawizard/scratch_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	datawizard/scratch_opencl_kernel.cl
+endif
 
 datawizard_mpi_like_SOURCES =		\
 	datawizard/mpi_like.c
@@ -244,6 +266,12 @@ if STARPU_USE_CUDA
 datawizard_mpi_like_SOURCES +=			\
 	datawizard/cuda_codelet_unsigned_inc.cu
 endif
+if STARPU_USE_OPENCL
+datawizard_mpi_like_SOURCES +=			\
+	datawizard/opencl_codelet_unsigned_inc.c
+nobase_STARPU_OPENCL_DATA_DATA+= \
+	datawizard/opencl_codelet_unsigned_inc_kernel.cl
+endif
 
 datawizard_mpi_like_async_SOURCES =		\
 	datawizard/mpi_like_async.c
@@ -251,6 +279,12 @@ if STARPU_USE_CUDA
 datawizard_mpi_like_async_SOURCES +=		\
 	datawizard/cuda_codelet_unsigned_inc.cu
 endif
+if STARPU_USE_OPENCL
+datawizard_mpi_like_async_SOURCES +=			\
+	datawizard/opencl_codelet_unsigned_inc.c
+nobase_STARPU_OPENCL_DATA_DATA+= \
+	datawizard/opencl_codelet_unsigned_inc_kernel.cl
+endif
 
 datawizard_sync_and_notify_data_SOURCES =	\
 	datawizard/sync_and_notify_data.c
@@ -312,6 +346,11 @@ datawizard_gpu_register_SOURCES +=	\
 	datawizard/scal_opencl.cl
 endif
 
+datawizard_wt_host_SOURCES =			\
+	datawizard/wt_host.c
+datawizard_wt_broadcast_SOURCES =		\
+	datawizard/wt_broadcast.c
+
 if STARPU_USE_GORDON
 datawizard_sync_and_notify_data_SOURCES +=	\
 	datawizard/sync_and_notify_data_gordon_kernels.c
@@ -483,5 +522,26 @@ datawizard_interfaces_void_void_interface_SOURCES=\
 	datawizard/interfaces/test_interfaces.c        \
 	datawizard/interfaces/void/void_interface.c
 
+
+perfmodels_regression_based_SOURCES=\
+	perfmodels/regression_based.c
+
+if STARPU_USE_OPENCL
+perfmodels_regression_based_SOURCES+=\
+	perfmodels/opencl_memset.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	perfmodels/opencl_memset_kernel.cl
+endif
+
+perfmodels_non_linear_regression_based_SOURCES=\
+	perfmodels/non_linear_regression_based.c
+
+if STARPU_USE_OPENCL
+perfmodels_non_linear_regression_based_SOURCES+=\
+	perfmodels/opencl_memset.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	perfmodels/opencl_memset_kernel.cl
+endif
+
 showcheck:
 	-cat $(TEST_LOGS) /dev/null

+ 23 - 1
tests/datawizard/acquire_release.c

@@ -17,6 +17,9 @@
 
 #include <config.h>
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include "../helper.h"
 
 #ifdef STARPU_SLOW_MACHINE
@@ -28,6 +31,9 @@ static unsigned ntasks = 10000;
 #ifdef STARPU_USE_CUDA
 extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
+#ifdef STARPU_USE_OPENCL
+extern void increment_opencl(void *buffers[], void *args);
+#endif
 
 void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 {
@@ -40,11 +46,13 @@ void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 static struct starpu_codelet increment_cl =
 {
 	.modes = { STARPU_RW },
-        .where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {increment_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {increment_cuda, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {increment_opencl, NULL},
+#endif
 	.nbuffers = 1
 };
 
@@ -67,6 +75,9 @@ void callback(void *arg __attribute__ ((unused)))
         starpu_data_release(token_handle);
 }
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
 int main(int argc, char **argv)
 {
 	int i;
@@ -76,6 +87,11 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/acquire_release_opencl_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
 	starpu_variable_data_register(&token_handle, 0, (uintptr_t)&token, sizeof(unsigned));
 
         FPRINTF(stderr, "Token: %u\n", token);
@@ -99,6 +115,9 @@ int main(int argc, char **argv)
 
 	starpu_data_unregister(token_handle);
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 
         FPRINTF(stderr, "Token: %u\n", token);
@@ -113,6 +132,9 @@ enodev:
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 	return STARPU_TEST_SKIPPED;
 }

+ 25 - 1
tests/datawizard/acquire_release2.c

@@ -16,6 +16,10 @@
 
 #include <config.h>
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
+
 #include "../helper.h"
 
 static unsigned ntasks = 40000;
@@ -23,6 +27,9 @@ static unsigned ntasks = 40000;
 #ifdef STARPU_USE_CUDA
 extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
+#ifdef STARPU_USE_OPENCL
+extern void increment_opencl(void *buffers[], void *args);
+#endif
 
 void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 {
@@ -35,11 +42,13 @@ void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 static struct starpu_codelet increment_cl =
 {
 	.modes = { STARPU_RW },
-        .where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {increment_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {increment_cuda, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {increment_opencl, NULL},
+#endif
 	.nbuffers = 1
 };
 
@@ -64,6 +73,9 @@ void callback(void *arg __attribute__ ((unused)))
 #  warning TODO add threads
 #endif
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
 int main(int argc, char **argv)
 {
 	int i;
@@ -73,6 +85,12 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/acquire_release_opencl_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
 	starpu_variable_data_register(&token_handle, 0, (uintptr_t)&token, sizeof(unsigned));
 
         FPRINTF(stderr, "Token: %u\n", token);
@@ -96,6 +114,9 @@ int main(int argc, char **argv)
 
 	starpu_data_unregister(token_handle);
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 
         FPRINTF(stderr, "Token: %u\n", token);
@@ -110,6 +131,9 @@ enodev:
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 	return STARPU_TEST_SKIPPED;
 }

+ 67 - 0
tests/datawizard/acquire_release_opencl.c

@@ -0,0 +1,67 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+extern struct starpu_opencl_program opencl_program;
+void increment_opencl(void *buffers[], void *args)
+{
+	(void) args;
+	int id, devid;
+        cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_increment_opencl_codelet", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=1;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+                if (local > global)
+			local=global;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+}

+ 20 - 0
tests/datawizard/acquire_release_opencl_kernel.cl

@@ -0,0 +1,20 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void _increment_opencl_codelet(__global unsigned *val)
+{
+	val[0]++;
+}

+ 3 - 3
tests/datawizard/data_implicit_deps.c

@@ -40,9 +40,9 @@ static void f(void *descr[], __attribute__ ((unused)) void *_args)
 static struct starpu_codelet cl_f =
 {
 	.modes = { STARPU_R, STARPU_RW },
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {f, NULL},
 	.cuda_funcs = {f, NULL},
+	.opencl_funcs = {f, NULL},
 	.nbuffers = 2
 };
 
@@ -57,9 +57,9 @@ static void g(void *descr[], __attribute__ ((unused)) void *_args)
 static struct starpu_codelet cl_g =
 {
 	.modes = { STARPU_R, STARPU_RW },
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {g, NULL},
 	.cuda_funcs = {g, NULL},
+	.opencl_funcs = {g, NULL},
 	.nbuffers = 2
 };
 
@@ -74,9 +74,9 @@ static void h(void *descr[], __attribute__ ((unused)) void *_args)
 static struct starpu_codelet cl_h =
 {
 	.modes = { STARPU_R, STARPU_RW },
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {h, NULL},
 	.cuda_funcs = {h, NULL},
+	.opencl_funcs = {h, NULL},
 	.nbuffers = 2
 };
 

+ 36 - 2
tests/datawizard/data_invalidation.c

@@ -21,6 +21,9 @@
 #include <errno.h>
 #include <starpu.h>
 #include <starpu_cuda.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include <stdlib.h>
 #include "../helper.h"
 
@@ -50,6 +53,35 @@ static void cuda_memset_codelet(void *descr[], __attribute__ ((unused)) void *_a
 }
 #endif
 
+#ifdef STARPU_USE_OPENCL
+static void opencl_memset_codelet(void *buffers[], void *args)
+{
+	(void) args;
+
+	cl_command_queue queue;
+	int id = starpu_worker_get_id();
+	int devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_queue(devid, &queue);
+
+	cl_mem buffer = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
+	unsigned length = STARPU_VECTOR_GET_NX(buffers[0]);
+	char *v = malloc(length);
+	STARPU_ASSERT(v != NULL);
+	memset(v, 42, length);
+
+	clEnqueueWriteBuffer(queue,
+			buffer,
+			CL_TRUE,
+			0,      /* offset */
+			length, /* sizeof (char) */
+			v,
+			0,      /* num_events_in_wait_list */
+			NULL,   /* event_wait_list */
+			NULL    /* event */);
+			
+}
+#endif /* !STARPU_USE_OPENCL */
+
 static void cpu_memset_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	STARPU_SKIP_IF_VALGRIND;
@@ -57,16 +89,18 @@ static void cpu_memset_codelet(void *descr[], __attribute__ ((unused)) void *_ar
 	char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned length = STARPU_VECTOR_GET_NX(descr[0]);
 
-	memset(buf, 42, length);
+	memset(buf, 42, length * sizeof(*buf));
 }
 
 static struct starpu_codelet memset_cl =
 {
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {cpu_memset_codelet, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cuda_memset_codelet, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {opencl_memset_codelet, NULL},
+#endif
 	.nbuffers = 1,
 	.modes = {STARPU_W}
 };

+ 34 - 1
tests/datawizard/handle_to_pointer.c

@@ -18,6 +18,9 @@
 #include <assert.h>
 
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include <stdlib.h>
 #include "../helper.h"
 
@@ -53,13 +56,43 @@ static void cuda_task(void **buffers, void *args)
 }
 #endif
 
+#ifdef STARPU_USE_OPENCL
+static void opencl_task(void *buffers[], void *args)
+{
+	cl_command_queue queue;
+	int id = starpu_worker_get_id();
+	int devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_queue(devid, &queue);
+
+	cl_mem numbers = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
+	unsigned size = STARPU_VECTOR_GET_NX(buffers[0]);
+
+	unsigned i;
+	for (i = 0; i < size; i++)
+	{
+		clEnqueueWriteBuffer(queue,
+				numbers,
+				CL_TRUE,
+				i*sizeof(int),  /* offset */
+				sizeof(int),
+				&i,
+				0,              /* num_events_in_wait_list */
+				NULL,           /* event_wait_list */
+				NULL            /* event */);
+	}
+			
+}
+#endif
+
 static struct starpu_codelet cl =
 {
-	.where = STARPU_CPU | STARPU_CUDA,
 	.cpu_funcs = {cpu_task, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cuda_task, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {opencl_task, NULL},
+#endif
 	.nbuffers = 1,
 	.modes = {STARPU_W}
 };

+ 2 - 1
tests/datawizard/interfaces/multiformat/advanced/multiformat_data_release.c

@@ -67,7 +67,8 @@ create_and_submit(int where)
 	/* We need to be sure the data has been copied to the GPU at the end 
 	 * of this function */
 	task->synchronous = 1;
-	starpu_task_submit(task);
+	if (starpu_task_submit(task) == -ENODEV)
+		exit(STARPU_TEST_SKIPPED);
 }
 #endif
 

+ 75 - 3
tests/datawizard/lazy_allocation.c

@@ -1,6 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2012       inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +20,9 @@
 #include <unistd.h>
 #include <errno.h>
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include <starpu_cuda.h>
 #include <stdlib.h>
 #include "../helper.h"
@@ -44,6 +48,35 @@ static void cuda_memset_codelet(void *descr[], __attribute__ ((unused)) void *_a
 }
 #endif
 
+#ifdef STARPU_USE_OPENCL
+static void opencl_memset_codelet(void *buffers[], void *args)
+{
+	(void) args;
+
+	cl_command_queue queue;
+	int id = starpu_worker_get_id();
+	int devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_queue(devid, &queue);
+
+	cl_mem buffer = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
+	unsigned length = STARPU_VECTOR_GET_NX(buffers[0]);
+	char *v = malloc(length);
+	STARPU_ASSERT(v != NULL);
+	memset(v, 42, length);
+
+	clEnqueueWriteBuffer(queue,
+			buffer,
+			CL_TRUE,
+			0,      /* offset */
+			length, /* sizeof (char) */
+			v,
+			0,      /* num_events_in_wait_list */
+			NULL,   /* event_wait_list */
+			NULL    /* event */);
+			
+}
+#endif
+
 static void cpu_memset_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	STARPU_SKIP_IF_VALGRIND;
@@ -51,16 +84,18 @@ static void cpu_memset_codelet(void *descr[], __attribute__ ((unused)) void *_ar
 	char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned length = STARPU_VECTOR_GET_NX(descr[0]);
 
-	memset(buf, 42, length);
+	memset(buf, 42, length * sizeof(*buf));
 }
 
 static struct starpu_codelet memset_cl =
 {
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {cpu_memset_codelet, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cuda_memset_codelet, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {opencl_memset_codelet, NULL},
+#endif
 	.nbuffers = 1,
 	.modes = {STARPU_W}
 };
@@ -108,14 +143,51 @@ static void cuda_check_content_codelet(void *descr[], __attribute__ ((unused)) v
 	}
 }
 #endif
+#ifdef STARPU_USE_OPENCL
+static void opencl_check_content_codelet(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	cl_command_queue queue;
+	int id = starpu_worker_get_id();
+	int devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_queue(devid, &queue);
+
+	cl_mem buf = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
+	unsigned length = STARPU_VECTOR_GET_NX(buffers[0]);
+
+	unsigned i;
+	for (i = 0; i < length; i++)
+	{
+		char dst;
+		clEnqueueReadBuffer(
+			queue,
+			buf,
+			CL_TRUE,
+			i * sizeof(dst),
+			sizeof(dst),
+			&dst,
+			0,      /* num_events_in_wait_list */
+			NULL,   /* event_wait_list */
+			NULL    /* event */);
+		if (dst != 42)
+		{
+			FPRINTF(stderr, "buf[%u] is %c while it should be %c\n", i, dst, 42);
+			exit(-1);
+		}
+	}
+}
+#endif
 
 static struct starpu_codelet check_content_cl =
 {
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {cpu_check_content_codelet, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cuda_check_content_codelet, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {opencl_check_content_codelet, NULL},
+#endif
 	.nbuffers = 1,
 	.modes = {STARPU_R}
 };

+ 21 - 1
tests/datawizard/mpi_like.c

@@ -17,6 +17,9 @@
 
 #include <config.h>
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include <errno.h>
 #include <pthread.h>
 #include "../helper.h"
@@ -49,6 +52,9 @@ static struct thread_data problem_data[NTHREADS];
 #ifdef STARPU_USE_CUDA
 void cuda_codelet_unsigned_inc(void *descr[], __attribute__ ((unused)) void *cl_arg);
 #endif
+#ifdef STARPU_USE_OPENCL
+void opencl_codelet_unsigned_inc(void *buffers[], void *args);
+#endif
 
 static void increment_handle_cpu_kernel(void *descr[], void *cl_arg __attribute__((unused)))
 {
@@ -61,11 +67,13 @@ static void increment_handle_cpu_kernel(void *descr[], void *cl_arg __attribute_
 static struct starpu_codelet increment_handle_cl =
 {
 	.modes = { STARPU_RW },
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {increment_handle_cpu_kernel, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cuda_codelet_unsigned_inc, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {opencl_codelet_unsigned_inc, NULL},
+#endif
 	.nbuffers = 1
 };
 
@@ -163,6 +171,10 @@ static void *thread_func(void *arg)
 	return NULL;
 }
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
 int main(int argc, char **argv)
 {
 	int ret;
@@ -171,6 +183,11 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/opencl_codelet_unsigned_inc_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
 	unsigned t;
 	for (t = 0; t < NTHREADS; t++)
 	{
@@ -206,6 +223,9 @@ int main(int argc, char **argv)
 		starpu_data_unregister(problem_data[t].handle);
 	}
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 
 	ret = EXIT_SUCCESS;

+ 23 - 1
tests/datawizard/mpi_like_async.c

@@ -17,6 +17,9 @@
 
 #include <config.h>
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include <pthread.h>
 #include "../helper.h"
 
@@ -65,6 +68,9 @@ static struct thread_data problem_data[NTHREADS_DEFAULT];
 #ifdef STARPU_USE_CUDA
 void cuda_codelet_unsigned_inc(void *descr[], __attribute__ ((unused)) void *cl_arg);
 #endif
+#ifdef STARPU_USE_OPENCL
+void opencl_codelet_unsigned_inc(void *buffers[], void *args);
+#endif
 
 static void increment_handle_cpu_kernel(void *descr[], void *cl_arg __attribute__((unused)))
 {
@@ -79,11 +85,13 @@ static void increment_handle_cpu_kernel(void *descr[], void *cl_arg __attribute_
 static struct starpu_codelet increment_handle_cl =
 {
 	.modes = { STARPU_RW },
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {increment_handle_cpu_kernel, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cuda_codelet_unsigned_inc, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = { opencl_codelet_unsigned_inc, NULL},
+#endif
 	.nbuffers = 1
 };
 
@@ -295,6 +303,10 @@ static void *thread_func(void *arg)
 	return NULL;
 }
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
 int main(int argc, char **argv)
 {
 	int ret;
@@ -308,6 +320,13 @@ int main(int argc, char **argv)
 	ret = starpu_init(NULL);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/opencl_codelet_unsigned_inc_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
 	/* Create a thread to perform blocking calls */
 	pthread_t progress_thread;
 	_STARPU_PTHREAD_MUTEX_INIT(&data_req_mutex, NULL);
@@ -370,6 +389,9 @@ int main(int argc, char **argv)
 		starpu_data_unregister(problem_data[t].handle);
 	}
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 
 	STARPU_RETURN(ret);

+ 69 - 0
tests/datawizard/opencl_codelet_unsigned_inc.c

@@ -0,0 +1,69 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+extern struct starpu_opencl_program opencl_program;
+
+void opencl_codelet_unsigned_inc(void *buffers[], void *args)
+{
+	(void) args;
+
+	int id, devid;
+        cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	cl_mem val = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_opencl_unsigned_inc", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=1;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+                if (local > global)
+			local=global;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+}

+ 20 - 0
tests/datawizard/opencl_codelet_unsigned_inc_kernel.cl

@@ -0,0 +1,20 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void _opencl_unsigned_inc(__global unsigned int *val)
+{
+	val[0]++;
+}

+ 24 - 1
tests/datawizard/scratch.c

@@ -20,6 +20,9 @@
 #include <unistd.h>
 #include <errno.h>
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include <stdlib.h>
 #include "../helper.h"
 
@@ -34,6 +37,9 @@ starpu_data_handle_t A_handle, B_handle;
 #ifdef STARPU_USE_CUDA
 extern void cuda_f(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
+#ifdef STARPU_USE_OPENCL
+extern void opencl_f(void *buffers[], void *args);
+#endif
 
 static void cpu_f(void *descr[], __attribute__ ((unused)) void *_args)
 {
@@ -56,15 +62,21 @@ static void cpu_f(void *descr[], __attribute__ ((unused)) void *_args)
 
 static struct starpu_codelet cl_f =
 {
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {cpu_f, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cuda_f, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {opencl_f, NULL},
+#endif
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_SCRATCH}
 };
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
 int main(int argc, char **argv)
 {
 	int ret;
@@ -73,6 +85,11 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scratch_opencl_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
 	A = (unsigned *) calloc(VECTORSIZE, sizeof(unsigned));
 
 	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, VECTORSIZE, sizeof(unsigned));
@@ -96,6 +113,9 @@ int main(int argc, char **argv)
 
 	starpu_data_unregister(A_handle);
 	starpu_data_unregister(B_handle);
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 
 	/* Check result */
@@ -116,6 +136,9 @@ int main(int argc, char **argv)
 enodev:
 	starpu_data_unregister(A_handle);
 	starpu_data_unregister(B_handle);
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */

+ 72 - 0
tests/datawizard/scratch_opencl.c

@@ -0,0 +1,72 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+extern struct starpu_opencl_program opencl_program;
+
+void opencl_f(void *buffers[], void *args)
+{
+	(void) args;
+	int id, devid;
+        cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+	cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
+	cl_mem tmp = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "increment_vector_opencl", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
+	err|= clSetKernelArg(kernel, 1, sizeof(tmp), &tmp);
+	err|= clSetKernelArg(kernel, 2, sizeof(n), &n);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+                if (local > global)
+			local=global;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+}

+ 29 - 0
tests/datawizard/scratch_opencl_kernel.cl

@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void increment_vector_opencl(__global unsigned *val,
+				      __global unsigned *tmp,
+				      unsigned nx)
+{
+        const int tid = get_global_id(0);
+	const uint nthreads = get_local_size(0);
+
+	int i;
+	for (i = tid; i < nx; i += nthreads)
+	{
+		val[i] = tmp[i] + 1;
+	}
+}

+ 1 - 1
tests/datawizard/unpartition.c

@@ -109,7 +109,7 @@ int main(int argc, char **argv)
 		starpu_data_unpartition(v_handle, 0);
 
 		tasks[2] = create_task(v_handle);
-		starpu_task_submit(tasks[2]);
+		ret = starpu_task_submit(tasks[2]);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 

+ 1 - 1
tests/opt/datawizard/wt_broadcast.c

@@ -16,7 +16,7 @@
 
 #include <config.h>
 #include <starpu.h>
-#include "../../helper.h"
+#include "../helper.h"
 
 #ifdef STARPU_USE_CUDA
 #include <starpu_cuda.h>

+ 1 - 1
tests/opt/datawizard/wt_host.c

@@ -16,7 +16,7 @@
 
 #include <config.h>
 #include <starpu.h>
-#include "../../helper.h"
+#include "../helper.h"
 
 #ifdef STARPU_USE_CUDA
 #include <starpu_cuda.h>

+ 1 - 1
tests/main/declare_deps_after_submission.c

@@ -30,9 +30,9 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static struct starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dummy_func, NULL},
 	.cuda_funcs = {dummy_func, NULL},
+	.opencl_funcs = {dummy_func, NULL},
 	.model = NULL,
 	.nbuffers = 0
 };

+ 1 - 1
tests/main/declare_deps_after_submission_synchronous.c

@@ -30,9 +30,9 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static struct starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dummy_func, NULL},
 	.cuda_funcs = {dummy_func, NULL},
+	.opencl_funcs = {dummy_func, NULL},
 	.model = NULL,
 	.nbuffers = 0
 };

+ 1 - 1
tests/main/declare_deps_in_callback.c

@@ -44,9 +44,9 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static struct starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dummy_func, NULL},
 	.cuda_funcs = {dummy_func, NULL},
+	.opencl_funcs = {dummy_func, NULL},
 	.model = NULL,
 	.nbuffers = 0
 };

+ 1 - 1
tests/main/get_current_task.c

@@ -42,9 +42,9 @@ static void check_task_callback(void *arg)
 
 static struct starpu_codelet dummy_cl =
 {
-	.where = STARPU_CUDA|STARPU_CPU,
 	.cuda_funcs = {check_task_func, NULL},
 	.cpu_funcs = {check_task_func, NULL},
+	.opencl_funcs = {check_task_func, NULL},
 	.model = NULL,
 	.nbuffers = 0
 };

+ 1 - 1
tests/main/wait_all_regenerable_tasks.c

@@ -42,9 +42,9 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static struct starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dummy_func, NULL},
 	.cuda_funcs = {dummy_func, NULL},
+	.opencl_funcs = {dummy_func, NULL},
 	.model = NULL,
 	.nbuffers = 0
 };

+ 0 - 48
tests/opt/Makefile.am

@@ -1,48 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009, 2010, 2011-2012  Université de Bordeaux 1
-# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
-# Copyright (C) 2010, 2011  Institut National de Recherche en Informatique et Automatique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-AM_CFLAGS = $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
-LIBS = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(HWLOC_LIBS) @LIBS@
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src
-AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
-
-optbindir = $(libdir)/starpu/tests
-optbin_PROGRAMS =
-
-SUBDIRS =
-
-TESTS = $(check_PROGRAMS)
-
-check_PROGRAMS =
-
-check_PROGRAMS += 				\
-	datawizard/wt_host			\
-	datawizard/wt_broadcast
-
-optbin_PROGRAMS +=				\
-	datawizard/wt_host
-datawizard_wt_host_SOURCES =			\
-	datawizard/wt_host.c
-
-optbin_PROGRAMS +=				\
-	datawizard/wt_broadcast
-datawizard_wt_broadcast_SOURCES =		\
-	datawizard/wt_broadcast.c
-
-
-showcheck:
-	-cat $(TEST_LOGS) /dev/null

+ 27 - 3
tests/perfmodels/non_linear_regression_based.c

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2011  Université de Bordeaux 1
  * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2012 inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,6 +18,9 @@
 
 #include <config.h>
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include "../helper.h"
 
 #ifdef STARPU_USE_CUDA
@@ -27,7 +31,7 @@ static void memset_cuda(void *descr[], void *arg)
 	int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
 
-	cudaMemset(ptr, 42, n);
+	cudaMemset(ptr, 42, n * sizeof(*ptr));
 	cudaThreadSynchronize();
 }
 #endif
@@ -39,7 +43,7 @@ static void memset_cpu(void *descr[], void *arg)
 	int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
 
-	memset(ptr, 42, n);
+	memset(ptr, 42, n * sizeof(*ptr));
 }
 
 static struct starpu_perfmodel model =
@@ -48,12 +52,18 @@ static struct starpu_perfmodel model =
 	.symbol = "non_linear_memset_regression_based"
 };
 
+#ifdef STARPU_USE_OPENCL
+extern void memset_opencl(void *buffers[], void *args);
+#endif
+
 static struct starpu_codelet memset_cl =
 {
-	.where = STARPU_CUDA|STARPU_CPU,
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {memset_cuda, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {memset_opencl, NULL},
+#endif
 	.cpu_funcs = {memset_cpu, NULL},
 	.model = &model,
 	.nbuffers = 1,
@@ -84,6 +94,10 @@ static void test_memset(int nelems)
 	starpu_data_unregister(handle);
 }
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
 int main(int argc, char **argv)
 {
 	int ret;
@@ -98,6 +112,12 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
 	int slog;
 	for (slog = 8; slog < 25; slog++)
 	{
@@ -105,6 +125,10 @@ int main(int argc, char **argv)
 		test_memset(size);
 	}
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
+
 	starpu_shutdown();
 
 	return EXIT_SUCCESS;

+ 69 - 0
tests/perfmodels/opencl_memset.c

@@ -0,0 +1,69 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+extern struct starpu_opencl_program opencl_program;
+
+void memset_opencl(void *buffers[], void *args)
+{
+	(void) *args;
+	int id, devid;
+        cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+	cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_memset_opencl", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+                if (local > global)
+			local=global;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+}

+ 22 - 0
tests/perfmodels/opencl_memset_kernel.cl

@@ -0,0 +1,22 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void _memset_opencl(__global int *val, int nx)
+{
+        const int i = get_global_id(0);
+        if (i < nx)
+                val[i] = 42;
+}

+ 29 - 4
tests/perfmodels/regression_based.c

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2011-2012  Université de Bordeaux 1
  * Copyright (C) 2011  Télécom-SudParis
+ * Copyright (C) 2012 inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,6 +18,9 @@
 
 #include <config.h>
 #include <starpu.h>
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 #include "../helper.h"
 
 #ifdef STARPU_USE_CUDA
@@ -27,11 +31,15 @@ static void memset_cuda(void *descr[], void *arg)
 	int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
 
-	cudaMemset(ptr, 42, n);
+	cudaMemset(ptr, 42, n * sizeof(*ptr));
 	cudaThreadSynchronize();
 }
 #endif
 
+#ifdef STARPU_USE_OPENCL
+extern void memset_opencl(void *buffers[], void *args);
+#endif
+
 static void memset_cpu(void *descr[], void *arg)
 {
 	STARPU_SKIP_IF_VALGRIND;
@@ -39,7 +47,7 @@ static void memset_cpu(void *descr[], void *arg)
 	int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
 
-	memset(ptr, 42, n);
+	memset(ptr, 42, n * sizeof(*ptr));
 }
 
 static struct starpu_perfmodel model =
@@ -56,10 +64,12 @@ static struct starpu_perfmodel nl_model =
 
 static struct starpu_codelet memset_cl =
 {
-	.where = STARPU_CUDA|STARPU_CPU,
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {memset_cuda, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {memset_opencl, NULL},
+#endif
 	.cpu_funcs = {memset_cpu, NULL},
 	.model = &model,
 	.nbuffers = 1,
@@ -68,10 +78,12 @@ static struct starpu_codelet memset_cl =
 
 static struct starpu_codelet nl_memset_cl =
 {
-	.where = STARPU_CUDA|STARPU_CPU,
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {memset_cuda, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {memset_opencl, NULL},
+#endif
 	.cpu_funcs = {memset_cpu, NULL},
 	.model = &nl_model,
 	.nbuffers = 1,
@@ -118,6 +130,10 @@ static void show_task_perfs(int size, struct starpu_task *task)
 	}
 }
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
 int main(int argc, char **argv)
 {
 	struct starpu_conf conf;
@@ -133,6 +149,12 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
 	int size;
 	for (size = 1024; size < 16777216; size *= 2)
 	{
@@ -167,6 +189,9 @@ int main(int argc, char **argv)
 
 	starpu_data_unregister(handle);
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 
 	return EXIT_SUCCESS;

+ 3 - 1
tools/starpu_machine_display.c

@@ -112,7 +112,9 @@ int main(int argc, char **argv)
 {
 	parse_args(argc, argv);
 
-	starpu_init(NULL);
+	/* Even if starpu_init returns -ENODEV, we should go on : we will just
+	 * print that we found no device. */
+	(void) starpu_init(NULL);
 
 	unsigned ncpu = starpu_cpu_worker_get_count();
 	unsigned ncuda = starpu_cuda_worker_get_count();