Browse Source

merge trunk@6596:6650

Nathalie Furmento 12 years ago
parent
commit
0df511aa30
66 changed files with 597 additions and 155 deletions
  1. 15 0
      STARPU-VERSION
  2. 4 0
      configure.ac
  3. 6 3
      doc/chapters/basic-api.texi
  4. 2 2
      examples/Makefile.am
  5. 1 1
      examples/lu/clu.c
  6. 1 1
      examples/lu/clu_implicit.c
  7. 1 1
      examples/lu/clu_implicit_pivot.c
  8. 1 1
      examples/lu/clu_kernels.c
  9. 1 1
      examples/lu/clu_pivot.c
  10. 1 1
      examples/lu/dlu.c
  11. 1 1
      examples/lu/dlu_implicit.c
  12. 1 1
      examples/lu/dlu_implicit_pivot.c
  13. 1 1
      examples/lu/dlu_kernels.c
  14. 1 1
      examples/lu/dlu_pivot.c
  15. 0 0
      examples/lu/lu-double.h
  16. 0 0
      examples/lu/lu-float.h
  17. 1 1
      examples/lu/lu_example_complex_double.c
  18. 1 1
      examples/lu/lu_example_complex_float.c
  19. 1 1
      examples/lu/lu_example_double.c
  20. 1 1
      examples/lu/lu_example_float.c
  21. 1 1
      examples/lu/slu.c
  22. 1 1
      examples/lu/slu_implicit.c
  23. 1 1
      examples/lu/slu_implicit_pivot.c
  24. 1 1
      examples/lu/slu_kernels.c
  25. 1 1
      examples/lu/slu_pivot.c
  26. 1 1
      examples/lu/zlu.c
  27. 1 1
      examples/lu/zlu_implicit.c
  28. 1 1
      examples/lu/zlu_implicit_pivot.c
  29. 1 1
      examples/lu/zlu_kernels.c
  30. 1 1
      examples/lu/zlu_pivot.c
  31. 78 12
      gcc-plugin/src/starpu.c
  32. 51 1
      gcc-plugin/tests/mocks.h
  33. 4 1
      gcc-plugin/tests/opencl.c
  34. 2 2
      mpi/Makefile.am
  35. 0 0
      mpi/examples/mpi_lu/mpi_lu-double.h
  36. 0 0
      mpi/examples/mpi_lu/mpi_lu-float.h
  37. 1 1
      mpi/examples/mpi_lu/pdlu.c
  38. 1 1
      mpi/examples/mpi_lu/pdlu_kernels.c
  39. 1 1
      mpi/examples/mpi_lu/plu_example_double.c
  40. 1 1
      mpi/examples/mpi_lu/plu_example_float.c
  41. 1 1
      mpi/examples/mpi_lu/plu_solve_double.c
  42. 1 1
      mpi/examples/mpi_lu/plu_solve_float.c
  43. 1 1
      mpi/examples/mpi_lu/pslu.c
  44. 1 1
      mpi/examples/mpi_lu/pslu_kernels.c
  45. 1 1
      mpi/examples/mpi_lu/slu_kernels.c
  46. 23 23
      socl/examples/matmul/matmul.c
  47. 19 2
      src/common/fxt.c
  48. 38 36
      src/common/fxt.h
  49. 2 2
      src/core/task.c
  50. 43 4
      src/core/workers.c
  51. 1 0
      src/core/workers.h
  52. 14 14
      src/drivers/cpu/driver_cpu.c
  53. 2 2
      starpufft/Makefile.am
  54. 2 2
      starpufft/cuda_kernels.cu
  55. 2 2
      starpufft/cudaf_kernels.cu
  56. 2 2
      starpufft/examples/test.c
  57. 2 2
      starpufft/examples/test_threads.c
  58. 2 2
      starpufft/examples/testf.c
  59. 2 2
      starpufft/examples/testf_threads.c
  60. 0 0
      starpufft/starpufft-double.h
  61. 0 0
      starpufft/starpufft-float.h
  62. 2 2
      starpufft/starpufft.c
  63. 2 2
      starpufft/starpufftf.c
  64. 1 0
      tests/Makefile.am
  65. 239 0
      tests/main/driver_api/init_run_deinit.c
  66. 4 2
      tests/sched_policies/simple_cpu_gpu_sched.c

+ 15 - 0
STARPU-VERSION

@@ -2,6 +2,21 @@
 
 
 # Versioning (SONAMEs) for StarPU libraries.
 # Versioning (SONAMEs) for StarPU libraries.
 
 
+# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html#Updating-version-info
+# Here are a set of rules to help you update your library version information:
+# Start with version information of ‘0:0:0’ for each libtool library.
+# Update the version information only immediately before a public
+# release of your software. More frequent updates are unnecessary, and
+# only guarantee that the current interface number gets larger faster.
+# - If the library source code has changed at all since the last
+#   update, then increment revision (‘c:r:a’ becomes ‘c:r+1:a’).
+# - If any interfaces have been added, removed, or changed since the
+#   last update, increment current, and set revision to 0.
+# - If any interfaces have been added since the last public release,
+#   then increment age.
+# - If any interfaces have been removed or changed since the last
+#   public release, then set age to 0. change
+
 # Libtool interface versioning (info "(libtool) Versioning").
 # Libtool interface versioning (info "(libtool) Versioning").
 LIBSTARPU_INTERFACE_CURRENT=1	# increment upon ABI change
 LIBSTARPU_INTERFACE_CURRENT=1	# increment upon ABI change
 LIBSTARPU_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPU_INTERFACE_REVISION=0	# increment upon implementation change

+ 4 - 0
configure.ac

@@ -1559,6 +1559,10 @@ AS_IF([test "$have_valid_hwloc" = "yes"], [
 	])
 	])
 LDFLAGS="${SAVED_LDFLAGS}"
 LDFLAGS="${SAVED_LDFLAGS}"
 CPPFLAGS="${SAVED_CPPFLAGS}"
 CPPFLAGS="${SAVED_CPPFLAGS}"
+
+if test "$have_valid_hwloc" = "no" -a "$hwloc_dir" != "no" ; then
+   AC_MSG_ERROR([hwloc was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot.  It is strongly recommended to install hwloc. However, if you really want to use StarPU without enabling hwloc, please restart configure by specifying the option '--without-hwloc'.])
+fi
 AC_MSG_CHECKING(whether hwloc should be used)
 AC_MSG_CHECKING(whether hwloc should be used)
 AC_MSG_RESULT($have_valid_hwloc)
 AC_MSG_RESULT($have_valid_hwloc)
 AC_SUBST(HWLOC_REQUIRES)
 AC_SUBST(HWLOC_REQUIRES)

+ 6 - 3
doc/chapters/basic-api.texi

@@ -1343,11 +1343,13 @@ option when configuring StarPU.
 
 
 @item @code{struct starpu_perfmodel *model} (optional)
 @item @code{struct starpu_perfmodel *model} (optional)
 This is a pointer to the task duration performance model associated to this
 This is a pointer to the task duration performance model associated to this
-codelet. This optional field is ignored when set to @code{NULL}.
+codelet. This optional field is ignored when set to @code{NULL} or
+when its @code{symbol} field is not set.
 
 
 @item @code{struct starpu_perfmodel *power_model} (optional)
 @item @code{struct starpu_perfmodel *power_model} (optional)
 This is a pointer to the task power consumption performance model associated
 This is a pointer to the task power consumption performance model associated
-to this codelet. This optional field is ignored when set to @code{NULL}.
+to this codelet. This optional field is ignored when set to
+@code{NULL} or when its @code{symbol} field is not set.
 In the case of parallel codelets, this has to account for all processing units
 In the case of parallel codelets, this has to account for all processing units
 involved in the parallel execution.
 involved in the parallel execution.
 
 
@@ -1825,7 +1827,8 @@ archs will be determined by multiplying by an arch-specific factor.
 
 
 @item @code{const char *symbol}
 @item @code{const char *symbol}
 is the symbol name for the performance model, which will be used as
 is the symbol name for the performance model, which will be used as
-file name to store the model.
+file name to store the model. It must be set otherwise the model will
+be ignored.
 
 
 @item @code{double (*cost_model)(struct starpu_buffer_descr *)}
 @item @code{double (*cost_model)(struct starpu_buffer_descr *)}
 This field is deprecated. Use instead the @code{cost_function} field.
 This field is deprecated. Use instead the @code{cost_function} field.

+ 2 - 2
examples/Makefile.am

@@ -115,8 +115,8 @@ noinst_HEADERS = 				\
 	heat/dw_factolu.h			\
 	heat/dw_factolu.h			\
 	lu/xlu.h				\
 	lu/xlu.h				\
 	lu/xlu_kernels.h			\
 	lu/xlu_kernels.h			\
-	lu/float.h				\
-	lu/double.h				\
+	lu/lu-float.h				\
+	lu/lu-double.h				\
 	lu/complex_float.h			\
 	lu/complex_float.h			\
 	lu/complex_double.h			\
 	lu/complex_double.h			\
 	lu/blas_complex.h			\
 	lu/blas_complex.h			\

+ 1 - 1
examples/lu/clu.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_float.h"
+#include "complex_-float.h"
 #include "xlu.c"
 #include "xlu.c"

+ 1 - 1
examples/lu/clu_implicit.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_float.h"
+#include "complex_-float.h"
 #include "xlu_implicit.c"
 #include "xlu_implicit.c"

+ 1 - 1
examples/lu/clu_implicit_pivot.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_float.h"
+#include "complex_-float.h"
 #include "xlu_implicit_pivot.c"
 #include "xlu_implicit_pivot.c"

+ 1 - 1
examples/lu/clu_kernels.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_float.h"
+#include "complex_-float.h"
 #include "xlu_kernels.c"
 #include "xlu_kernels.c"

+ 1 - 1
examples/lu/clu_pivot.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_float.h"
+#include "complex_-float.h"
 #include "xlu_pivot.c"
 #include "xlu_pivot.c"

+ 1 - 1
examples/lu/dlu.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "lu-double.h"
 #include "xlu.c"
 #include "xlu.c"

+ 1 - 1
examples/lu/dlu_implicit.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "lu-double.h"
 #include "xlu_implicit.c"
 #include "xlu_implicit.c"

+ 1 - 1
examples/lu/dlu_implicit_pivot.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "lu-double.h"
 #include "xlu_implicit_pivot.c"
 #include "xlu_implicit_pivot.c"

+ 1 - 1
examples/lu/dlu_kernels.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "lu-double.h"
 #include "xlu_kernels.c"
 #include "xlu_kernels.c"

+ 1 - 1
examples/lu/dlu_pivot.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "lu-double.h"
 #include "xlu_pivot.c"
 #include "xlu_pivot.c"

examples/lu/double.h → examples/lu/lu-double.h


examples/lu/float.h → examples/lu/lu-float.h


+ 1 - 1
examples/lu/lu_example_complex_double.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_double.h"
+#include "complex_-double.h"
 #include "lu_example.c"
 #include "lu_example.c"

+ 1 - 1
examples/lu/lu_example_complex_float.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_float.h"
+#include "complex_-float.h"
 #include "lu_example.c"
 #include "lu_example.c"

+ 1 - 1
examples/lu/lu_example_double.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "lu-double.h"
 #include "lu_example.c"
 #include "lu_example.c"

+ 1 - 1
examples/lu/lu_example_float.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "lu-float.h"
 #include "lu_example.c"
 #include "lu_example.c"

+ 1 - 1
examples/lu/slu.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "lu-float.h"
 #include "xlu.c"
 #include "xlu.c"

+ 1 - 1
examples/lu/slu_implicit.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "lu-float.h"
 #include "xlu_implicit.c"
 #include "xlu_implicit.c"

+ 1 - 1
examples/lu/slu_implicit_pivot.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "lu-float.h"
 #include "xlu_implicit_pivot.c"
 #include "xlu_implicit_pivot.c"

+ 1 - 1
examples/lu/slu_kernels.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "lu-float.h"
 #include "xlu_kernels.c"
 #include "xlu_kernels.c"

+ 1 - 1
examples/lu/slu_pivot.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "lu-float.h"
 #include "xlu_pivot.c"
 #include "xlu_pivot.c"

+ 1 - 1
examples/lu/zlu.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_double.h"
+#include "complex_-double.h"
 #include "xlu.c"
 #include "xlu.c"

+ 1 - 1
examples/lu/zlu_implicit.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_double.h"
+#include "complex_-double.h"
 #include "xlu_implicit.c"
 #include "xlu_implicit.c"

+ 1 - 1
examples/lu/zlu_implicit_pivot.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_double.h"
+#include "complex_-double.h"
 #include "xlu_implicit_pivot.c"
 #include "xlu_implicit_pivot.c"

+ 1 - 1
examples/lu/zlu_kernels.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_double.h"
+#include "complex_-double.h"
 #include "xlu_kernels.c"
 #include "xlu_kernels.c"

+ 1 - 1
examples/lu/zlu_pivot.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "complex_double.h"
+#include "complex_-double.h"
 #include "xlu_pivot.c"
 #include "xlu_pivot.c"

+ 78 - 12
gcc-plugin/src/starpu.c

@@ -1187,6 +1187,18 @@ opencl_event_type (void)
   return t;
   return t;
 }
 }
 
 
+/* Return an expression that, given the OpenCL error code in ERROR_VAR,
+   returns a string.  */
+
+static tree
+build_opencl_error_string (tree error_var)
+{
+  static tree clstrerror_fn;
+  LOOKUP_STARPU_FUNCTION (clstrerror_fn, "starpu_opencl_error_string");
+
+  return build_call_expr (clstrerror_fn, 1, error_var);
+}
+
 /* Return an error-checking `clSetKernelArg' call for argument ARG, at
 /* Return an error-checking `clSetKernelArg' call for argument ARG, at
    index IDX, of KERNEL.  */
    index IDX, of KERNEL.  */
 
 
@@ -1198,14 +1210,8 @@ build_opencl_set_kernel_arg_call (location_t loc, tree fn,
   gcc_assert (TREE_CODE (fn) == FUNCTION_DECL
   gcc_assert (TREE_CODE (fn) == FUNCTION_DECL
 	      && TREE_TYPE (kernel) == opencl_kernel_type ());
 	      && TREE_TYPE (kernel) == opencl_kernel_type ());
 
 
-  static tree setkernarg_fn, clstrerror_fn;
+  static tree setkernarg_fn;
   LOOKUP_STARPU_FUNCTION (setkernarg_fn, "clSetKernelArg");
   LOOKUP_STARPU_FUNCTION (setkernarg_fn, "clSetKernelArg");
-  LOOKUP_STARPU_FUNCTION (clstrerror_fn, "starpu_opencl_error_string");
-
-  local_define (tree, build_errorstr, (tree error_var))
-  {
-    return build_call_expr (clstrerror_fn, 1, error_var);
-  };
 
 
   tree call = build_call_expr (setkernarg_fn, 4, kernel,
   tree call = build_call_expr (setkernarg_fn, 4, kernel,
 			       build_int_cst (integer_type_node, idx),
 			       build_int_cst (integer_type_node, idx),
@@ -1225,7 +1231,8 @@ build_opencl_set_kernel_arg_call (location_t loc, tree fn,
   cond = build3 (COND_EXPR, void_type_node,
   cond = build3 (COND_EXPR, void_type_node,
 		 build2 (NE_EXPR, boolean_type_node,
 		 build2 (NE_EXPR, boolean_type_node,
 			 error_var, integer_zero_node),
 			 error_var, integer_zero_node),
-		 build_error_statements (loc, error_var, build_errorstr,
+		 build_error_statements (loc, error_var,
+					 build_opencl_error_string,
 					 "failed to set OpenCL kernel "
 					 "failed to set OpenCL kernel "
 					 "argument %d", idx),
 					 "argument %d", idx),
 		 NULL_TREE);
 		 NULL_TREE);
@@ -1286,7 +1293,8 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
     /* No further warnings for this node.  */
     /* No further warnings for this node.  */
     TREE_NO_WARNING (task_impl) = true;
     TREE_NO_WARNING (task_impl) = true;
 
 
-  static tree load_fn, load_kern_fn, wid_fn, devid_fn;
+  static tree load_fn, load_kern_fn, enqueue_kern_fn, wid_fn, devid_fn, clfinish_fn,
+    collect_stats_fn, release_ev_fn;
 
 
   if (load_fn == NULL_TREE)
   if (load_fn == NULL_TREE)
     {
     {
@@ -1303,6 +1311,10 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
   LOOKUP_STARPU_FUNCTION (load_kern_fn, "starpu_opencl_load_kernel");
   LOOKUP_STARPU_FUNCTION (load_kern_fn, "starpu_opencl_load_kernel");
   LOOKUP_STARPU_FUNCTION (wid_fn, "starpu_worker_get_id");
   LOOKUP_STARPU_FUNCTION (wid_fn, "starpu_worker_get_id");
   LOOKUP_STARPU_FUNCTION (devid_fn, "starpu_worker_get_devid");
   LOOKUP_STARPU_FUNCTION (devid_fn, "starpu_worker_get_devid");
+  LOOKUP_STARPU_FUNCTION (enqueue_kern_fn, "clEnqueueNDRangeKernel");
+  LOOKUP_STARPU_FUNCTION (clfinish_fn, "clFinish");
+  LOOKUP_STARPU_FUNCTION (collect_stats_fn, "starpu_opencl_collect_stats");
+  LOOKUP_STARPU_FUNCTION (release_ev_fn, "clReleaseEvent");
 
 
   if (verbose_output_p)
   if (verbose_output_p)
     inform (loc, "defining %qE, with OpenCL kernel %qs from file %qs",
     inform (loc, "defining %qE, with OpenCL kernel %qs from file %qs",
@@ -1363,13 +1375,15 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 			       load_stmts);
 			       load_stmts);
 
 
       /* Local variables.  */
       /* Local variables.  */
-      tree kernel_var, queue_var, event_var, group_size_var, ngroups_var;
+      tree kernel_var, queue_var, event_var, group_size_var, ngroups_var,
+	error_var;
 
 
       kernel_var = local_var (opencl_kernel_type ());
       kernel_var = local_var (opencl_kernel_type ());
       queue_var = local_var (opencl_command_queue_type ());
       queue_var = local_var (opencl_command_queue_type ());
       event_var = local_var (opencl_event_type ());
       event_var = local_var (opencl_event_type ());
       group_size_var = local_var (size_type_node);
       group_size_var = local_var (size_type_node);
       ngroups_var = local_var (size_type_node);
       ngroups_var = local_var (size_type_node);
+      error_var = local_var (integer_type_node);
 
 
       /* Build `starpu_opencl_load_kernel (...)'.
       /* Build `starpu_opencl_load_kernel (...)'.
          TODO: Check return value.  */
          TODO: Check return value.  */
@@ -1385,8 +1399,43 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 					 TREE_STRING_POINTER (kernel)),
 					 TREE_STRING_POINTER (kernel)),
 					devid);
 					devid);
 
 
-      /* TODO: `clSetKernelArg', `clEnqueueNDRangeKernel', etc.  */
-
+      tree enqueue_kern =
+	build_call_expr (enqueue_kern_fn, 9,
+			 queue_var, kernel_var,
+			 build_int_cst (integer_type_node, 1),
+			 null_pointer_node,
+			 build_addr (group_size_var, task_impl),
+			 build_addr (ngroups_var, task_impl),
+			 integer_zero_node,
+			 null_pointer_node,
+			 build_addr (event_var, task_impl));
+      tree enqueue_err =
+	build2 (INIT_EXPR, TREE_TYPE (error_var), error_var, enqueue_kern);
+
+      tree enqueue_cond =
+	build3 (COND_EXPR, void_type_node,
+		build2 (NE_EXPR, boolean_type_node,
+			error_var, integer_zero_node),
+		build_error_statements (loc, error_var,
+					build_opencl_error_string,
+					"failed to enqueue kernel"),
+		NULL_TREE);
+
+      tree clfinish =
+	build_call_expr (clfinish_fn, 1, queue_var);
+
+      tree collect_stats =
+	build_call_expr (collect_stats_fn, 1, event_var);
+
+      tree release_ev =
+	build_call_expr (release_ev_fn, 1, event_var);
+
+      tree enqueue_stmts = NULL_TREE;
+      append_to_statement_list (enqueue_err, &enqueue_stmts);
+      append_to_statement_list (enqueue_cond, &enqueue_stmts);
+
+
+      /* TODO: Build `clFinish', `clReleaseEvent', & co.  */
       /* Put it all together.  */
       /* Put it all together.  */
       tree stmts = NULL_TREE;
       tree stmts = NULL_TREE;
       append_to_statement_list (load_cond, &stmts);
       append_to_statement_list (load_cond, &stmts);
@@ -1396,6 +1445,23 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 								   kernel_var),
 								   kernel_var),
 				&stmts);
 				&stmts);
 
 
+      /* TODO: Support user-provided values.  */
+      append_to_statement_list (build2 (INIT_EXPR, TREE_TYPE (group_size_var),
+					group_size_var,
+					build_int_cst (integer_type_node, 1)),
+				&stmts);
+      append_to_statement_list (build2 (INIT_EXPR, TREE_TYPE (ngroups_var),
+					ngroups_var,
+					build_int_cst (integer_type_node, 1)),
+				&stmts);
+      append_to_statement_list (build4 (TARGET_EXPR, void_type_node,
+					error_var, enqueue_stmts,
+					NULL_TREE, NULL_TREE),
+				&stmts);
+      append_to_statement_list (clfinish, &stmts);
+      append_to_statement_list (collect_stats, &stmts);
+      append_to_statement_list (release_ev, &stmts);
+
       /* Bind the local vars.  */
       /* Bind the local vars.  */
       tree vars = chain_trees (kernel_var, queue_var, event_var,
       tree vars = chain_trees (kernel_var, queue_var, event_var,
 			       group_size_var, ngroups_var, NULL_TREE);
 			       group_size_var, ngroups_var, NULL_TREE);

+ 51 - 1
gcc-plugin/tests/mocks.h

@@ -440,13 +440,25 @@ typedef int cl_command_queue;
 
 
 extern cl_int clSetKernelArg (cl_kernel, cl_uint, size_t, const void *);
 extern cl_int clSetKernelArg (cl_kernel, cl_uint, size_t, const void *);
 
 
+extern cl_int
+clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+                       cl_kernel        /* kernel */,
+                       cl_uint          /* work_dim */,
+                       const size_t *   /* global_work_offset */,
+                       const size_t *   /* global_work_size */,
+                       const size_t *   /* local_work_size */,
+                       cl_uint          /* num_events_in_wait_list */,
+                       const cl_event * /* event_wait_list */,
+                       cl_event *       /* event */);
+
 #endif
 #endif
 
 
 
 
 /* Number of `load_opencl_from_string', `load_kernel', and `clSetKernelArg'
 /* Number of `load_opencl_from_string', `load_kernel', and `clSetKernelArg'
    calls.  */
    calls.  */
 static unsigned int load_opencl_calls, load_opencl_kernel_calls,
 static unsigned int load_opencl_calls, load_opencl_kernel_calls,
-  opencl_set_kernel_arg_calls;
+  opencl_set_kernel_arg_calls, opencl_enqueue_calls, opencl_finish_calls,
+  opencl_collect_stats_calls, opencl_release_event_calls;
 
 
 struct load_opencl_arguments
 struct load_opencl_arguments
 {
 {
@@ -528,6 +540,44 @@ clSetKernelArg (cl_kernel kernel, cl_uint index, size_t size,
   return 0;
   return 0;
 }
 }
 
 
+cl_int
+clEnqueueNDRangeKernel(cl_command_queue command_queue,
+                       cl_kernel        kernel,
+                       cl_uint          work_dim,
+                       const size_t *   global_work_offset,
+                       const size_t *   global_work_size,
+                       const size_t *   local_work_size,
+                       cl_uint          num_events_in_wait_list,
+                       const cl_event * event_wait_list,
+                       cl_event *       event)
+{
+  assert (*global_work_size == 1 && *local_work_size == 1);
+  opencl_enqueue_calls++;
+  return 0;
+}
+
+cl_int
+clFinish (cl_command_queue command_queue)
+{
+  opencl_finish_calls++;
+  return 0;
+}
+
+cl_int
+starpu_opencl_collect_stats (cl_event event)
+{
+  opencl_collect_stats_calls++;
+  return 0;
+}
+
+cl_int
+clReleaseEvent (cl_event event)
+{
+  opencl_release_event_calls++;
+  return 0;
+}
+
+
 const char *
 const char *
 starpu_opencl_error_string (cl_int s)
 starpu_opencl_error_string (cl_int s)
 {
 {

+ 4 - 1
gcc-plugin/tests/opencl.c

@@ -64,6 +64,9 @@ main ()
   assert (load_opencl_calls == 1);
   assert (load_opencl_calls == 1);
   assert (load_opencl_kernel_calls == 3);
   assert (load_opencl_kernel_calls == 3);
   assert (opencl_set_kernel_arg_calls == 3 * 2);
   assert (opencl_set_kernel_arg_calls == 3 * 2);
-
+  assert (opencl_enqueue_calls == 3);
+  assert (opencl_finish_calls == 3);
+  assert (opencl_release_event_calls == 3);
+  assert (opencl_collect_stats_calls == 3);
   return EXIT_SUCCESS;
   return EXIT_SUCCESS;
 }
 }

+ 2 - 2
mpi/Makefile.am

@@ -29,8 +29,8 @@ BUILT_SOURCES =
 CLEANFILES = *.gcno *.gcda *.linkinfo
 CLEANFILES = *.gcno *.gcda *.linkinfo
 
 
 EXTRA_DIST = 					\
 EXTRA_DIST = 					\
-	examples/mpi_lu/float.h			\
-	examples/mpi_lu/double.h		\
+	examples/mpi_lu/mpi_lu-float.h		\
+	examples/mpi_lu/mpi_lu-double.h		\
 	examples/mpi_lu/plu_example.c		\
 	examples/mpi_lu/plu_example.c		\
 	examples/mpi_lu/plu_solve.c		\
 	examples/mpi_lu/plu_solve.c		\
 	examples/mpi_lu/pxlu.h			\
 	examples/mpi_lu/pxlu.h			\

mpi/examples/mpi_lu/double.h → mpi/examples/mpi_lu/mpi_lu-double.h


mpi/examples/mpi_lu/float.h → mpi/examples/mpi_lu/mpi_lu-float.h


+ 1 - 1
mpi/examples/mpi_lu/pdlu.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "mpi_lu-double.h"
 #include "pxlu.c"
 #include "pxlu.c"

+ 1 - 1
mpi/examples/mpi_lu/pdlu_kernels.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "mpi_lu-double.h"
 #include "pxlu_kernels.c"
 #include "pxlu_kernels.c"

+ 1 - 1
mpi/examples/mpi_lu/plu_example_double.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "mpi_lu-double.h"
 #include "plu_example.c"
 #include "plu_example.c"

+ 1 - 1
mpi/examples/mpi_lu/plu_example_float.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "mpi_lu-float.h"
 #include "plu_example.c"
 #include "plu_example.c"

+ 1 - 1
mpi/examples/mpi_lu/plu_solve_double.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "mpi_lu-double.h"
 #include "plu_solve.c"
 #include "plu_solve.c"

+ 1 - 1
mpi/examples/mpi_lu/plu_solve_float.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "mpi_lu-float.h"
 #include "plu_solve.c"
 #include "plu_solve.c"

+ 1 - 1
mpi/examples/mpi_lu/pslu.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "mpi_lu-float.h"
 #include "pxlu.c"
 #include "pxlu.c"

+ 1 - 1
mpi/examples/mpi_lu/pslu_kernels.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "mpi_lu-float.h"
 #include "pxlu_kernels.c"
 #include "pxlu_kernels.c"

+ 1 - 1
mpi/examples/mpi_lu/slu_kernels.c

@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "mpi_lu-float.h"
 #include "xlu_kernels.c"
 #include "xlu_kernels.c"

+ 23 - 23
socl/examples/matmul/matmul.c

@@ -34,9 +34,9 @@
 #define TYPE float
 #define TYPE float
 
 
 // Basic Matrix dimensions
 // Basic Matrix dimensions
-#define WA (1024L * BLOCK_SIZE) // Matrix A width
+#define WA (128L * BLOCK_SIZE) // Matrix A width
 #define HA (512L * BLOCK_SIZE) // Matrix A height
 #define HA (512L * BLOCK_SIZE) // Matrix A height
-#define WB (1024L * BLOCK_SIZE) // Matrix B width
+#define WB (128L * BLOCK_SIZE) // Matrix B width
 #define HB WA  // Matrix B height
 #define HB WA  // Matrix B height
 #define WC WB  // Matrix C width 
 #define WC WB  // Matrix C width 
 #define HC HA  // Matrix C height
 #define HC HA  // Matrix C height
@@ -115,10 +115,8 @@ static void __attribute__((unused)) parse_args(int argc, char **argv)
 	}
 	}
 }
 }
 
 
-#define shrLog(...) fprintf(stderr, __VA_ARGS__);
-
 // Round Up Division function
 // Round Up Division function
-size_t shrRoundUp(int group_size, int global_size) {
+size_t roundUp(int group_size, int global_size) {
 	int r = global_size % group_size;
 	int r = global_size % group_size;
 	if(r == 0) {
 	if(r == 0) {
 		return global_size;
 		return global_size;
@@ -127,18 +125,18 @@ size_t shrRoundUp(int group_size, int global_size) {
 	}
 	}
 }
 }
 
 
-void fillArray(TYPE* pfData, int iSize) {
+void fillArray(TYPE* data, int size) {
 	int i;
 	int i;
 	const TYPE fScale = (TYPE)(1.0f / (float)RAND_MAX);
 	const TYPE fScale = (TYPE)(1.0f / (float)RAND_MAX);
-	for (i = 0; i < iSize; ++i) {
-		pfData[i] = fScale * rand();
+	for (i = 0; i < size; ++i) {
+		data[i] = fScale * rand();
 	}
 	}
 }
 }
 
 
-void shrPrintArray(float* pfData, int iSize) {
+void printArray(float* data, int size) {
 	int i;
 	int i;
-	for (i = 0; i < iSize; ++i) {
-		shrLog("%d: %.3f\n", i, pfData[i]);
+	for (i = 0; i < size; ++i) {
+		printf("%d: %.3f\n", i, data[i]);
 	}
 	}
 }
 }
 
 
@@ -222,8 +220,10 @@ int main(int argc, const char** argv) {
 	parse_args(argc, argv);
 	parse_args(argc, argv);
 
 
 	check(clGetPlatformIDs(5, platforms, &platform_count));
 	check(clGetPlatformIDs(5, platforms, &platform_count));
-	if (platform_count == 0)
-		error("No platform found\n");
+	if (platform_count == 0) {
+		printf("No platform found\n");
+		exit(77);
+	}
 
 
 	cl_uint device_count;
 	cl_uint device_count;
 	cl_uint devs[platform_count];
 	cl_uint devs[platform_count];
@@ -350,7 +350,7 @@ int main(int argc, const char** argv) {
 		check(clSetKernelArg(multiplicationKernel[p], 4, sizeof(cl_mem), (void *) &d_B[d]));
 		check(clSetKernelArg(multiplicationKernel[p], 4, sizeof(cl_mem), (void *) &d_B[d]));
 		check(clSetKernelArg(multiplicationKernel[p], 5, sizeof(cl_mem), (void *) &d_C[i]));
 		check(clSetKernelArg(multiplicationKernel[p], 5, sizeof(cl_mem), (void *) &d_C[i]));
 
 
-		size_t globalWorkSize[] = {shrRoundUp(BLOCK_SIZE,WC), shrRoundUp(BLOCK_SIZE,workSize[i])};
+		size_t globalWorkSize[] = {roundUp(BLOCK_SIZE,WC), roundUp(BLOCK_SIZE,workSize[i])};
 
 
 		check(clEnqueueNDRangeKernel(commandQueue[p][dev], multiplicationKernel[p], 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &GPUExecution[i]));
 		check(clEnqueueNDRangeKernel(commandQueue[p][dev], multiplicationKernel[p], 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &GPUExecution[i]));
 
 
@@ -430,26 +430,26 @@ int main(int argc, const char** argv) {
 	return 0;
 	return 0;
 }
 }
 
 
-void printDiff(TYPE *data1, TYPE *data2, int width, int height, int iListLength, TYPE fListTol) {
-	shrLog("Listing first %d Differences > %.6f...\n", iListLength, fListTol);
+void printDiff(TYPE *data1, TYPE *data2, int width, int height, int listLength, TYPE listTol) {
+	printf("Listing first %d Differences > %.6f...\n", listLength, listTol);
 	int i,j,k;
 	int i,j,k;
 	int error_count=0;
 	int error_count=0;
 	for (j = 0; j < height; j++) {
 	for (j = 0; j < height; j++) {
-		if (error_count < iListLength) {
-			shrLog("\n  Row %d:\n", j);
+		if (error_count < listLength) {
+			printf("\n  Row %d:\n", j);
 		}
 		}
 		for (i = 0; i < width; i++) {
 		for (i = 0; i < width; i++) {
 			k = j * width + i;
 			k = j * width + i;
-			float fDiff = fabs(data1[k] - data2[k]);
-			if (fDiff > fListTol) {                
-				if (error_count < iListLength) {
-					shrLog("    Loc(%d,%d)\tCPU=%.5f\tGPU=%.5f\tDiff=%.6f\n", i, j, data1[k], data2[k], fDiff);
+			float diff = fabs(data1[k] - data2[k]);
+			if (diff > listTol) {                
+				if (error_count < listLength) {
+					printf("    Loc(%d,%d)\tCPU=%.5f\tGPU=%.5f\tDiff=%.6f\n", i, j, data1[k], data2[k], diff);
 				}
 				}
 				error_count++;
 				error_count++;
 			}
 			}
 		}
 		}
 	}
 	}
-	shrLog(" \n  Total Errors = %d\n\n", error_count);
+	printf(" \n  Total Errors = %d\n\n", error_count);
 }
 }
 
 
 /**
 /**

+ 19 - 2
src/common/fxt.c

@@ -29,6 +29,10 @@
 #include <windows.h>
 #include <windows.h>
 #endif
 #endif
 
 
+#ifdef __FreeBSD__
+#include <sys/thr.h> /* for thr_self() */
+#endif
+
 #define _STARPU_PROF_BUFFER_SIZE  (8*1024*1024)
 #define _STARPU_PROF_BUFFER_SIZE  (8*1024*1024)
 
 
 static char _STARPU_PROF_FILE_USER[128];
 static char _STARPU_PROF_FILE_USER[128];
@@ -38,6 +42,19 @@ static int _starpu_written = 0;
 
 
 static int _starpu_id;
 static int _starpu_id;
 
 
+long _starpu_gettid()
+{
+#if defined(__linux__)
+	return syscall(SYS_gettid);
+#elif defined(__FreeBSD__)
+	long tid;
+	thr_self(&tid);
+	return tid;
+#else
+	return (long) pthread_self();
+#endif
+}
+
 static void _starpu_profile_set_tracefile(void *last, ...)
 static void _starpu_profile_set_tracefile(void *last, ...)
 {
 {
 	va_list vl;
 	va_list vl;
@@ -78,7 +95,7 @@ void _starpu_start_fxt_profiling(void)
 		_starpu_profile_set_tracefile(NULL);
 		_starpu_profile_set_tracefile(NULL);
 	}
 	}
 
 
-	threadid = syscall(SYS_gettid);
+	threadid = _starpu_gettid();
 
 
 	atexit(_starpu_stop_fxt_profiling);
 	atexit(_starpu_stop_fxt_profiling);
 
 
@@ -140,7 +157,7 @@ void _starpu_stop_fxt_profiling(void)
 
 
 void _starpu_fxt_register_thread(unsigned cpuid)
 void _starpu_fxt_register_thread(unsigned cpuid)
 {
 {
-	FUT_DO_PROBE2(FUT_NEW_LWP_CODE, cpuid, syscall(SYS_gettid));
+	FUT_DO_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid());
 }
 }
 
 
 #endif // STARPU_USE_FXT
 #endif // STARPU_USE_FXT

+ 38 - 36
src/common/fxt.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2011  Université de Bordeaux 1
  * Copyright (C) 2009-2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -107,6 +107,8 @@
 #include <fxt/fxt.h>
 #include <fxt/fxt.h>
 #include <fxt/fut.h>
 #include <fxt/fut.h>
 
 
+long _starpu_gettid(void);
+
 /* Initialize the FxT library. */
 /* Initialize the FxT library. */
 void _starpu_start_fxt_profiling(void);
 void _starpu_start_fxt_profiling(void);
 
 
@@ -178,13 +180,13 @@ do {									\
 
 
 /* workerkind = _STARPU_FUT_CPU_KEY for instance */
 /* workerkind = _STARPU_FUT_CPU_KEY for instance */
 #define _STARPU_TRACE_NEW_MEM_NODE(nodeid)			\
 #define _STARPU_TRACE_NEW_MEM_NODE(nodeid)			\
-	FUT_DO_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, _starpu_gettid());
 
 
 #define _STARPU_TRACE_WORKER_INIT_START(workerkind, devid, memnode)	\
 #define _STARPU_TRACE_WORKER_INIT_START(workerkind, devid, memnode)	\
-	FUT_DO_PROBE4(_STARPU_FUT_WORKER_INIT_START, workerkind, devid, memnode, syscall(SYS_gettid));
+	FUT_DO_PROBE4(_STARPU_FUT_WORKER_INIT_START, workerkind, devid, memnode, _starpu_gettid());
 
 
 #define _STARPU_TRACE_WORKER_INIT_END				\
 #define _STARPU_TRACE_WORKER_INIT_END				\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_INIT_END, syscall(SYS_gettid));
+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid());
 
 
 #define _STARPU_TRACE_START_CODELET_BODY(job)				\
 #define _STARPU_TRACE_START_CODELET_BODY(job)				\
 do {									\
 do {									\
@@ -192,10 +194,10 @@ do {									\
 	if (model_name)                                                 \
 	if (model_name)                                                 \
 	{								\
 	{								\
 		/* we include the symbol name */			\
 		/* we include the symbol name */			\
-		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, syscall(SYS_gettid), 1, model_name); \
+		_STARPU_FUT_DO_PROBE3STR(_STARPU_FUT_START_CODELET_BODY, (job), _starpu_gettid(), 1, model_name); \
 	}								\
 	}								\
 	else {                                                          \
 	else {                                                          \
-		FUT_DO_PROBE4(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, syscall(SYS_gettid), 0); \
+		FUT_DO_PROBE3(_STARPU_FUT_START_CODELET_BODY, (job), _starpu_gettid(), 0); \
 	}								\
 	}								\
 } while(0);
 } while(0);
 
 
@@ -203,35 +205,35 @@ do {									\
 do {									\
 do {									\
 	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
 	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
-	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
+	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype), _starpu_gettid());	\
 } while(0);
 } while(0);
 
 
 #define _STARPU_TRACE_START_CALLBACK(job)	\
 #define _STARPU_TRACE_START_CALLBACK(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_START_CALLBACK, job, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_START_CALLBACK, job, _starpu_gettid());
 
 
 #define _STARPU_TRACE_END_CALLBACK(job)	\
 #define _STARPU_TRACE_END_CALLBACK(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_END_CALLBACK, job, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_END_CALLBACK, job, _starpu_gettid());
 
 
 #define _STARPU_TRACE_JOB_PUSH(task, prio)	\
 #define _STARPU_TRACE_JOB_PUSH(task, prio)	\
-	FUT_DO_PROBE3(_STARPU_FUT_JOB_PUSH, task, prio, syscall(SYS_gettid));
+	FUT_DO_PROBE3(_STARPU_FUT_JOB_PUSH, task, prio, _starpu_gettid());
 
 
 #define _STARPU_TRACE_JOB_POP(task, prio)	\
 #define _STARPU_TRACE_JOB_POP(task, prio)	\
-	FUT_DO_PROBE3(_STARPU_FUT_JOB_POP, task, prio, syscall(SYS_gettid));
+	FUT_DO_PROBE3(_STARPU_FUT_JOB_POP, task, prio, _starpu_gettid());
 
 
 #define _STARPU_TRACE_UPDATE_TASK_CNT(counter)	\
 #define _STARPU_TRACE_UPDATE_TASK_CNT(counter)	\
-	FUT_DO_PROBE2(_STARPU_FUT_UPDATE_TASK_CNT, counter, syscall(SYS_gettid))
+	FUT_DO_PROBE2(_STARPU_FUT_UPDATE_TASK_CNT, counter, _starpu_gettid())
 
 
 #define _STARPU_TRACE_START_FETCH_INPUT(job)	\
 #define _STARPU_TRACE_START_FETCH_INPUT(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_START_FETCH_INPUT, job, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_START_FETCH_INPUT, job, _starpu_gettid());
 
 
 #define _STARPU_TRACE_END_FETCH_INPUT(job)	\
 #define _STARPU_TRACE_END_FETCH_INPUT(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_END_FETCH_INPUT, job, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_END_FETCH_INPUT, job, _starpu_gettid());
 
 
 #define _STARPU_TRACE_START_PUSH_OUTPUT(job)	\
 #define _STARPU_TRACE_START_PUSH_OUTPUT(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_START_PUSH_OUTPUT, job, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_START_PUSH_OUTPUT, job, _starpu_gettid());
 
 
 #define _STARPU_TRACE_END_PUSH_OUTPUT(job)	\
 #define _STARPU_TRACE_END_PUSH_OUTPUT(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_END_PUSH_OUTPUT, job, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_END_PUSH_OUTPUT, job, _starpu_gettid());
 
 
 #define _STARPU_TRACE_TAG(tag, job)	\
 #define _STARPU_TRACE_TAG(tag, job)	\
 	FUT_DO_PROBE2(_STARPU_FUT_TAG, tag, (job)->job_id)
 	FUT_DO_PROBE2(_STARPU_FUT_TAG, tag, (job)->job_id)
@@ -251,10 +253,10 @@ do {										\
         const char *model_name = _starpu_get_model_name((job));                       \
         const char *model_name = _starpu_get_model_name((job));                       \
 	if (model_name)					                        \
 	if (model_name)					                        \
 	{									\
 	{									\
-		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_TASK_DONE, (job)->job_id, syscall(SYS_gettid), (long unsigned)exclude_from_dag, 1, model_name);\
+		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid(), (long unsigned)exclude_from_dag, 1, model_name);\
 	}									\
 	}									\
 	else {									\
 	else {									\
-		FUT_DO_PROBE4(_STARPU_FUT_TASK_DONE, (job)->job_id, syscall(SYS_gettid), (long unsigned)exclude_from_dag, 0);\
+		FUT_DO_PROBE4(_STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid(), (long unsigned)exclude_from_dag, 0);\
 	}									\
 	}									\
 } while(0);
 } while(0);
 
 
@@ -264,10 +266,10 @@ do {										\
         const char *model_name = _starpu_get_model_name((job));                       \
         const char *model_name = _starpu_get_model_name((job));                       \
 	if (model_name)                                                         \
 	if (model_name)                                                         \
 	{									\
 	{									\
-          _STARPU_FUT_DO_PROBE3STR(_STARPU_FUT_TAG_DONE, (tag)->id, syscall(SYS_gettid), 1, model_name); \
+          _STARPU_FUT_DO_PROBE3STR(_STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 1, model_name); \
 	}									\
 	}									\
 	else {									\
 	else {									\
-		FUT_DO_PROBE3(_STARPU_FUT_TAG_DONE, (tag)->id, syscall(SYS_gettid), 0);\
+		FUT_DO_PROBE3(_STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\
 	}									\
 	}									\
 } while(0);
 } while(0);
 
 
@@ -290,56 +292,56 @@ do {										\
 	FUT_DO_PROBE2(_STARPU_FUT_WORK_STEALING, empty_q, victim_q)
 	FUT_DO_PROBE2(_STARPU_FUT_WORK_STEALING, empty_q, victim_q)
 
 
 #define _STARPU_TRACE_WORKER_DEINIT_START			\
 #define _STARPU_TRACE_WORKER_DEINIT_START			\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, syscall(SYS_gettid));
+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, _starpu_gettid());
 
 
 #define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		\
 #define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		\
-	FUT_DO_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, _starpu_gettid());
 
 
 #define _STARPU_TRACE_WORKER_SLEEP_START	\
 #define _STARPU_TRACE_WORKER_SLEEP_START	\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_START, syscall(SYS_gettid));
+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_START, _starpu_gettid());
 
 
 #define _STARPU_TRACE_WORKER_SLEEP_END	\
 #define _STARPU_TRACE_WORKER_SLEEP_END	\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_END, syscall(SYS_gettid));
+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid());
 
 
 #define _STARPU_TRACE_USER_DEFINED_START	\
 #define _STARPU_TRACE_USER_DEFINED_START	\
-	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_START, syscall(SYS_gettid));
+	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_START, _starpu_gettid());
 
 
 #define _STARPU_TRACE_USER_DEFINED_END		\
 #define _STARPU_TRACE_USER_DEFINED_END		\
-	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_END, syscall(SYS_gettid));
+	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_END, _starpu_gettid());
 
 
 #define _STARPU_TRACE_START_ALLOC(memnode)		\
 #define _STARPU_TRACE_START_ALLOC(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC, memnode, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC, memnode, _starpu_gettid());
 	
 	
 #define _STARPU_TRACE_END_ALLOC(memnode)		\
 #define _STARPU_TRACE_END_ALLOC(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC, memnode, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC, memnode, _starpu_gettid());
 
 
 #define _STARPU_TRACE_START_ALLOC_REUSE(memnode)		\
 #define _STARPU_TRACE_START_ALLOC_REUSE(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC_REUSE, memnode, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC_REUSE, memnode, _starpu_gettid());
 	
 	
 #define _STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
 #define _STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid());
 	
 	
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode)		\
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_START_MEMRECLAIM, memnode, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_START_MEMRECLAIM, memnode, _starpu_gettid());
 	
 	
 #define _STARPU_TRACE_END_MEMRECLAIM(memnode)		\
 #define _STARPU_TRACE_END_MEMRECLAIM(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_END_MEMRECLAIM, memnode, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_END_MEMRECLAIM, memnode, _starpu_gettid());
 	
 	
 /* We skip these events becasue they are called so often that they cause FxT to
 /* We skip these events becasue they are called so often that they cause FxT to
  * fail and make the overall trace unreadable anyway. */
  * fail and make the overall trace unreadable anyway. */
 #define _STARPU_TRACE_START_PROGRESS(memnode)		\
 #define _STARPU_TRACE_START_PROGRESS(memnode)		\
 	do {} while (0);
 	do {} while (0);
-//	FUT_DO_PROBE2(_STARPU_FUT_START_PROGRESS, memnode, syscall(SYS_gettid));
+//	FUT_DO_PROBE2(_STARPU_FUT_START_PROGRESS, memnode, _starpu_gettid());
 
 
 #define _STARPU_TRACE_END_PROGRESS(memnode)		\
 #define _STARPU_TRACE_END_PROGRESS(memnode)		\
 	do {} while (0);
 	do {} while (0);
-	//FUT_DO_PROBE2(_STARPU_FUT_END_PROGRESS, memnode, syscall(SYS_gettid));
+	//FUT_DO_PROBE2(_STARPU_FUT_END_PROGRESS, memnode, _starpu_gettid());
 	
 	
 #define _STARPU_TRACE_USER_EVENT(code)			\
 #define _STARPU_TRACE_USER_EVENT(code)			\
-	FUT_DO_PROBE2(_STARPU_FUT_USER_EVENT, code, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_USER_EVENT, code, _starpu_gettid());
 
 
 #define _STARPU_TRACE_SET_PROFILING(status)		\
 #define _STARPU_TRACE_SET_PROFILING(status)		\
-	FUT_DO_PROBE2(_STARPU_FUT_SET_PROFILING, status, syscall(SYS_gettid));
+	FUT_DO_PROBE2(_STARPU_FUT_SET_PROFILING, status, _starpu_gettid());
 
 
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
 	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)
 	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)

+ 2 - 2
src/core/task.c

@@ -417,10 +417,10 @@ int starpu_task_submit(struct starpu_task *task)
 
 
 		_starpu_detect_implicit_data_deps(task);
 		_starpu_detect_implicit_data_deps(task);
 
 
-		if (task->cl->model)
+		if (task->cl->model && task->cl->model->symbol)
 			_starpu_load_perfmodel(task->cl->model);
 			_starpu_load_perfmodel(task->cl->model);
 
 
-		if (task->cl->power_model)
+		if (task->cl->power_model && task->cl->power_model->symbol)
 			_starpu_load_perfmodel(task->cl->power_model);
 			_starpu_load_perfmodel(task->cl->power_model);
 	}
 	}
 
 

+ 43 - 4
src/core/workers.c

@@ -229,6 +229,9 @@ static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
 
 
 		switch (d->type)
 		switch (d->type)
 		{
 		{
+		case STARPU_CPU_WORKER:
+			if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id)
+				return 0;
 		case STARPU_CUDA_WORKER:
 		case STARPU_CUDA_WORKER:
 			if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
 			if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
 				return 0;
 				return 0;
@@ -257,7 +260,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 	unsigned nworkers = config->topology.nworkers;
 	unsigned nworkers = config->topology.nworkers;
 
 
 	/* Launch workers asynchronously (except for SPUs) */
 	/* Launch workers asynchronously (except for SPUs) */
-	unsigned cuda = 0;
+	unsigned cpu = 0, cuda = 0;
 	unsigned worker;
 	unsigned worker;
 	for (worker = 0; worker < nworkers; worker++)
 	for (worker = 0; worker < nworkers; worker++)
 	{
 	{
@@ -273,6 +276,8 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 		workerarg->worker_size = 1;
 		workerarg->worker_size = 1;
 		workerarg->combined_workerid = workerarg->workerid;
 		workerarg->combined_workerid = workerarg->workerid;
 		workerarg->current_rank = 0;
 		workerarg->current_rank = 0;
+		workerarg->run_by_starpu = 1;
+
 		workerarg->has_prev_init = 0;
 		workerarg->has_prev_init = 0;
 		/* mutex + cond only for the local list */
 		/* mutex + cond only for the local list */
 		/* we have a single local list */
 		/* we have a single local list */
@@ -304,8 +309,17 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 			case STARPU_CPU_WORKER:
 			case STARPU_CPU_WORKER:
 				workerarg->set = NULL;
 				workerarg->set = NULL;
 				workerarg->worker_is_initialized = 0;
 				workerarg->worker_is_initialized = 0;
-				pthread_create(&workerarg->worker_thread,
-						NULL, _starpu_cpu_worker, workerarg);
+				driver.id.cpu_id = cpu;
+				if (_starpu_may_launch_driver(config->conf, &driver))
+				{
+					pthread_create(&workerarg->worker_thread,
+							NULL, _starpu_cpu_worker, workerarg);
+				}
+				else
+				{
+					workerarg->run_by_starpu = 0;
+				}
+				cpu++;
 				break;
 				break;
 #endif
 #endif
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -318,6 +332,10 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 					pthread_create(&workerarg->worker_thread,
 					pthread_create(&workerarg->worker_thread,
 						       NULL, _starpu_cuda_worker, workerarg);
 						       NULL, _starpu_cuda_worker, workerarg);
 				}
 				}
+				else
+				{
+					workerarg->run_by_starpu = 0;
+				}
 				cuda++;
 				cuda++;
 				break;
 				break;
 #endif
 #endif
@@ -325,7 +343,10 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 			case STARPU_OPENCL_WORKER:
 			case STARPU_OPENCL_WORKER:
 				starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
 				starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
 				if (!_starpu_may_launch_driver(config->conf, &driver))
 				if (!_starpu_may_launch_driver(config->conf, &driver))
+				{
+					workerarg->run_by_starpu = 0;
 					break;
 					break;
+				}
 				workerarg->set = NULL;
 				workerarg->set = NULL;
 				workerarg->worker_is_initialized = 0;
 				workerarg->worker_is_initialized = 0;
 				pthread_create(&workerarg->worker_thread,
 				pthread_create(&workerarg->worker_thread,
@@ -367,6 +388,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 		}
 		}
 	}
 	}
 
 
+	cpu  = 0;
 	cuda = 0;
 	cuda = 0;
 	for (worker = 0; worker < nworkers; worker++)
 	for (worker = 0; worker < nworkers; worker++)
 	{
 	{
@@ -377,10 +399,17 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 		switch (workerarg->arch)
 		switch (workerarg->arch)
 		{
 		{
 			case STARPU_CPU_WORKER:
 			case STARPU_CPU_WORKER:
+				driver.id.cpu_id = cpu;
+				if (!_starpu_may_launch_driver(config->conf, &driver))
+				{
+					cpu++;
+					break;
+				}
 				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
 				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
 				while (!workerarg->worker_is_initialized)
 				while (!workerarg->worker_is_initialized)
 					_STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
 					_STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
 				_STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
 				_STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
+				cpu++;
 				break;
 				break;
 			case STARPU_CUDA_WORKER:
 			case STARPU_CUDA_WORKER:
 				driver.id.cuda_id = cuda;
 				driver.id.cuda_id = cuda;
@@ -667,6 +696,9 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 		}
 		}
 		else
 		else
 		{
 		{
+			if (!worker->run_by_starpu)
+				goto out;
+
 			if (!pthread_equal(pthread_self(), worker->worker_thread))
 			if (!pthread_equal(pthread_self(), worker->worker_thread))
 			{
 			{
 				status = pthread_join(worker->worker_thread, NULL);
 				status = pthread_join(worker->worker_thread, NULL);
@@ -679,6 +711,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 			}
 			}
 		}
 		}
 
 
+out:
 		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
 		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
 		_starpu_job_list_delete(worker->terminated_jobs);
 		_starpu_job_list_delete(worker->terminated_jobs);
 	}
 	}
@@ -1082,6 +1115,9 @@ struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
 	return &config.sched_ctxs[0];
 	return &config.sched_ctxs[0];
 }
 }
 
 
+#ifdef STARPU_USE_CPU
+extern int _starpu_run_cpu(struct starpu_driver *);
+#endif
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 extern int _starpu_run_cuda(struct starpu_driver *);
 extern int _starpu_run_cuda(struct starpu_driver *);
 #endif
 #endif
@@ -1097,6 +1133,10 @@ starpu_driver_run(struct starpu_driver *d)
 
 
 	switch (d->type)
 	switch (d->type)
 	{
 	{
+#ifdef STARPU_USE_CPU
+	case STARPU_CPU_WORKER:
+		return _starpu_run_cpu(d);
+#endif
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	case STARPU_CUDA_WORKER:
 	case STARPU_CUDA_WORKER:
 		return _starpu_run_cuda(d);
 		return _starpu_run_cuda(d);
@@ -1105,7 +1145,6 @@ starpu_driver_run(struct starpu_driver *d)
 	case STARPU_OPENCL_WORKER:
 	case STARPU_OPENCL_WORKER:
 		return _starpu_run_opencl(d);
 		return _starpu_run_opencl(d);
 #endif
 #endif
-	case STARPU_CPU_WORKER:    /* Not supported yet */
 	case STARPU_GORDON_WORKER: /* Not supported yet */
 	case STARPU_GORDON_WORKER: /* Not supported yet */
 	default:
 	default:
 		return -EINVAL;
 		return -EINVAL;

+ 1 - 0
src/core/workers.h

@@ -81,6 +81,7 @@ struct _starpu_worker
 	enum _starpu_worker_status status; /* what is the worker doing now ? (eg. CALLBACK) */
 	enum _starpu_worker_status status; /* what is the worker doing now ? (eg. CALLBACK) */
 	char name[48];
 	char name[48];
 	char short_name[10];
 	char short_name[10];
+	unsigned run_by_starpu; /* Is this run by StarPU or directly by the application ? */
 
 
 	struct _starpu_sched_ctx **sched_ctx;
 	struct _starpu_sched_ctx **sched_ctx;
 	unsigned nsched_ctxs; /* the no of contexts a worker belongs to*/
 	unsigned nsched_ctxs; /* the no of contexts a worker belongs to*/

+ 14 - 14
src/drivers/cpu/driver_cpu.c

@@ -101,7 +101,6 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 static struct _starpu_worker*
 static struct _starpu_worker*
 _starpu_get_worker_from_driver(struct starpu_driver *d)
 _starpu_get_worker_from_driver(struct starpu_driver *d)
 {
 {
-#if 1
 	int workers[d->id.cpu_id + 1];
 	int workers[d->id.cpu_id + 1];
 	int nworkers;
 	int nworkers;
 	nworkers = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, d->id.cpu_id+1);
 	nworkers = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, d->id.cpu_id+1);
@@ -109,19 +108,6 @@ _starpu_get_worker_from_driver(struct starpu_driver *d)
 		return NULL; // No device was found.
 		return NULL; // No device was found.
 	
 	
 	return _starpu_get_worker_struct(workers[d->id.cpu_id]);
 	return _starpu_get_worker_struct(workers[d->id.cpu_id]);
-#else
-	int workers[STARPU_NMAXWORKERS];
-	int nworkers;
-	nworkers = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, STARPU_NMAXWORKERS);
-	STARPU_ASSERT(nworkers > 0);
-	int i;
-	for (i = 0; i < nworkers; i++)
-	{
-		fprintf(stderr, "\tCPU %d\n", i);
-	}
-
-	return _starpu_get_worker_struct(workers[d->id.cpu_id]);
-#endif
 }
 }
 
 
 int _starpu_cpu_driver_init(struct starpu_driver *d)
 int _starpu_cpu_driver_init(struct starpu_driver *d)
@@ -345,3 +331,17 @@ _starpu_cpu_worker(void *arg)
 
 
 	return NULL;
 	return NULL;
 }
 }
+
+int _starpu_run_cpu(struct starpu_driver *d)
+{
+	STARPU_ASSERT(d && d->type == STARPU_CPU_WORKER);
+
+	struct _starpu_worker *worker = _starpu_get_worker_from_driver(d);
+	STARPU_ASSERT(worker);
+
+	worker->set = NULL;
+	worker->worker_is_initialized = 0;
+	_starpu_cpu_worker(worker);
+
+	return 0;
+}

+ 2 - 2
starpufft/Makefile.am

@@ -19,8 +19,8 @@ AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include $(STARPU_CUDA_C
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la
 
 
 EXTRA_DIST =			\
 EXTRA_DIST =			\
-	float.h			\
-	double.h		\
+	starpufft-float.h	\
+	starpufft-double.h	\
 	cudax_kernels.h		\
 	cudax_kernels.h		\
 	starpufftx.c		\
 	starpufftx.c		\
 	starpufftx1d.c		\
 	starpufftx1d.c		\

+ 2 - 2
starpufft/cuda_kernels.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "starpufft-double.h"
 #include "cudax_kernels.cu"
 #include "cudax_kernels.cu"

+ 2 - 2
starpufft/cudaf_kernels.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "starpufft-float.h"
 #include "cudax_kernels.cu"
 #include "cudax_kernels.cu"

+ 2 - 2
starpufft/examples/test.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "starpufft-double.h"
 #include "testx.c"
 #include "testx.c"

+ 2 - 2
starpufft/examples/test_threads.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "starpufft-double.h"
 #include "testx_threads.c"
 #include "testx_threads.c"

+ 2 - 2
starpufft/examples/testf.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "starpufft-float.h"
 #include "testx.c"
 #include "testx.c"

+ 2 - 2
starpufft/examples/testf_threads.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "starpufft-float.h"
 #include "testx_threads.c"
 #include "testx_threads.c"

starpufft/double.h → starpufft/starpufft-double.h


starpufft/float.h → starpufft/starpufft-float.h


+ 2 - 2
starpufft/starpufft.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "double.h"
+#include "starpufft-double.h"
 #include "starpufftx.c"
 #include "starpufftx.c"

+ 2 - 2
starpufft/starpufftf.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,5 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include "float.h"
+#include "starpufft-float.h"
 #include "starpufftx.c"
 #include "starpufftx.c"

+ 1 - 0
tests/Makefile.am

@@ -115,6 +115,7 @@ noinst_PROGRAMS =				\
 	starpu_machine_display			\
 	starpu_machine_display			\
 	main/deprecated_func			\
 	main/deprecated_func			\
 	main/deprecated_buffer			\
 	main/deprecated_buffer			\
+	main/driver_api/init_run_deinit         \
 	main/restart				\
 	main/restart				\
 	main/execute_on_a_specific_worker	\
 	main/execute_on_a_specific_worker	\
 	main/insert_task			\
 	main/insert_task			\

+ 239 - 0
tests/main/driver_api/init_run_deinit.c

@@ -0,0 +1,239 @@
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+#include "../../helper.h"
+
+#define NTASKS 8
+
+#if defined(STARPU_USE_CPU) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+static void
+dummy(void *buffers[], void *args)
+{
+	(void) buffers;
+	(*(int *)args)++;
+}
+
+static struct starpu_codelet cl =
+{
+	.cpu_funcs    = { dummy, NULL },
+	.cuda_funcs   = { dummy, NULL },
+	.opencl_funcs = { dummy, NULL },
+	.nbuffers     = 0
+};
+
+static void
+init_driver(struct starpu_driver *d)
+{
+	int ret;
+	ret = starpu_driver_init(d);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_init");
+}
+
+static void
+run(struct starpu_task *task, struct starpu_driver *d)
+{
+	int ret;
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_driver_run_once(d);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_run_once");
+}
+
+static void
+deinit_driver(struct starpu_driver *d)
+{
+	int ret; 
+	ret = starpu_driver_deinit(d);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_deinit");
+}
+#endif /* STARPU_USE_CPU || STARPU_USE_CUDA || STARPU_USE_OPENCL */
+
+#ifdef STARPU_USE_CPU
+static int
+test_cpu(void)
+{
+	int var = 0, ret;
+	struct starpu_conf conf;
+
+	ret = starpu_conf_init(&conf);
+	if (ret == -EINVAL)
+		return 1;
+	
+
+	struct starpu_driver d =
+	{
+		.type = STARPU_CPU_WORKER,
+		.id.cpu_id = 0
+	};
+
+	conf.not_launched_drivers = &d;
+	conf.n_not_launched_drivers = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+
+	init_driver(&d);
+	int i;	
+	for (i = 0; i < NTASKS; i++)
+	{
+		struct starpu_task *task;
+		task = starpu_task_create();
+		cl.where = STARPU_CPU;
+		task->cl = &cl;
+		task->cl_arg = &var;
+
+		run(task, &d);
+	}
+	deinit_driver(&d);
+
+	starpu_task_wait_for_all();
+	starpu_shutdown();
+
+	fprintf(stderr, "[CPU] Var is %d\n", var);
+	return !!(var != NTASKS);
+}
+#endif /* STARPU_USE_CPU */
+
+#ifdef STARPU_USE_CUDA
+static int
+test_cuda(void)
+{
+	int var = 0, ret;
+	struct starpu_conf conf;
+
+	ret = starpu_conf_init(&conf);
+	if (ret == -EINVAL)
+		return 1;
+	
+
+	struct starpu_driver d =
+	{
+		.type = STARPU_CUDA_WORKER,
+		.id.cuda_id = 0
+	};
+
+	conf.ncuda = 1;
+	conf.nopencl = 0;
+	conf.not_launched_drivers = &d;
+	conf.n_not_launched_drivers = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+
+
+	init_driver(&d);
+	int i;	
+	for (i = 0; i < NTASKS; i++)
+	{
+		struct starpu_task *task;
+		task = starpu_task_create();
+		cl.where = STARPU_CUDA;
+		task->cl = &cl;
+		task->cl_arg = &var;
+
+		run(task, &d);
+	}
+	deinit_driver(&d);
+
+	starpu_task_wait_for_all();
+	starpu_shutdown();
+
+	fprintf(stderr, "[CUDA] Var is %d\n", var);
+	return !!(var != NTASKS);
+}
+#endif /* STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static int
+test_opencl(void)
+{
+        cl_int err;
+        cl_platform_id platform;
+        cl_uint dummy;
+
+        err = clGetPlatformIDs(1, &platform, &dummy);
+        if (err != CL_SUCCESS)
+        {   
+                fprintf(stderr, "%s:%d\n", __func__, __LINE__);
+		return 1;
+	}
+
+	cl_device_id device_id;
+        err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
+        if (err != CL_SUCCESS)
+        {   
+                fprintf(stderr, "%s:%d\n", __func__, __LINE__);
+		return 1;
+	}
+
+	int var = 0, ret;
+	struct starpu_conf conf;
+
+	ret = starpu_conf_init(&conf);
+	if (ret == -EINVAL)
+		return 1;
+	
+	struct starpu_driver d =
+	{
+		.type = STARPU_OPENCL_WORKER,
+		.id.opencl_id = device_id
+	};
+
+	conf.ncuda = 0;
+	conf.nopencl = 1;
+	conf.not_launched_drivers = &d;
+	conf.n_not_launched_drivers = 1;
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+
+
+	init_driver(&d);
+	int i;	
+	for (i = 0; i < NTASKS; i++)
+	{
+		struct starpu_task *task;
+		task = starpu_task_create();
+		cl.where = STARPU_OPENCL;
+		task->cl = &cl;
+		task->cl_arg = &var;
+
+		run(task, &d);
+	}
+	deinit_driver(&d);
+
+
+	starpu_task_wait_for_all();
+	starpu_shutdown();
+
+	FPRINTF(stderr, "[OpenCL] Var is %d\n", var);
+	return !!(var != NTASKS);
+}
+#endif /* STARPU_USE_OPENCL */
+
+int
+main(void)
+{
+	int ret = STARPU_TEST_SKIPPED;
+
+#ifdef STARPU_USE_CPU
+	ret = test_cpu();
+	if (ret == 1)
+		return ret;
+#endif
+#ifdef STARPU_USE_CUDA
+	ret = test_cuda();
+	if (ret == 1)
+		return ret;
+#endif
+#ifdef STARPU_USE_OPENCL
+	ret = test_opencl();
+	if (ret == 1)
+		return ret;
+#endif
+
+	return ret;
+}

+ 4 - 2
tests/sched_policies/simple_cpu_gpu_sched.c

@@ -88,11 +88,13 @@ gpu_task_gpu(struct starpu_task *task,
 
 
 static struct starpu_perfmodel model_cpu_task = 
 static struct starpu_perfmodel model_cpu_task = 
 {
 {
-	.type = STARPU_PER_ARCH
+	.type = STARPU_PER_ARCH,
+	.symbol = "model_cpu_task"
 };
 };
 static struct starpu_perfmodel model_gpu_task = 
 static struct starpu_perfmodel model_gpu_task = 
 {
 {
-	.type = STARPU_PER_ARCH
+	.type = STARPU_PER_ARCH,
+	.symbol = "model_gpu_task"
 };
 };
 
 
 static void
 static void