13 年之前 · 0df511aa30
--- a/STARPU-VERSION
+++ b/STARPU-VERSION
@@ -2,6 +2,21 @@
 
				 
			
 
				 # Versioning (SONAMEs) for StarPU libraries.
			
 
				 
			
 
				+# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html#Updating-version-info
			
 
				+# Here are a set of rules to help you update your library version information:
			
 
				+# Start with version information of ‘0:0:0’ for each libtool library.
			
 
				+# Update the version information only immediately before a public
			
 
				+# release of your software. More frequent updates are unnecessary, and
			
 
				+# only guarantee that the current interface number gets larger faster.
			
 
				+# - If the library source code has changed at all since the last
			
 
				+#   update, then increment revision (‘c:r:a’ becomes ‘c:r+1:a’).
			
 
				+# - If any interfaces have been added, removed, or changed since the
			
 
				+#   last update, increment current, and set revision to 0.
			
 
				+# - If any interfaces have been added since the last public release,
			
 
				+#   then increment age.
			
 
				+# - If any interfaces have been removed or changed since the last
			
 
				+#   public release, then set age to 0. change
			
 
				+
			
 
				 # Libtool interface versioning (info "(libtool) Versioning").
			
 
				 LIBSTARPU_INTERFACE_CURRENT=1	# increment upon ABI change
			
 
				 LIBSTARPU_INTERFACE_REVISION=0	# increment upon implementation change
			
--- a/configure.ac
+++ b/configure.ac
@@ -1559,6 +1559,10 @@ AS_IF([test "$have_valid_hwloc" = "yes"], [
 
				 	])
			
 
				 LDFLAGS="${SAVED_LDFLAGS}"
			
 
				 CPPFLAGS="${SAVED_CPPFLAGS}"
			
 
				+
			
 
				+if test "$have_valid_hwloc" = "no" -a "$hwloc_dir" != "no" ; then
			
 
				+   AC_MSG_ERROR([hwloc was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot.  It is strongly recommended to install hwloc. However, if you really want to use StarPU without enabling hwloc, please restart configure by specifying the option '--without-hwloc'.])
			
 
				+fi
			
 
				 AC_MSG_CHECKING(whether hwloc should be used)
			
 
				 AC_MSG_RESULT($have_valid_hwloc)
			
 
				 AC_SUBST(HWLOC_REQUIRES)
			
--- a/doc/chapters/basic-api.texi
+++ b/doc/chapters/basic-api.texi
@@ -1343,11 +1343,13 @@ option when configuring StarPU.
 
				 
			
 
				 @item @code{struct starpu_perfmodel *model} (optional)
			
 
				 This is a pointer to the task duration performance model associated to this
			
 
				-codelet. This optional field is ignored when set to @code{NULL}.
			
 
				+codelet. This optional field is ignored when set to @code{NULL} or
			
 
				+when its @code{symbol} field is not set.
			
 
				 
			
 
				 @item @code{struct starpu_perfmodel *power_model} (optional)
			
 
				 This is a pointer to the task power consumption performance model associated
			
 
				-to this codelet. This optional field is ignored when set to @code{NULL}.
			
 
				+to this codelet. This optional field is ignored when set to
			
 
				+@code{NULL} or when its @code{symbol} field is not set.
			
 
				 In the case of parallel codelets, this has to account for all processing units
			
 
				 involved in the parallel execution.
			
 
				 
			
@@ -1825,7 +1827,8 @@ archs will be determined by multiplying by an arch-specific factor.
 
				 
			
 
				 @item @code{const char *symbol}
			
 
				 is the symbol name for the performance model, which will be used as
			
 
				-file name to store the model.
			
 
				+file name to store the model. It must be set otherwise the model will
			
 
				+be ignored.
			
 
				 
			
 
				 @item @code{double (*cost_model)(struct starpu_buffer_descr *)}
			
 
				 This field is deprecated. Use instead the @code{cost_function} field.
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -115,8 +115,8 @@ noinst_HEADERS = 				\
 
				 	heat/dw_factolu.h			\
			
 
				 	lu/xlu.h				\
			
 
				 	lu/xlu_kernels.h			\
			
 
				-	lu/float.h				\
			
 
				-	lu/double.h				\
			
 
				+	lu/lu-float.h				\
			
 
				+	lu/lu-double.h				\
			
 
				 	lu/complex_float.h			\
			
 
				 	lu/complex_double.h			\
			
 
				 	lu/blas_complex.h			\
			
--- a/examples/lu/clu.c
+++ b/examples/lu/clu.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_float.h"
			
 
				+#include "complex_-float.h"
			
 
				 #include "xlu.c"
			
--- a/examples/lu/clu_implicit.c
+++ b/examples/lu/clu_implicit.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_float.h"
			
 
				+#include "complex_-float.h"
			
 
				 #include "xlu_implicit.c"
			
--- a/examples/lu/clu_implicit_pivot.c
+++ b/examples/lu/clu_implicit_pivot.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_float.h"
			
 
				+#include "complex_-float.h"
			
 
				 #include "xlu_implicit_pivot.c"
			
--- a/examples/lu/clu_kernels.c
+++ b/examples/lu/clu_kernels.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_float.h"
			
 
				+#include "complex_-float.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/examples/lu/clu_pivot.c
+++ b/examples/lu/clu_pivot.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_float.h"
			
 
				+#include "complex_-float.h"
			
 
				 #include "xlu_pivot.c"
			
--- a/examples/lu/dlu.c
+++ b/examples/lu/dlu.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "lu-double.h"
			
 
				 #include "xlu.c"
			
--- a/examples/lu/dlu_implicit.c
+++ b/examples/lu/dlu_implicit.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "lu-double.h"
			
 
				 #include "xlu_implicit.c"
			
--- a/examples/lu/dlu_implicit_pivot.c
+++ b/examples/lu/dlu_implicit_pivot.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "lu-double.h"
			
 
				 #include "xlu_implicit_pivot.c"
			
--- a/examples/lu/dlu_kernels.c
+++ b/examples/lu/dlu_kernels.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "lu-double.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/examples/lu/dlu_pivot.c
+++ b/examples/lu/dlu_pivot.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "lu-double.h"
			
 
				 #include "xlu_pivot.c"
			
--- a/examples/lu/lu-double.h
+++ b/examples/lu/lu-double.h
--- a/examples/lu/lu-float.h
+++ b/examples/lu/lu-float.h
--- a/examples/lu/lu_example_complex_double.c
+++ b/examples/lu/lu_example_complex_double.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_double.h"
			
 
				+#include "complex_-double.h"
			
 
				 #include "lu_example.c"
			
--- a/examples/lu/lu_example_complex_float.c
+++ b/examples/lu/lu_example_complex_float.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_float.h"
			
 
				+#include "complex_-float.h"
			
 
				 #include "lu_example.c"
			
--- a/examples/lu/lu_example_double.c
+++ b/examples/lu/lu_example_double.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "lu-double.h"
			
 
				 #include "lu_example.c"
			
--- a/examples/lu/lu_example_float.c
+++ b/examples/lu/lu_example_float.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "lu-float.h"
			
 
				 #include "lu_example.c"
			
--- a/examples/lu/slu.c
+++ b/examples/lu/slu.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "lu-float.h"
			
 
				 #include "xlu.c"
			
--- a/examples/lu/slu_implicit.c
+++ b/examples/lu/slu_implicit.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "lu-float.h"
			
 
				 #include "xlu_implicit.c"
			
--- a/examples/lu/slu_implicit_pivot.c
+++ b/examples/lu/slu_implicit_pivot.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "lu-float.h"
			
 
				 #include "xlu_implicit_pivot.c"
			
--- a/examples/lu/slu_kernels.c
+++ b/examples/lu/slu_kernels.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "lu-float.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/examples/lu/slu_pivot.c
+++ b/examples/lu/slu_pivot.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "lu-float.h"
			
 
				 #include "xlu_pivot.c"
			
--- a/examples/lu/zlu.c
+++ b/examples/lu/zlu.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_double.h"
			
 
				+#include "complex_-double.h"
			
 
				 #include "xlu.c"
			
--- a/examples/lu/zlu_implicit.c
+++ b/examples/lu/zlu_implicit.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_double.h"
			
 
				+#include "complex_-double.h"
			
 
				 #include "xlu_implicit.c"
			
--- a/examples/lu/zlu_implicit_pivot.c
+++ b/examples/lu/zlu_implicit_pivot.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_double.h"
			
 
				+#include "complex_-double.h"
			
 
				 #include "xlu_implicit_pivot.c"
			
--- a/examples/lu/zlu_kernels.c
+++ b/examples/lu/zlu_kernels.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_double.h"
			
 
				+#include "complex_-double.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/examples/lu/zlu_pivot.c
+++ b/examples/lu/zlu_pivot.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "complex_double.h"
			
 
				+#include "complex_-double.h"
			
 
				 #include "xlu_pivot.c"
			
--- a/gcc-plugin/src/starpu.c
+++ b/gcc-plugin/src/starpu.c
@@ -1187,6 +1187,18 @@ opencl_event_type (void)
 
				   return t;
			
 
				 }
			
 
				 
			
 
				+/* Return an expression that, given the OpenCL error code in ERROR_VAR,
			
 
				+   returns a string.  */
			
 
				+
			
 
				+static tree
			
 
				+build_opencl_error_string (tree error_var)
			
 
				+{
			
 
				+  static tree clstrerror_fn;
			
 
				+  LOOKUP_STARPU_FUNCTION (clstrerror_fn, "starpu_opencl_error_string");
			
 
				+
			
 
				+  return build_call_expr (clstrerror_fn, 1, error_var);
			
 
				+}
			
 
				+
			
 
				 /* Return an error-checking `clSetKernelArg' call for argument ARG, at
			
 
				    index IDX, of KERNEL.  */
			
 
				 
			
@@ -1198,14 +1210,8 @@ build_opencl_set_kernel_arg_call (location_t loc, tree fn,
 
				   gcc_assert (TREE_CODE (fn) == FUNCTION_DECL
			
 
				 	      && TREE_TYPE (kernel) == opencl_kernel_type ());
			
 
				 
			
 
				-  static tree setkernarg_fn, clstrerror_fn;
			
 
				+  static tree setkernarg_fn;
			
 
				   LOOKUP_STARPU_FUNCTION (setkernarg_fn, "clSetKernelArg");
			
 
				-  LOOKUP_STARPU_FUNCTION (clstrerror_fn, "starpu_opencl_error_string");
			
 
				-
			
 
				-  local_define (tree, build_errorstr, (tree error_var))
			
 
				-  {
			
 
				-    return build_call_expr (clstrerror_fn, 1, error_var);
			
 
				-  };
			
 
				 
			
 
				   tree call = build_call_expr (setkernarg_fn, 4, kernel,
			
 
				 			       build_int_cst (integer_type_node, idx),
			
@@ -1225,7 +1231,8 @@ build_opencl_set_kernel_arg_call (location_t loc, tree fn,
 
				   cond = build3 (COND_EXPR, void_type_node,
			
 
				 		 build2 (NE_EXPR, boolean_type_node,
			
 
				 			 error_var, integer_zero_node),
			
 
				-		 build_error_statements (loc, error_var, build_errorstr,
			
 
				+		 build_error_statements (loc, error_var,
			
 
				+					 build_opencl_error_string,
			
 
				 					 "failed to set OpenCL kernel "
			
 
				 					 "argument %d", idx),
			
 
				 		 NULL_TREE);
			
@@ -1286,7 +1293,8 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 
				     /* No further warnings for this node.  */
			
 
				     TREE_NO_WARNING (task_impl) = true;
			
 
				 
			
 
				-  static tree load_fn, load_kern_fn, wid_fn, devid_fn;
			
 
				+  static tree load_fn, load_kern_fn, enqueue_kern_fn, wid_fn, devid_fn, clfinish_fn,
			
 
				+    collect_stats_fn, release_ev_fn;
			
 
				 
			
 
				   if (load_fn == NULL_TREE)
			
 
				     {
			
@@ -1303,6 +1311,10 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 
				   LOOKUP_STARPU_FUNCTION (load_kern_fn, "starpu_opencl_load_kernel");
			
 
				   LOOKUP_STARPU_FUNCTION (wid_fn, "starpu_worker_get_id");
			
 
				   LOOKUP_STARPU_FUNCTION (devid_fn, "starpu_worker_get_devid");
			
 
				+  LOOKUP_STARPU_FUNCTION (enqueue_kern_fn, "clEnqueueNDRangeKernel");
			
 
				+  LOOKUP_STARPU_FUNCTION (clfinish_fn, "clFinish");
			
 
				+  LOOKUP_STARPU_FUNCTION (collect_stats_fn, "starpu_opencl_collect_stats");
			
 
				+  LOOKUP_STARPU_FUNCTION (release_ev_fn, "clReleaseEvent");
			
 
				 
			
 
				   if (verbose_output_p)
			
 
				     inform (loc, "defining %qE, with OpenCL kernel %qs from file %qs",
			
@@ -1363,13 +1375,15 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 
				 			       load_stmts);
			
 
				 
			
 
				       /* Local variables.  */
			
 
				-      tree kernel_var, queue_var, event_var, group_size_var, ngroups_var;
			
 
				+      tree kernel_var, queue_var, event_var, group_size_var, ngroups_var,
			
 
				+	error_var;
			
 
				 
			
 
				       kernel_var = local_var (opencl_kernel_type ());
			
 
				       queue_var = local_var (opencl_command_queue_type ());
			
 
				       event_var = local_var (opencl_event_type ());
			
 
				       group_size_var = local_var (size_type_node);
			
 
				       ngroups_var = local_var (size_type_node);
			
 
				+      error_var = local_var (integer_type_node);
			
 
				 
			
 
				       /* Build `starpu_opencl_load_kernel (...)'.
			
 
				          TODO: Check return value.  */
			
@@ -1385,8 +1399,43 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 
				 					 TREE_STRING_POINTER (kernel)),
			
 
				 					devid);
			
 
				 
			
 
				-      /* TODO: `clSetKernelArg', `clEnqueueNDRangeKernel', etc.  */
			
 
				-
			
 
				+      tree enqueue_kern =
			
 
				+	build_call_expr (enqueue_kern_fn, 9,
			
 
				+			 queue_var, kernel_var,
			
 
				+			 build_int_cst (integer_type_node, 1),
			
 
				+			 null_pointer_node,
			
 
				+			 build_addr (group_size_var, task_impl),
			
 
				+			 build_addr (ngroups_var, task_impl),
			
 
				+			 integer_zero_node,
			
 
				+			 null_pointer_node,
			
 
				+			 build_addr (event_var, task_impl));
			
 
				+      tree enqueue_err =
			
 
				+	build2 (INIT_EXPR, TREE_TYPE (error_var), error_var, enqueue_kern);
			
 
				+
			
 
				+      tree enqueue_cond =
			
 
				+	build3 (COND_EXPR, void_type_node,
			
 
				+		build2 (NE_EXPR, boolean_type_node,
			
 
				+			error_var, integer_zero_node),
			
 
				+		build_error_statements (loc, error_var,
			
 
				+					build_opencl_error_string,
			
 
				+					"failed to enqueue kernel"),
			
 
				+		NULL_TREE);
			
 
				+
			
 
				+      tree clfinish =
			
 
				+	build_call_expr (clfinish_fn, 1, queue_var);
			
 
				+
			
 
				+      tree collect_stats =
			
 
				+	build_call_expr (collect_stats_fn, 1, event_var);
			
 
				+
			
 
				+      tree release_ev =
			
 
				+	build_call_expr (release_ev_fn, 1, event_var);
			
 
				+
			
 
				+      tree enqueue_stmts = NULL_TREE;
			
 
				+      append_to_statement_list (enqueue_err, &enqueue_stmts);
			
 
				+      append_to_statement_list (enqueue_cond, &enqueue_stmts);
			
 
				+
			
 
				+
			
 
				+      /* TODO: Build `clFinish', `clReleaseEvent', & co.  */
			
 
				       /* Put it all together.  */
			
 
				       tree stmts = NULL_TREE;
			
 
				       append_to_statement_list (load_cond, &stmts);
			
@@ -1396,6 +1445,23 @@ define_opencl_task_implementation (location_t loc, tree task_impl,
 
				 								   kernel_var),
			
 
				 				&stmts);
			
 
				 
			
 
				+      /* TODO: Support user-provided values.  */
			
 
				+      append_to_statement_list (build2 (INIT_EXPR, TREE_TYPE (group_size_var),
			
 
				+					group_size_var,
			
 
				+					build_int_cst (integer_type_node, 1)),
			
 
				+				&stmts);
			
 
				+      append_to_statement_list (build2 (INIT_EXPR, TREE_TYPE (ngroups_var),
			
 
				+					ngroups_var,
			
 
				+					build_int_cst (integer_type_node, 1)),
			
 
				+				&stmts);
			
 
				+      append_to_statement_list (build4 (TARGET_EXPR, void_type_node,
			
 
				+					error_var, enqueue_stmts,
			
 
				+					NULL_TREE, NULL_TREE),
			
 
				+				&stmts);
			
 
				+      append_to_statement_list (clfinish, &stmts);
			
 
				+      append_to_statement_list (collect_stats, &stmts);
			
 
				+      append_to_statement_list (release_ev, &stmts);
			
 
				+
			
 
				       /* Bind the local vars.  */
			
 
				       tree vars = chain_trees (kernel_var, queue_var, event_var,
			
 
				 			       group_size_var, ngroups_var, NULL_TREE);
			
--- a/gcc-plugin/tests/mocks.h
+++ b/gcc-plugin/tests/mocks.h
@@ -440,13 +440,25 @@ typedef int cl_command_queue;
 
				 
			
 
				 extern cl_int clSetKernelArg (cl_kernel, cl_uint, size_t, const void *);
			
 
				 
			
 
				+extern cl_int
			
 
				+clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
			
 
				+                       cl_kernel        /* kernel */,
			
 
				+                       cl_uint          /* work_dim */,
			
 
				+                       const size_t *   /* global_work_offset */,
			
 
				+                       const size_t *   /* global_work_size */,
			
 
				+                       const size_t *   /* local_work_size */,
			
 
				+                       cl_uint          /* num_events_in_wait_list */,
			
 
				+                       const cl_event * /* event_wait_list */,
			
 
				+                       cl_event *       /* event */);
			
 
				+
			
 
				 #endif
			
 
				 
			
 
				 
			
 
				 /* Number of `load_opencl_from_string', `load_kernel', and `clSetKernelArg'
			
 
				    calls.  */
			
 
				 static unsigned int load_opencl_calls, load_opencl_kernel_calls,
			
 
				-  opencl_set_kernel_arg_calls;
			
 
				+  opencl_set_kernel_arg_calls, opencl_enqueue_calls, opencl_finish_calls,
			
 
				+  opencl_collect_stats_calls, opencl_release_event_calls;
			
 
				 
			
 
				 struct load_opencl_arguments
			
 
				 {
			
@@ -528,6 +540,44 @@ clSetKernelArg (cl_kernel kernel, cl_uint index, size_t size,
 
				   return 0;
			
 
				 }
			
 
				 
			
 
				+cl_int
			
 
				+clEnqueueNDRangeKernel(cl_command_queue command_queue,
			
 
				+                       cl_kernel        kernel,
			
 
				+                       cl_uint          work_dim,
			
 
				+                       const size_t *   global_work_offset,
			
 
				+                       const size_t *   global_work_size,
			
 
				+                       const size_t *   local_work_size,
			
 
				+                       cl_uint          num_events_in_wait_list,
			
 
				+                       const cl_event * event_wait_list,
			
 
				+                       cl_event *       event)
			
 
				+{
			
 
				+  assert (*global_work_size == 1 && *local_work_size == 1);
			
 
				+  opencl_enqueue_calls++;
			
 
				+  return 0;
			
 
				+}
			
 
				+
			
 
				+cl_int
			
 
				+clFinish (cl_command_queue command_queue)
			
 
				+{
			
 
				+  opencl_finish_calls++;
			
 
				+  return 0;
			
 
				+}
			
 
				+
			
 
				+cl_int
			
 
				+starpu_opencl_collect_stats (cl_event event)
			
 
				+{
			
 
				+  opencl_collect_stats_calls++;
			
 
				+  return 0;
			
 
				+}
			
 
				+
			
 
				+cl_int
			
 
				+clReleaseEvent (cl_event event)
			
 
				+{
			
 
				+  opencl_release_event_calls++;
			
 
				+  return 0;
			
 
				+}
			
 
				+
			
 
				+
			
 
				 const char *
			
 
				 starpu_opencl_error_string (cl_int s)
			
 
				 {
			
--- a/gcc-plugin/tests/opencl.c
+++ b/gcc-plugin/tests/opencl.c
@@ -64,6 +64,9 @@ main ()
 
				   assert (load_opencl_calls == 1);
			
 
				   assert (load_opencl_kernel_calls == 3);
			
 
				   assert (opencl_set_kernel_arg_calls == 3 * 2);
			
 
				-
			
 
				+  assert (opencl_enqueue_calls == 3);
			
 
				+  assert (opencl_finish_calls == 3);
			
 
				+  assert (opencl_release_event_calls == 3);
			
 
				+  assert (opencl_collect_stats_calls == 3);
			
 
				   return EXIT_SUCCESS;
			
 
				 }
			
--- a/mpi/Makefile.am
+++ b/mpi/Makefile.am
@@ -29,8 +29,8 @@ BUILT_SOURCES =
 
				 CLEANFILES = *.gcno *.gcda *.linkinfo
			
 
				 
			
 
				 EXTRA_DIST = 					\
			
 
				-	examples/mpi_lu/float.h			\
			
 
				-	examples/mpi_lu/double.h		\
			
 
				+	examples/mpi_lu/mpi_lu-float.h		\
			
 
				+	examples/mpi_lu/mpi_lu-double.h		\
			
 
				 	examples/mpi_lu/plu_example.c		\
			
 
				 	examples/mpi_lu/plu_solve.c		\
			
 
				 	examples/mpi_lu/pxlu.h			\
			
--- a/mpi/examples/mpi_lu/mpi_lu-double.h
+++ b/mpi/examples/mpi_lu/mpi_lu-double.h
--- a/mpi/examples/mpi_lu/mpi_lu-float.h
+++ b/mpi/examples/mpi_lu/mpi_lu-float.h
--- a/mpi/examples/mpi_lu/pdlu.c
+++ b/mpi/examples/mpi_lu/pdlu.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "mpi_lu-double.h"
			
 
				 #include "pxlu.c"
			
--- a/mpi/examples/mpi_lu/pdlu_kernels.c
+++ b/mpi/examples/mpi_lu/pdlu_kernels.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "mpi_lu-double.h"
			
 
				 #include "pxlu_kernels.c"
			
--- a/mpi/examples/mpi_lu/plu_example_double.c
+++ b/mpi/examples/mpi_lu/plu_example_double.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "mpi_lu-double.h"
			
 
				 #include "plu_example.c"
			
--- a/mpi/examples/mpi_lu/plu_example_float.c
+++ b/mpi/examples/mpi_lu/plu_example_float.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "mpi_lu-float.h"
			
 
				 #include "plu_example.c"
			
--- a/mpi/examples/mpi_lu/plu_solve_double.c
+++ b/mpi/examples/mpi_lu/plu_solve_double.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "mpi_lu-double.h"
			
 
				 #include "plu_solve.c"
			
--- a/mpi/examples/mpi_lu/plu_solve_float.c
+++ b/mpi/examples/mpi_lu/plu_solve_float.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "mpi_lu-float.h"
			
 
				 #include "plu_solve.c"
			
--- a/mpi/examples/mpi_lu/pslu.c
+++ b/mpi/examples/mpi_lu/pslu.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "mpi_lu-float.h"
			
 
				 #include "pxlu.c"
			
--- a/mpi/examples/mpi_lu/pslu_kernels.c
+++ b/mpi/examples/mpi_lu/pslu_kernels.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "mpi_lu-float.h"
			
 
				 #include "pxlu_kernels.c"
			
--- a/mpi/examples/mpi_lu/slu_kernels.c
+++ b/mpi/examples/mpi_lu/slu_kernels.c
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "mpi_lu-float.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/socl/examples/matmul/matmul.c
+++ b/socl/examples/matmul/matmul.c
@@ -34,9 +34,9 @@
 
				 #define TYPE float

			
 
				 

			
 
				 // Basic Matrix dimensions

			
 
				-#define WA (1024L * BLOCK_SIZE) // Matrix A width

			
 
				+#define WA (128L * BLOCK_SIZE) // Matrix A width

			
 
				 #define HA (512L * BLOCK_SIZE) // Matrix A height

			
 
				-#define WB (1024L * BLOCK_SIZE) // Matrix B width

			
 
				+#define WB (128L * BLOCK_SIZE) // Matrix B width

			
 
				 #define HB WA  // Matrix B height

			
 
				 #define WC WB  // Matrix C width 

			
 
				 #define HC HA  // Matrix C height

			
@@ -115,10 +115,8 @@ static void __attribute__((unused)) parse_args(int argc, char **argv)
 
				 	}

			
 
				 }

			
 
				 

			
 
				-#define shrLog(...) fprintf(stderr, __VA_ARGS__);

			
 
				-

			
 
				 // Round Up Division function

			
 
				-size_t shrRoundUp(int group_size, int global_size) {

			
 
				+size_t roundUp(int group_size, int global_size) {

			
 
				 	int r = global_size % group_size;

			
 
				 	if(r == 0) {

			
 
				 		return global_size;

			
@@ -127,18 +125,18 @@ size_t shrRoundUp(int group_size, int global_size) {
 
				 	}

			
 
				 }

			
 
				 

			
 
				-void fillArray(TYPE* pfData, int iSize) {

			
 
				+void fillArray(TYPE* data, int size) {

			
 
				 	int i;

			
 
				 	const TYPE fScale = (TYPE)(1.0f / (float)RAND_MAX);

			
 
				-	for (i = 0; i < iSize; ++i) {

			
 
				-		pfData[i] = fScale * rand();

			
 
				+	for (i = 0; i < size; ++i) {

			
 
				+		data[i] = fScale * rand();

			
 
				 	}

			
 
				 }

			
 
				 

			
 
				-void shrPrintArray(float* pfData, int iSize) {

			
 
				+void printArray(float* data, int size) {

			
 
				 	int i;

			
 
				-	for (i = 0; i < iSize; ++i) {

			
 
				-		shrLog("%d: %.3f\n", i, pfData[i]);

			
 
				+	for (i = 0; i < size; ++i) {

			
 
				+		printf("%d: %.3f\n", i, data[i]);

			
 
				 	}

			
 
				 }

			
 
				 

			
@@ -222,8 +220,10 @@ int main(int argc, const char** argv) {
 
				 	parse_args(argc, argv);

			
 
				 

			
 
				 	check(clGetPlatformIDs(5, platforms, &platform_count));

			
 
				-	if (platform_count == 0)

			
 
				-		error("No platform found\n");

			
 
				+	if (platform_count == 0) {

			
 
				+		printf("No platform found\n");

			
 
				+		exit(77);

			
 
				+	}

			
 
				 

			
 
				 	cl_uint device_count;

			
 
				 	cl_uint devs[platform_count];

			
@@ -350,7 +350,7 @@ int main(int argc, const char** argv) {
 
				 		check(clSetKernelArg(multiplicationKernel[p], 4, sizeof(cl_mem), (void *) &d_B[d]));

			
 
				 		check(clSetKernelArg(multiplicationKernel[p], 5, sizeof(cl_mem), (void *) &d_C[i]));

			
 
				 

			
 
				-		size_t globalWorkSize[] = {shrRoundUp(BLOCK_SIZE,WC), shrRoundUp(BLOCK_SIZE,workSize[i])};

			
 
				+		size_t globalWorkSize[] = {roundUp(BLOCK_SIZE,WC), roundUp(BLOCK_SIZE,workSize[i])};

			
 
				 

			
 
				 		check(clEnqueueNDRangeKernel(commandQueue[p][dev], multiplicationKernel[p], 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &GPUExecution[i]));

			
 
				 

			
@@ -430,26 +430,26 @@ int main(int argc, const char** argv) {
 
				 	return 0;

			
 
				 }

			
 
				 

			
 
				-void printDiff(TYPE *data1, TYPE *data2, int width, int height, int iListLength, TYPE fListTol) {

			
 
				-	shrLog("Listing first %d Differences > %.6f...\n", iListLength, fListTol);

			
 
				+void printDiff(TYPE *data1, TYPE *data2, int width, int height, int listLength, TYPE listTol) {

			
 
				+	printf("Listing first %d Differences > %.6f...\n", listLength, listTol);

			
 
				 	int i,j,k;

			
 
				 	int error_count=0;

			
 
				 	for (j = 0; j < height; j++) {

			
 
				-		if (error_count < iListLength) {

			
 
				-			shrLog("\n  Row %d:\n", j);

			
 
				+		if (error_count < listLength) {

			
 
				+			printf("\n  Row %d:\n", j);

			
 
				 		}

			
 
				 		for (i = 0; i < width; i++) {

			
 
				 			k = j * width + i;

			
 
				-			float fDiff = fabs(data1[k] - data2[k]);

			
 
				-			if (fDiff > fListTol) {                

			
 
				-				if (error_count < iListLength) {

			
 
				-					shrLog("    Loc(%d,%d)\tCPU=%.5f\tGPU=%.5f\tDiff=%.6f\n", i, j, data1[k], data2[k], fDiff);

			
 
				+			float diff = fabs(data1[k] - data2[k]);

			
 
				+			if (diff > listTol) {                

			
 
				+				if (error_count < listLength) {

			
 
				+					printf("    Loc(%d,%d)\tCPU=%.5f\tGPU=%.5f\tDiff=%.6f\n", i, j, data1[k], data2[k], diff);

			
 
				 				}

			
 
				 				error_count++;

			
 
				 			}

			
 
				 		}

			
 
				 	}

			
 
				-	shrLog(" \n  Total Errors = %d\n\n", error_count);

			
 
				+	printf(" \n  Total Errors = %d\n\n", error_count);

			
 
				 }

			
 
				 

			
 
				 /**

			
--- a/src/common/fxt.c
+++ b/src/common/fxt.c
@@ -29,6 +29,10 @@
 
				 #include <windows.h>
			
 
				 #endif
			
 
				 
			
 
				+#ifdef __FreeBSD__
			
 
				+#include <sys/thr.h> /* for thr_self() */
			
 
				+#endif
			
 
				+
			
 
				 #define _STARPU_PROF_BUFFER_SIZE  (8*1024*1024)
			
 
				 
			
 
				 static char _STARPU_PROF_FILE_USER[128];
			
@@ -38,6 +42,19 @@ static int _starpu_written = 0;
 
				 
			
 
				 static int _starpu_id;
			
 
				 
			
 
				+long _starpu_gettid()
			
 
				+{
			
 
				+#if defined(__linux__)
			
 
				+	return syscall(SYS_gettid);
			
 
				+#elif defined(__FreeBSD__)
			
 
				+	long tid;
			
 
				+	thr_self(&tid);
			
 
				+	return tid;
			
 
				+#else
			
 
				+	return (long) pthread_self();
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				 static void _starpu_profile_set_tracefile(void *last, ...)
			
 
				 {
			
 
				 	va_list vl;
			
@@ -78,7 +95,7 @@ void _starpu_start_fxt_profiling(void)
 
				 		_starpu_profile_set_tracefile(NULL);
			
 
				 	}
			
 
				 
			
 
				-	threadid = syscall(SYS_gettid);
			
 
				+	threadid = _starpu_gettid();
			
 
				 
			
 
				 	atexit(_starpu_stop_fxt_profiling);
			
 
				 
			
@@ -140,7 +157,7 @@ void _starpu_stop_fxt_profiling(void)
 
				 
			
 
				 void _starpu_fxt_register_thread(unsigned cpuid)
			
 
				 {
			
 
				-	FUT_DO_PROBE2(FUT_NEW_LWP_CODE, cpuid, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid());
			
 
				 }
			
 
				 
			
 
				 #endif // STARPU_USE_FXT
			
--- a/src/common/fxt.h
+++ b/src/common/fxt.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -107,6 +107,8 @@
 
				 #include <fxt/fxt.h>
			
 
				 #include <fxt/fut.h>
			
 
				 
			
 
				+long _starpu_gettid(void);
			
 
				+
			
 
				 /* Initialize the FxT library. */
			
 
				 void _starpu_start_fxt_profiling(void);
			
 
				 
			
@@ -178,13 +180,13 @@ do {									\
 
				 
			
 
				 /* workerkind = _STARPU_FUT_CPU_KEY for instance */
			
 
				 #define _STARPU_TRACE_NEW_MEM_NODE(nodeid)			\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_INIT_START(workerkind, devid, memnode)	\
			
 
				-	FUT_DO_PROBE4(_STARPU_FUT_WORKER_INIT_START, workerkind, devid, memnode, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE4(_STARPU_FUT_WORKER_INIT_START, workerkind, devid, memnode, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_INIT_END				\
			
 
				-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_INIT_END, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_START_CODELET_BODY(job)				\
			
 
				 do {									\
			
@@ -192,10 +194,10 @@ do {									\
 
				 	if (model_name)                                                 \
			
 
				 	{								\
			
 
				 		/* we include the symbol name */			\
			
 
				-		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, syscall(SYS_gettid), 1, model_name); \
			
 
				+		_STARPU_FUT_DO_PROBE3STR(_STARPU_FUT_START_CODELET_BODY, (job), _starpu_gettid(), 1, model_name); \
			
 
				 	}								\
			
 
				 	else {                                                          \
			
 
				-		FUT_DO_PROBE4(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, syscall(SYS_gettid), 0); \
			
 
				+		FUT_DO_PROBE3(_STARPU_FUT_START_CODELET_BODY, (job), _starpu_gettid(), 0); \
			
 
				 	}								\
			
 
				 } while(0);
			
 
				 
			
@@ -203,35 +205,35 @@ do {									\
 
				 do {									\
			
 
				 	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
			
 
				 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
			
 
				-	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
			
 
				+	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype), _starpu_gettid());	\
			
 
				 } while(0);
			
 
				 
			
 
				 #define _STARPU_TRACE_START_CALLBACK(job)	\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_START_CALLBACK, job, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_START_CALLBACK, job, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_END_CALLBACK(job)	\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_END_CALLBACK, job, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_END_CALLBACK, job, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_JOB_PUSH(task, prio)	\
			
 
				-	FUT_DO_PROBE3(_STARPU_FUT_JOB_PUSH, task, prio, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE3(_STARPU_FUT_JOB_PUSH, task, prio, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_JOB_POP(task, prio)	\
			
 
				-	FUT_DO_PROBE3(_STARPU_FUT_JOB_POP, task, prio, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE3(_STARPU_FUT_JOB_POP, task, prio, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_UPDATE_TASK_CNT(counter)	\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_UPDATE_TASK_CNT, counter, syscall(SYS_gettid))
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_UPDATE_TASK_CNT, counter, _starpu_gettid())
			
 
				 
			
 
				 #define _STARPU_TRACE_START_FETCH_INPUT(job)	\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_START_FETCH_INPUT, job, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_START_FETCH_INPUT, job, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_END_FETCH_INPUT(job)	\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_END_FETCH_INPUT, job, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_END_FETCH_INPUT, job, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_START_PUSH_OUTPUT(job)	\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_START_PUSH_OUTPUT, job, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_START_PUSH_OUTPUT, job, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_END_PUSH_OUTPUT(job)	\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_END_PUSH_OUTPUT, job, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_END_PUSH_OUTPUT, job, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_TAG(tag, job)	\
			
 
				 	FUT_DO_PROBE2(_STARPU_FUT_TAG, tag, (job)->job_id)
			
@@ -251,10 +253,10 @@ do {										\
 
				         const char *model_name = _starpu_get_model_name((job));                       \
			
 
				 	if (model_name)					                        \
			
 
				 	{									\
			
 
				-		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_TASK_DONE, (job)->job_id, syscall(SYS_gettid), (long unsigned)exclude_from_dag, 1, model_name);\
			
 
				+		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid(), (long unsigned)exclude_from_dag, 1, model_name);\
			
 
				 	}									\
			
 
				 	else {									\
			
 
				-		FUT_DO_PROBE4(_STARPU_FUT_TASK_DONE, (job)->job_id, syscall(SYS_gettid), (long unsigned)exclude_from_dag, 0);\
			
 
				+		FUT_DO_PROBE4(_STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid(), (long unsigned)exclude_from_dag, 0);\
			
 
				 	}									\
			
 
				 } while(0);
			
 
				 
			
@@ -264,10 +266,10 @@ do {										\
 
				         const char *model_name = _starpu_get_model_name((job));                       \
			
 
				 	if (model_name)                                                         \
			
 
				 	{									\
			
 
				-          _STARPU_FUT_DO_PROBE3STR(_STARPU_FUT_TAG_DONE, (tag)->id, syscall(SYS_gettid), 1, model_name); \
			
 
				+          _STARPU_FUT_DO_PROBE3STR(_STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 1, model_name); \
			
 
				 	}									\
			
 
				 	else {									\
			
 
				-		FUT_DO_PROBE3(_STARPU_FUT_TAG_DONE, (tag)->id, syscall(SYS_gettid), 0);\
			
 
				+		FUT_DO_PROBE3(_STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\
			
 
				 	}									\
			
 
				 } while(0);
			
 
				 
			
@@ -290,56 +292,56 @@ do {										\
 
				 	FUT_DO_PROBE2(_STARPU_FUT_WORK_STEALING, empty_q, victim_q)
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_DEINIT_START			\
			
 
				-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_SLEEP_START	\
			
 
				-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_START, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_START, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_SLEEP_END	\
			
 
				-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_END, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_USER_DEFINED_START	\
			
 
				-	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_START, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_START, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_USER_DEFINED_END		\
			
 
				-	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_END, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_END, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_START_ALLOC(memnode)		\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC, memnode, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC, memnode, _starpu_gettid());
			
 
				 	
			
 
				 #define _STARPU_TRACE_END_ALLOC(memnode)		\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC, memnode, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC, memnode, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_START_ALLOC_REUSE(memnode)		\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC_REUSE, memnode, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC_REUSE, memnode, _starpu_gettid());
			
 
				 	
			
 
				 #define _STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid());
			
 
				 	
			
 
				 #define _STARPU_TRACE_START_MEMRECLAIM(memnode)		\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_START_MEMRECLAIM, memnode, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_START_MEMRECLAIM, memnode, _starpu_gettid());
			
 
				 	
			
 
				 #define _STARPU_TRACE_END_MEMRECLAIM(memnode)		\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_END_MEMRECLAIM, memnode, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_END_MEMRECLAIM, memnode, _starpu_gettid());
			
 
				 	
			
 
				 /* We skip these events becasue they are called so often that they cause FxT to
			
 
				  * fail and make the overall trace unreadable anyway. */
			
 
				 #define _STARPU_TRACE_START_PROGRESS(memnode)		\
			
 
				 	do {} while (0);
			
 
				-//	FUT_DO_PROBE2(_STARPU_FUT_START_PROGRESS, memnode, syscall(SYS_gettid));
			
 
				+//	FUT_DO_PROBE2(_STARPU_FUT_START_PROGRESS, memnode, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_END_PROGRESS(memnode)		\
			
 
				 	do {} while (0);
			
 
				-	//FUT_DO_PROBE2(_STARPU_FUT_END_PROGRESS, memnode, syscall(SYS_gettid));
			
 
				+	//FUT_DO_PROBE2(_STARPU_FUT_END_PROGRESS, memnode, _starpu_gettid());
			
 
				 	
			
 
				 #define _STARPU_TRACE_USER_EVENT(code)			\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_USER_EVENT, code, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_USER_EVENT, code, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_SET_PROFILING(status)		\
			
 
				-	FUT_DO_PROBE2(_STARPU_FUT_SET_PROFILING, status, syscall(SYS_gettid));
			
 
				+	FUT_DO_PROBE2(_STARPU_FUT_SET_PROFILING, status, _starpu_gettid());
			
 
				 
			
 
				 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
			
 
				 	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -417,10 +417,10 @@ int starpu_task_submit(struct starpu_task *task)
 
				 
			
 
				 		_starpu_detect_implicit_data_deps(task);
			
 
				 
			
 
				-		if (task->cl->model)
			
 
				+		if (task->cl->model && task->cl->model->symbol)
			
 
				 			_starpu_load_perfmodel(task->cl->model);
			
 
				 
			
 
				-		if (task->cl->power_model)
			
 
				+		if (task->cl->power_model && task->cl->power_model->symbol)
			
 
				 			_starpu_load_perfmodel(task->cl->power_model);
			
 
				 	}
			
 
				 
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -229,6 +229,9 @@ static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
 
				 
			
 
				 		switch (d->type)
			
 
				 		{
			
 
				+		case STARPU_CPU_WORKER:
			
 
				+			if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id)
			
 
				+				return 0;
			
 
				 		case STARPU_CUDA_WORKER:
			
 
				 			if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
			
 
				 				return 0;
			
@@ -257,7 +260,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 	unsigned nworkers = config->topology.nworkers;
			
 
				 
			
 
				 	/* Launch workers asynchronously (except for SPUs) */
			
 
				-	unsigned cuda = 0;
			
 
				+	unsigned cpu = 0, cuda = 0;
			
 
				 	unsigned worker;
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
@@ -273,6 +276,8 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 		workerarg->worker_size = 1;
			
 
				 		workerarg->combined_workerid = workerarg->workerid;
			
 
				 		workerarg->current_rank = 0;
			
 
				+		workerarg->run_by_starpu = 1;
			
 
				+
			
 
				 		workerarg->has_prev_init = 0;
			
 
				 		/* mutex + cond only for the local list */
			
 
				 		/* we have a single local list */
			
@@ -304,8 +309,17 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 			case STARPU_CPU_WORKER:
			
 
				 				workerarg->set = NULL;
			
 
				 				workerarg->worker_is_initialized = 0;
			
 
				-				pthread_create(&workerarg->worker_thread,
			
 
				-						NULL, _starpu_cpu_worker, workerarg);
			
 
				+				driver.id.cpu_id = cpu;
			
 
				+				if (_starpu_may_launch_driver(config->conf, &driver))
			
 
				+				{
			
 
				+					pthread_create(&workerarg->worker_thread,
			
 
				+							NULL, _starpu_cpu_worker, workerarg);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					workerarg->run_by_starpu = 0;
			
 
				+				}
			
 
				+				cpu++;
			
 
				 				break;
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -318,6 +332,10 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 					pthread_create(&workerarg->worker_thread,
			
 
				 						       NULL, _starpu_cuda_worker, workerarg);
			
 
				 				}
			
 
				+				else
			
 
				+				{
			
 
				+					workerarg->run_by_starpu = 0;
			
 
				+				}
			
 
				 				cuda++;
			
 
				 				break;
			
 
				 #endif
			
@@ -325,7 +343,10 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 			case STARPU_OPENCL_WORKER:
			
 
				 				starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
			
 
				 				if (!_starpu_may_launch_driver(config->conf, &driver))
			
 
				+				{
			
 
				+					workerarg->run_by_starpu = 0;
			
 
				 					break;
			
 
				+				}
			
 
				 				workerarg->set = NULL;
			
 
				 				workerarg->worker_is_initialized = 0;
			
 
				 				pthread_create(&workerarg->worker_thread,
			
@@ -367,6 +388,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	cpu  = 0;
			
 
				 	cuda = 0;
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
@@ -377,10 +399,17 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 		switch (workerarg->arch)
			
 
				 		{
			
 
				 			case STARPU_CPU_WORKER:
			
 
				+				driver.id.cpu_id = cpu;
			
 
				+				if (!_starpu_may_launch_driver(config->conf, &driver))
			
 
				+				{
			
 
				+					cpu++;
			
 
				+					break;
			
 
				+				}
			
 
				 				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			
 
				 				while (!workerarg->worker_is_initialized)
			
 
				 					_STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
			
 
				 				_STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			
 
				+				cpu++;
			
 
				 				break;
			
 
				 			case STARPU_CUDA_WORKER:
			
 
				 				driver.id.cuda_id = cuda;
			
@@ -667,6 +696,9 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				+			if (!worker->run_by_starpu)
			
 
				+				goto out;
			
 
				+
			
 
				 			if (!pthread_equal(pthread_self(), worker->worker_thread))
			
 
				 			{
			
 
				 				status = pthread_join(worker->worker_thread, NULL);
			
@@ -679,6 +711,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 
				 			}
			
 
				 		}
			
 
				 
			
 
				+out:
			
 
				 		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
			
 
				 		_starpu_job_list_delete(worker->terminated_jobs);
			
 
				 	}
			
@@ -1082,6 +1115,9 @@ struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
 
				 	return &config.sched_ctxs[0];
			
 
				 }
			
 
				 
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+extern int _starpu_run_cpu(struct starpu_driver *);
			
 
				+#endif
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 extern int _starpu_run_cuda(struct starpu_driver *);
			
 
				 #endif
			
@@ -1097,6 +1133,10 @@ starpu_driver_run(struct starpu_driver *d)
 
				 
			
 
				 	switch (d->type)
			
 
				 	{
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+	case STARPU_CPU_WORKER:
			
 
				+		return _starpu_run_cpu(d);
			
 
				+#endif
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	case STARPU_CUDA_WORKER:
			
 
				 		return _starpu_run_cuda(d);
			
@@ -1105,7 +1145,6 @@ starpu_driver_run(struct starpu_driver *d)
 
				 	case STARPU_OPENCL_WORKER:
			
 
				 		return _starpu_run_opencl(d);
			
 
				 #endif
			
 
				-	case STARPU_CPU_WORKER:    /* Not supported yet */
			
 
				 	case STARPU_GORDON_WORKER: /* Not supported yet */
			
 
				 	default:
			
 
				 		return -EINVAL;
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -81,6 +81,7 @@ struct _starpu_worker
 
				 	enum _starpu_worker_status status; /* what is the worker doing now ? (eg. CALLBACK) */
			
 
				 	char name[48];
			
 
				 	char short_name[10];
			
 
				+	unsigned run_by_starpu; /* Is this run by StarPU or directly by the application ? */
			
 
				 
			
 
				 	struct _starpu_sched_ctx **sched_ctx;
			
 
				 	unsigned nsched_ctxs; /* the no of contexts a worker belongs to*/
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -101,7 +101,6 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
				 static struct _starpu_worker*
			
 
				 _starpu_get_worker_from_driver(struct starpu_driver *d)
			
 
				 {
			
 
				-#if 1
			
 
				 	int workers[d->id.cpu_id + 1];
			
 
				 	int nworkers;
			
 
				 	nworkers = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, d->id.cpu_id+1);
			
@@ -109,19 +108,6 @@ _starpu_get_worker_from_driver(struct starpu_driver *d)
 
				 		return NULL; // No device was found.
			
 
				 	
			
 
				 	return _starpu_get_worker_struct(workers[d->id.cpu_id]);
			
 
				-#else
			
 
				-	int workers[STARPU_NMAXWORKERS];
			
 
				-	int nworkers;
			
 
				-	nworkers = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, STARPU_NMAXWORKERS);
			
 
				-	STARPU_ASSERT(nworkers > 0);
			
 
				-	int i;
			
 
				-	for (i = 0; i < nworkers; i++)
			
 
				-	{
			
 
				-		fprintf(stderr, "\tCPU %d\n", i);
			
 
				-	}
			
 
				-
			
 
				-	return _starpu_get_worker_struct(workers[d->id.cpu_id]);
			
 
				-#endif
			
 
				 }
			
 
				 
			
 
				 int _starpu_cpu_driver_init(struct starpu_driver *d)
			
@@ -345,3 +331,17 @@ _starpu_cpu_worker(void *arg)
 
				 
			
 
				 	return NULL;
			
 
				 }
			
 
				+
			
 
				+int _starpu_run_cpu(struct starpu_driver *d)
			
 
				+{
			
 
				+	STARPU_ASSERT(d && d->type == STARPU_CPU_WORKER);
			
 
				+
			
 
				+	struct _starpu_worker *worker = _starpu_get_worker_from_driver(d);
			
 
				+	STARPU_ASSERT(worker);
			
 
				+
			
 
				+	worker->set = NULL;
			
 
				+	worker->worker_is_initialized = 0;
			
 
				+	_starpu_cpu_worker(worker);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/starpufft/Makefile.am
+++ b/starpufft/Makefile.am
@@ -19,8 +19,8 @@ AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include $(STARPU_CUDA_C
 
				 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la
			
 
				 
			
 
				 EXTRA_DIST =			\
			
 
				-	float.h			\
			
 
				-	double.h		\
			
 
				+	starpufft-float.h	\
			
 
				+	starpufft-double.h	\
			
 
				 	cudax_kernels.h		\
			
 
				 	starpufftx.c		\
			
 
				 	starpufftx1d.c		\
			
--- a/starpufft/cuda_kernels.cu
+++ b/starpufft/cuda_kernels.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "starpufft-double.h"
			
 
				 #include "cudax_kernels.cu"
			
--- a/starpufft/cudaf_kernels.cu
+++ b/starpufft/cudaf_kernels.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "starpufft-float.h"
			
 
				 #include "cudax_kernels.cu"
			
--- a/starpufft/examples/test.c
+++ b/starpufft/examples/test.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "starpufft-double.h"
			
 
				 #include "testx.c"
			
--- a/starpufft/examples/test_threads.c
+++ b/starpufft/examples/test_threads.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "starpufft-double.h"
			
 
				 #include "testx_threads.c"
			
--- a/starpufft/examples/testf.c
+++ b/starpufft/examples/testf.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "starpufft-float.h"
			
 
				 #include "testx.c"
			
--- a/starpufft/examples/testf_threads.c
+++ b/starpufft/examples/testf_threads.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "starpufft-float.h"
			
 
				 #include "testx_threads.c"
			
--- a/starpufft/starpufft-double.h
+++ b/starpufft/starpufft-double.h
--- a/starpufft/starpufft-float.h
+++ b/starpufft/starpufft-float.h
--- a/starpufft/starpufft.c
+++ b/starpufft/starpufft.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "double.h"
			
 
				+#include "starpufft-double.h"
			
 
				 #include "starpufftx.c"
			
--- a/starpufft/starpufftf.c
+++ b/starpufft/starpufftf.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,5 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-#include "float.h"
			
 
				+#include "starpufft-float.h"
			
 
				 #include "starpufftx.c"
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -115,6 +115,7 @@ noinst_PROGRAMS =				\
 
				 	starpu_machine_display			\
			
 
				 	main/deprecated_func			\
			
 
				 	main/deprecated_buffer			\
			
 
				+	main/driver_api/init_run_deinit         \
			
 
				 	main/restart				\
			
 
				 	main/execute_on_a_specific_worker	\
			
 
				 	main/insert_task			\
			
--- a/tests/main/driver_api/init_run_deinit.c
+++ b/tests/main/driver_api/init_run_deinit.c
@@ -0,0 +1,239 @@
 
				+#include <starpu.h>
			
 
				+#include <starpu_opencl.h>
			
 
				+
			
 
				+#include "../../helper.h"
			
 
				+
			
 
				+#define NTASKS 8
			
 
				+
			
 
				+#if defined(STARPU_USE_CPU) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
			
 
				+static void
			
 
				+dummy(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(*(int *)args)++;
			
 
				+}
			
 
				+
			
 
				+static struct starpu_codelet cl =
			
 
				+{
			
 
				+	.cpu_funcs    = { dummy, NULL },
			
 
				+	.cuda_funcs   = { dummy, NULL },
			
 
				+	.opencl_funcs = { dummy, NULL },
			
 
				+	.nbuffers     = 0
			
 
				+};
			
 
				+
			
 
				+static void
			
 
				+init_driver(struct starpu_driver *d)
			
 
				+{
			
 
				+	int ret;
			
 
				+	ret = starpu_driver_init(d);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_init");
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+run(struct starpu_task *task, struct starpu_driver *d)
			
 
				+{
			
 
				+	int ret;
			
 
				+	ret = starpu_task_submit(task);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				+	ret = starpu_driver_run_once(d);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_run_once");
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+deinit_driver(struct starpu_driver *d)
			
 
				+{
			
 
				+	int ret; 
			
 
				+	ret = starpu_driver_deinit(d);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_deinit");
			
 
				+}
			
 
				+#endif /* STARPU_USE_CPU || STARPU_USE_CUDA || STARPU_USE_OPENCL */
			
 
				+
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+static int
			
 
				+test_cpu(void)
			
 
				+{
			
 
				+	int var = 0, ret;
			
 
				+	struct starpu_conf conf;
			
 
				+
			
 
				+	ret = starpu_conf_init(&conf);
			
 
				+	if (ret == -EINVAL)
			
 
				+		return 1;
			
 
				+	
			
 
				+
			
 
				+	struct starpu_driver d =
			
 
				+	{
			
 
				+		.type = STARPU_CPU_WORKER,
			
 
				+		.id.cpu_id = 0
			
 
				+	};
			
 
				+
			
 
				+	conf.not_launched_drivers = &d;
			
 
				+	conf.n_not_launched_drivers = 1;
			
 
				+
			
 
				+	ret = starpu_init(&conf);
			
 
				+	if (ret == -ENODEV)
			
 
				+		return STARPU_TEST_SKIPPED;
			
 
				+
			
 
				+	init_driver(&d);
			
 
				+	int i;	
			
 
				+	for (i = 0; i < NTASKS; i++)
			
 
				+	{
			
 
				+		struct starpu_task *task;
			
 
				+		task = starpu_task_create();
			
 
				+		cl.where = STARPU_CPU;
			
 
				+		task->cl = &cl;
			
 
				+		task->cl_arg = &var;
			
 
				+
			
 
				+		run(task, &d);
			
 
				+	}
			
 
				+	deinit_driver(&d);
			
 
				+
			
 
				+	starpu_task_wait_for_all();
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	fprintf(stderr, "[CPU] Var is %d\n", var);
			
 
				+	return !!(var != NTASKS);
			
 
				+}
			
 
				+#endif /* STARPU_USE_CPU */
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static int
			
 
				+test_cuda(void)
			
 
				+{
			
 
				+	int var = 0, ret;
			
 
				+	struct starpu_conf conf;
			
 
				+
			
 
				+	ret = starpu_conf_init(&conf);
			
 
				+	if (ret == -EINVAL)
			
 
				+		return 1;
			
 
				+	
			
 
				+
			
 
				+	struct starpu_driver d =
			
 
				+	{
			
 
				+		.type = STARPU_CUDA_WORKER,
			
 
				+		.id.cuda_id = 0
			
 
				+	};
			
 
				+
			
 
				+	conf.ncuda = 1;
			
 
				+	conf.nopencl = 0;
			
 
				+	conf.not_launched_drivers = &d;
			
 
				+	conf.n_not_launched_drivers = 1;
			
 
				+
			
 
				+	ret = starpu_init(&conf);
			
 
				+	if (ret == -ENODEV)
			
 
				+		return STARPU_TEST_SKIPPED;
			
 
				+
			
 
				+
			
 
				+	init_driver(&d);
			
 
				+	int i;	
			
 
				+	for (i = 0; i < NTASKS; i++)
			
 
				+	{
			
 
				+		struct starpu_task *task;
			
 
				+		task = starpu_task_create();
			
 
				+		cl.where = STARPU_CUDA;
			
 
				+		task->cl = &cl;
			
 
				+		task->cl_arg = &var;
			
 
				+
			
 
				+		run(task, &d);
			
 
				+	}
			
 
				+	deinit_driver(&d);
			
 
				+
			
 
				+	starpu_task_wait_for_all();
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	fprintf(stderr, "[CUDA] Var is %d\n", var);
			
 
				+	return !!(var != NTASKS);
			
 
				+}
			
 
				+#endif /* STARPU_USE_CUDA */
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static int
			
 
				+test_opencl(void)
			
 
				+{
			
 
				+        cl_int err;
			
 
				+        cl_platform_id platform;
			
 
				+        cl_uint dummy;
			
 
				+
			
 
				+        err = clGetPlatformIDs(1, &platform, &dummy);
			
 
				+        if (err != CL_SUCCESS)
			
 
				+        {   
			
 
				+                fprintf(stderr, "%s:%d\n", __func__, __LINE__);
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				+	cl_device_id device_id;
			
 
				+        err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
			
 
				+        if (err != CL_SUCCESS)
			
 
				+        {   
			
 
				+                fprintf(stderr, "%s:%d\n", __func__, __LINE__);
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				+	int var = 0, ret;
			
 
				+	struct starpu_conf conf;
			
 
				+
			
 
				+	ret = starpu_conf_init(&conf);
			
 
				+	if (ret == -EINVAL)
			
 
				+		return 1;
			
 
				+	
			
 
				+	struct starpu_driver d =
			
 
				+	{
			
 
				+		.type = STARPU_OPENCL_WORKER,
			
 
				+		.id.opencl_id = device_id
			
 
				+	};
			
 
				+
			
 
				+	conf.ncuda = 0;
			
 
				+	conf.nopencl = 1;
			
 
				+	conf.not_launched_drivers = &d;
			
 
				+	conf.n_not_launched_drivers = 1;
			
 
				+
			
 
				+	ret = starpu_init(&conf);
			
 
				+	if (ret == -ENODEV)
			
 
				+		return STARPU_TEST_SKIPPED;
			
 
				+
			
 
				+
			
 
				+	init_driver(&d);
			
 
				+	int i;	
			
 
				+	for (i = 0; i < NTASKS; i++)
			
 
				+	{
			
 
				+		struct starpu_task *task;
			
 
				+		task = starpu_task_create();
			
 
				+		cl.where = STARPU_OPENCL;
			
 
				+		task->cl = &cl;
			
 
				+		task->cl_arg = &var;
			
 
				+
			
 
				+		run(task, &d);
			
 
				+	}
			
 
				+	deinit_driver(&d);
			
 
				+
			
 
				+
			
 
				+	starpu_task_wait_for_all();
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	FPRINTF(stderr, "[OpenCL] Var is %d\n", var);
			
 
				+	return !!(var != NTASKS);
			
 
				+}
			
 
				+#endif /* STARPU_USE_OPENCL */
			
 
				+
			
 
				+int
			
 
				+main(void)
			
 
				+{
			
 
				+	int ret = STARPU_TEST_SKIPPED;
			
 
				+
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+	ret = test_cpu();
			
 
				+	if (ret == 1)
			
 
				+		return ret;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	ret = test_cuda();
			
 
				+	if (ret == 1)
			
 
				+		return ret;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	ret = test_opencl();
			
 
				+	if (ret == 1)
			
 
				+		return ret;
			
 
				+#endif
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
--- a/tests/sched_policies/simple_cpu_gpu_sched.c
+++ b/tests/sched_policies/simple_cpu_gpu_sched.c
@@ -88,11 +88,13 @@ gpu_task_gpu(struct starpu_task *task,
 
				 
			
 
				 static struct starpu_perfmodel model_cpu_task = 
			
 
				 {
			
 
				-	.type = STARPU_PER_ARCH
			
 
				+	.type = STARPU_PER_ARCH,
			
 
				+	.symbol = "model_cpu_task"
			
 
				 };
			
 
				 static struct starpu_perfmodel model_gpu_task = 
			
 
				 {
			
 
				-	.type = STARPU_PER_ARCH
			
 
				+	.type = STARPU_PER_ARCH,
			
 
				+	.symbol = "model_gpu_task"
			
 
				 };
			
 
				 
			
 
				 static void