Browse Source

merge again

Andra Hugo 13 years ago
parent
commit
c91789a855

+ 82 - 1
doc/chapters/advanced-examples.texi

@@ -10,10 +10,11 @@
 * Using multiple implementations of a codelet::
 * Using multiple implementations of a codelet::
 * Enabling implementation according to capabilities::
 * Enabling implementation according to capabilities::
 * Task and Worker Profiling::   
 * Task and Worker Profiling::   
-* Partitioning Data::           Partitioning Data
+* Partitioning Data::
 * Performance model example::   
 * Performance model example::   
 * Theoretical lower bound on execution time::  
 * Theoretical lower bound on execution time::  
 * Insert Task Utility::          
 * Insert Task Utility::          
+* Data reduction::  
 * Parallel Tasks::
 * Parallel Tasks::
 * Debugging::
 * Debugging::
 * The multiformat interface::
 * The multiformat interface::
@@ -587,6 +588,86 @@ be executed, and is allowed to read from @code{i} to use it e.g. as an
 index. Note that this macro is only avaible when compiling StarPU with
 index. Note that this macro is only avaible when compiling StarPU with
 the compiler @code{gcc}.
 the compiler @code{gcc}.
 
 
+@node Data reduction
+@section Data reduction
+
+In various cases, some piece of data is used to accumulate intermediate
+results. For instances, the dot product of a vector, maximum/minimum finding,
+the histogram of a photograph, etc. When these results are produced along the
+whole machine, it would not be efficient to accumulate them in only one place,
+incurring data transmission each and access concurrency.
+
+StarPU provides a @code{STARPU_REDUX} mode, which permits to optimize
+that case: it will allocate a buffer on each memory node, and accumulate
+intermediate results there. When the data is eventually accessed in the normal
+@code{STARPU_R} mode, StarPU will collect the intermediate results in just one
+buffer.
+
+For this to work, the user has to use the
+@code{starpu_data_set_reduction_methods} to declare how to initialize these
+buffers, and how to assemble partial results.
+
+For instance, @code{cg} uses that to optimize its dot product: it first defines
+the codelets for initialization and reduction:
+
+@smallexample
+struct starpu_codelet bzero_variable_cl =
+@{
+        .cpu_funcs = @{ bzero_variable_cpu, NULL @},
+        .cuda_funcs = @{ bzero_variable_cuda, NULL @},
+        .nbuffers = 1,
+@}
+
+static void accumulate_variable_cpu(void *descr[], void *cl_arg)
+@{
+        double *v_dst = (double *)STARPU_VARIABLE_GET_PTR(descr[0]);
+        double *v_src = (double *)STARPU_VARIABLE_GET_PTR(descr[1]);
+        *v_dst = *v_dst + *v_src;
+@}
+
+static void accumulate_variable_cuda(void *descr[], void *cl_arg)
+@{
+        double *v_dst = (double *)STARPU_VARIABLE_GET_PTR(descr[0]);
+        double *v_src = (double *)STARPU_VARIABLE_GET_PTR(descr[1]);
+        cublasaxpy(1, (double)1.0, v_src, 1, v_dst, 1);
+        cudaStreamSynchronize(starpu_cuda_get_local_stream());
+@}
+
+struct starpu_codelet accumulate_variable_cl =
+@{
+        .cpu_funcs = @{ accumulate_variable_cpu, NULL @},
+        .cuda_funcs = @{ accumulate_variable_cuda, NULL @},
+        .nbuffers = 1,
+@}
+@end smallexample
+
+and attaches them as reduction methods for its dtq handle:
+
+@smallexample
+starpu_data_set_reduction_methods(dtq_handle,
+        &accumulate_variable_cl, &bzero_variable_cl);
+@end smallexample
+
+and dtq_handle can now be used in @code{STARPU_REDUX} mode for the dot products
+with partitioned vectors:
+
+@smallexample
+int dots(starpu_data_handle v1, starpu_data_handle v2,
+         starpu_data_handle s, unsigned nblocks)
+@{
+    starpu_insert_task(&bzero_variable_cl, STARPU_W, s, 0);
+    for (b = 0; b < nblocks; b++)
+        starpu_insert_task(&dot_kernel_cl,
+            STARPU_RW, s,
+            STARPU_R, starpu_data_get_sub_data(v1, 1, b),
+            STARPU_R, starpu_data_get_sub_data(v2, 1, b),
+            0);
+@}
+@end smallexample
+
+The @code{cg} example also uses reduction for the blocked gemv kernel, leading
+to yet more relaxed dependencies and more parallelism.
+
 @node Parallel Tasks
 @node Parallel Tasks
 @section Parallel Tasks
 @section Parallel Tasks
 
 

File diff suppressed because it is too large
+ 1 - 1
doc/chapters/basic-api.texi


+ 2 - 1
doc/chapters/c-extensions.texi

@@ -10,7 +10,8 @@
 When GCC plug-in support is available, StarPU builds a plug-in for the
 When GCC plug-in support is available, StarPU builds a plug-in for the
 GNU Compiler Collection (GCC), which defines extensions to languages of
 GNU Compiler Collection (GCC), which defines extensions to languages of
 the C family (C, C++, Objective-C) that make it easier to write StarPU
 the C family (C, C++, Objective-C) that make it easier to write StarPU
-code@footnote{This feature is only available for GCC 4.5 and later.  You
+code@footnote{This feature is only available for GCC 4.5 and later; it
+is known to work with GCC 4.5, 4.6, and 4.7.  You
 may need to install a specific @code{-dev} package of your distro, such
 may need to install a specific @code{-dev} package of your distro, such
 as @code{gcc-4.6-plugin-dev} on Debian and derivatives.  In addition,
 as @code{gcc-4.6-plugin-dev} on Debian and derivatives.  In addition,
 the plug-in's test suite is only run when
 the plug-in's test suite is only run when

+ 4 - 0
doc/chapters/perf-optimization.texi

@@ -75,6 +75,10 @@ mode, or because write accesses are actually commutative), use the
 @code{starpu_data_set_sequential_consistency_flag} function to disable implicit
 @code{starpu_data_set_sequential_consistency_flag} function to disable implicit
 dependencies on that data.
 dependencies on that data.
 
 
+In the same vein, accumulation of results in the same data can become a
+bottleneck. The use of the @code{STARPU_REDUX} mode permits to optimize such
+accumulation (@pxref{Data reduction}).
+
 @node Task granularity
 @node Task granularity
 @section Task granularity
 @section Task granularity
 
 

+ 1 - 1
examples/interface/complex_interface.c

@@ -261,7 +261,7 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node,
 	return 0;
 	return 0;
 }
 }
 #endif
 #endif
-static const struct starpu_data_copy_methods complex_copy_methods =
+static struct starpu_data_copy_methods complex_copy_methods =
 {
 {
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	.ram_to_cuda = copy_ram_to_cuda,
 	.ram_to_cuda = copy_ram_to_cuda,

+ 41 - 16
gcc-plugin/src/c-expr.y

@@ -1,5 +1,5 @@
 /* GCC-StarPU
 /* GCC-StarPU
-   Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
+   Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
 
 
    GCC-StarPU is free software: you can redistribute it and/or modify
    GCC-StarPU is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    it under the terms of the GNU General Public License as published by
@@ -95,28 +95,53 @@
   /* Mapping of libcpp token names to Bison-generated token names.  This is
   /* Mapping of libcpp token names to Bison-generated token names.  This is
      not ideal but Bison cannot be told to use the `enum cpp_ttype'
      not ideal but Bison cannot be told to use the `enum cpp_ttype'
      values.  */
      values.  */
+
+#define STARPU_CPP_TOKENS			\
+  TK (CPP_NAME)					\
+  TK (CPP_NUMBER)				\
+  TK (CPP_AND)					\
+  TK (CPP_OPEN_SQUARE)				\
+  TK (CPP_CLOSE_SQUARE)				\
+  TK (CPP_OPEN_PAREN)				\
+  TK (CPP_CLOSE_PAREN)				\
+  TK (CPP_PLUS)					\
+  TK (CPP_MINUS)				\
+  TK (CPP_MULT)					\
+  TK (CPP_DIV)					\
+  TK (CPP_DOT)					\
+  TK (CPP_DEREF)
+
+#ifndef __cplusplus
+
   static const int cpplib_bison_token_map[] =
   static const int cpplib_bison_token_map[] =
     {
     {
-      [CPP_NAME] = YCPP_NAME,
-      [CPP_NUMBER] = YCPP_NUM,
-      [CPP_AND] = YCPP_AND,
-      [CPP_OPEN_SQUARE] = YCPP_OPEN_SQUARE,
-      [CPP_CLOSE_SQUARE] = YCPP_CLOSE_SQUARE,
-      [CPP_OPEN_PAREN] = YCPP_OPEN_PAREN,
-      [CPP_CLOSE_PAREN] = YCPP_CLOSE_PAREN,
-      [CPP_PLUS] = YCPP_PLUS,
-      [CPP_MINUS] = YCPP_MINUS,
-      [CPP_MULT] = YCPP_MULT,
-      [CPP_DIV] = YCPP_DIV,
-      [CPP_DOT] = YCPP_DOT,
-      [CPP_DEREF] = YCPP_DEREF
+# define TK(x) [x] = Y ## x,
+      STARPU_CPP_TOKENS
+# undef TK
     };
     };
 
 
+#else /* __cplusplus */
+
+  /* No designated initializers in C++.  */
+  static int cpplib_bison_token_map[CPP_PADDING];
+
+#endif	/* __cplusplus */
+
   static int
   static int
   yylex (YYSTYPE *lvalp)
   yylex (YYSTYPE *lvalp)
   {
   {
     int ret;
     int ret;
 
 
+#ifdef __cplusplus
+    if (cpplib_bison_token_map[CPP_NAME] != YCPP_NAME)
+      {
+	/* Initialize the table.  */
+# define TK(x) cpplib_bison_token_map[x] = Y ## x;
+	STARPU_CPP_TOKENS
+# undef TK
+      }
+#endif
+
     ret = pragma_lex (lvalp);
     ret = pragma_lex (lvalp);
     if (ret < sizeof cpplib_bison_token_map / sizeof cpplib_bison_token_map[0])
     if (ret < sizeof cpplib_bison_token_map / sizeof cpplib_bison_token_map[0])
       ret = cpplib_bison_token_map[ret];
       ret = cpplib_bison_token_map[ret];
@@ -128,7 +153,7 @@
 }
 }
 
 
 %token YCPP_NAME "identifier"
 %token YCPP_NAME "identifier"
-%token YCPP_NUM "integer"
+%token YCPP_NUMBER "integer"
 %token YCPP_AND "&"
 %token YCPP_AND "&"
 %token YCPP_OPEN_SQUARE "["
 %token YCPP_OPEN_SQUARE "["
 %token YCPP_CLOSE_SQUARE "]"
 %token YCPP_CLOSE_SQUARE "]"
@@ -228,7 +253,7 @@ primary_expression: identifier
      | YCPP_OPEN_PAREN expression YCPP_CLOSE_PAREN { $$ = $2; }
      | YCPP_OPEN_PAREN expression YCPP_CLOSE_PAREN { $$ = $2; }
 ;
 ;
 
 
-constant: YCPP_NUM { $$ = $1; }
+constant: YCPP_NUMBER { $$ = $1; }
 ;
 ;
 
 
 %%
 %%

+ 2 - 0
gcc-plugin/src/starpu-gcc-config.h.in

@@ -28,6 +28,8 @@
 
 
 #undef HAVE_DECL_BUILTIN_DECL_EXPLICIT
 #undef HAVE_DECL_BUILTIN_DECL_EXPLICIT
 
 
+#undef HAVE_ATTRIBUTE_SPEC_AFFECTS_TYPE_IDENTITY
+
 #undef HAVE_C_FAMILY_C_COMMON_H
 #undef HAVE_C_FAMILY_C_COMMON_H
 #undef HAVE_C_COMMON_H
 #undef HAVE_C_COMMON_H
 
 

+ 99 - 58
gcc-plugin/src/starpu.c

@@ -60,6 +60,41 @@
 #include <starpu.h>  /* for `STARPU_CPU' & co.  */
 #include <starpu.h>  /* for `STARPU_CPU' & co.  */
 
 
 
 
+/* GCC 4.7 requires compilation with `g++', and C++ lacks a number of GNU C
+   features, so work around that.  */
+
+#ifdef __cplusplus
+
+/* G++ doesn't implement nested functions, so use C++11 lambdas instead.  */
+
+# include <functional>
+
+# define local_define(ret, name, parms)     auto name = [=]parms
+# define function_parm(ret, name, parms)    std::function<ret parms> name
+
+/* G++ lacks designated initializers.  */
+# define designated_field_init(name, value) value /* XXX: cross fingers */
+
+#else  /* !__cplusplus */
+
+/* GNU C nested functions.  */
+
+# define local_define(ret, name, parms)	    ret name parms
+# define function_parm(ret, name, parms)    ret (*name) parms
+
+/* Designated field initializer.  */
+
+# define designated_field_init(name, value) .name = value
+
+#endif	/* !__cplusplus */
+
+
+/* C expression parser, possibly with C++ linkage.  */
+
+extern int yyparse (location_t, const char *, tree *);
+extern int yydebug;
+
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C" {
 extern "C" {
 #endif
 #endif
@@ -264,7 +299,7 @@ array_type_element_count (location_t loc, const_tree array_type)
 static tree
 static tree
 build_constructor_from_unsorted_list (tree type, tree vals)
 build_constructor_from_unsorted_list (tree type, tree vals)
 {
 {
-  int compare_elmt_bitpos (const void *rt1, const void *rt2)
+  local_define (int, compare_elmt_bitpos, (const void *rt1, const void *rt2))
   {
   {
     const constructor_elt *elmt1 = (constructor_elt *) rt1;
     const constructor_elt *elmt1 = (constructor_elt *) rt1;
     const constructor_elt *elmt2 = (constructor_elt *) rt2;
     const constructor_elt *elmt2 = (constructor_elt *) rt2;
@@ -274,7 +309,7 @@ build_constructor_from_unsorted_list (tree type, tree vals)
       = tree_int_cst_compare (bit_position (field1), bit_position (field2));
       = tree_int_cst_compare (bit_position (field1), bit_position (field2));
 
 
     return ret ? ret : (int) (DECL_UID (field1) - DECL_UID (field2));
     return ret ? ret : (int) (DECL_UID (field1) - DECL_UID (field2));
-  }
+  };
 
 
   tree t;
   tree t;
   VEC(constructor_elt,gc) *v = NULL;
   VEC(constructor_elt,gc) *v = NULL;
@@ -433,7 +468,7 @@ chain_trees (tree t, ...)
 }
 }
 
 
 static tree
 static tree
-filter (bool (*pred) (const_tree), tree t)
+filter (function_parm (bool, pred, (const_tree)), tree t)
 {
 {
   tree result, lst;
   tree result, lst;
 
 
@@ -451,12 +486,12 @@ filter (bool (*pred) (const_tree), tree t)
 }
 }
 
 
 static tree
 static tree
-list_remove (bool (*pred) (const_tree), tree t)
+list_remove (function_parm (bool, pred, (const_tree)), tree t)
 {
 {
-  bool opposite (const_tree t)
+  local_define (bool, opposite, (const_tree t))
   {
   {
     return !pred (t);
     return !pred (t);
-  }
+  };
 
 
   return filter (opposite, t);
   return filter (opposite, t);
 }
 }
@@ -465,7 +500,7 @@ list_remove (bool (*pred) (const_tree), tree t)
    chain of arbitrary tree objects.  */
    chain of arbitrary tree objects.  */
 
 
 static tree
 static tree
-map (tree (*func) (const_tree), tree t)
+map (function_parm (tree, func, (const_tree)), tree t)
 {
 {
   tree result, tail, lst;
   tree result, tail, lst;
 
 
@@ -485,7 +520,7 @@ map (tree (*func) (const_tree), tree t)
 }
 }
 
 
 static void
 static void
-for_each (void (*func) (tree), tree t)
+for_each (function_parm (void, func, (tree)), tree t)
 {
 {
   tree lst;
   tree lst;
 
 
@@ -496,7 +531,7 @@ for_each (void (*func) (tree), tree t)
 }
 }
 
 
 static size_t
 static size_t
-count (bool (*pred) (const_tree), const_tree t)
+count (function_parm (bool, pred, (const_tree)), const_tree t)
 {
 {
   size_t result;
   size_t result;
   const_tree lst;
   const_tree lst;
@@ -604,9 +639,6 @@ handle_pragma_wait (struct cpp_reader *reader)
 
 
 /* The minimal C expression parser.  */
 /* The minimal C expression parser.  */
 
 
-extern int yyparse (location_t, const char *, tree *);
-extern int yydebug;
-
 /* Parse expressions from the CPP reader for PRAGMA, which is located at LOC.
 /* Parse expressions from the CPP reader for PRAGMA, which is located at LOC.
    Return a TREE_LIST of C expressions.  */
    Return a TREE_LIST of C expressions.  */
 
 
@@ -1169,10 +1201,10 @@ handle_task_implementation_attribute (tree *node, tree name, tree args,
 		    where);
 		    where);
       else if (task_implementation_target_to_int (where) == STARPU_OPENCL)
       else if (task_implementation_target_to_int (where) == STARPU_OPENCL)
 	{
 	{
-	  void validate (tree t)
+	  local_define (void, validate, (tree t))
 	  {
 	  {
 	    validate_opencl_argument_type (loc, t);
 	    validate_opencl_argument_type (loc, t);
-	  }
+	  };
 
 
 	  for_each (validate, TYPE_ARG_TYPES (TREE_TYPE (fn)));
 	  for_each (validate, TYPE_ARG_TYPES (TREE_TYPE (fn)));
 	}
 	}
@@ -1494,27 +1526,35 @@ register_task_attributes (void *gcc_data, void *user_data)
     {
     {
       task_attribute_name, 0, 0, true, false, false,
       task_attribute_name, 0, 0, true, false, false,
       handle_task_attribute
       handle_task_attribute
+#ifdef HAVE_ATTRIBUTE_SPEC_AFFECTS_TYPE_IDENTITY
+      , false
+#endif
     };
     };
 
 
   static const struct attribute_spec task_implementation_attr =
   static const struct attribute_spec task_implementation_attr =
     {
     {
       task_implementation_attribute_name, 2, 2, true, false, false,
       task_implementation_attribute_name, 2, 2, true, false, false,
       handle_task_implementation_attribute
       handle_task_implementation_attribute
+#ifdef HAVE_ATTRIBUTE_SPEC_AFFECTS_TYPE_IDENTITY
+      , false
+#endif
     };
     };
 
 
   static const struct attribute_spec heap_allocated_attr =
   static const struct attribute_spec heap_allocated_attr =
     {
     {
       heap_allocated_attribute_name, 0, 0, true, false, false,
       heap_allocated_attribute_name, 0, 0, true, false, false,
       handle_heap_allocated_attribute
       handle_heap_allocated_attribute
+#ifdef HAVE_ATTRIBUTE_SPEC_AFFECTS_TYPE_IDENTITY
+      , false
+#endif
     };
     };
 
 
   static const struct attribute_spec output_attr =
   static const struct attribute_spec output_attr =
     {
     {
       output_attribute_name, 0, 0, true, true, false,
       output_attribute_name, 0, 0, true, true, false,
-      handle_output_attribute,
-#if 0 /* FIXME: Check whether the `affects_type_identity' field is
-	 present.  */
-      true /* affects type identity */
+      handle_output_attribute
+#ifdef HAVE_ATTRIBUTE_SPEC_AFFECTS_TYPE_IDENTITY
+      , true /* affects type identity */
 #endif
 #endif
     };
     };
 
 
@@ -1556,7 +1596,7 @@ build_codelet_wrapper_identifier (tree task_impl)
   id = DECL_NAME (task_impl);
   id = DECL_NAME (task_impl);
   task_name = IDENTIFIER_POINTER (id);
   task_name = IDENTIFIER_POINTER (id);
 
 
-  cl_name = alloca (IDENTIFIER_LENGTH (id) + strlen (suffix) + 1);
+  cl_name = (char *) alloca (IDENTIFIER_LENGTH (id) + strlen (suffix) + 1);
   memcpy (cl_name, task_name, IDENTIFIER_LENGTH (id));
   memcpy (cl_name, task_name, IDENTIFIER_LENGTH (id));
   strcpy (&cl_name[IDENTIFIER_LENGTH (id)], suffix);
   strcpy (&cl_name[IDENTIFIER_LENGTH (id)], suffix);
 
 
@@ -1571,12 +1611,16 @@ static tree
 build_codelet_wrapper_definition (tree task_impl)
 build_codelet_wrapper_definition (tree task_impl)
 {
 {
   location_t loc;
   location_t loc;
-  tree task_decl, decl;
+  tree task_decl, wrapper_name, decl;
 
 
   loc = DECL_SOURCE_LOCATION (task_impl);
   loc = DECL_SOURCE_LOCATION (task_impl);
   task_decl = task_implementation_task (task_impl);
   task_decl = task_implementation_task (task_impl);
 
 
-  tree build_local_var (const_tree type)
+  wrapper_name = build_codelet_wrapper_identifier (task_impl);
+  decl = build_decl (loc, FUNCTION_DECL, wrapper_name,
+		     build_codelet_wrapper_type ());
+
+  local_define (tree, build_local_var, (const_tree type))
   {
   {
     tree var, t;
     tree var, t;
     const char *seed;
     const char *seed;
@@ -1589,12 +1633,12 @@ build_codelet_wrapper_definition (tree task_impl)
     DECL_ARTIFICIAL (var) = true;
     DECL_ARTIFICIAL (var) = true;
 
 
     return var;
     return var;
-  }
+  };
 
 
   /* Return the body of the wrapper, which unpacks `cl_args' and calls the
   /* Return the body of the wrapper, which unpacks `cl_args' and calls the
      user-defined task implementation.  */
      user-defined task implementation.  */
 
 
-  tree build_body (tree wrapper_decl, tree vars)
+  local_define (tree, build_body, (tree wrapper_decl, tree vars))
   {
   {
     bool opencl_p;
     bool opencl_p;
     tree stmts = NULL, call, v;
     tree stmts = NULL, call, v;
@@ -1669,12 +1713,12 @@ build_codelet_wrapper_definition (tree task_impl)
     TREE_TYPE (bind) = TREE_TYPE (TREE_TYPE (wrapper_decl));
     TREE_TYPE (bind) = TREE_TYPE (TREE_TYPE (wrapper_decl));
 
 
     return bind;
     return bind;
-  }
+  };
 
 
   /* Return the parameter list of the wrapper:
   /* Return the parameter list of the wrapper:
      `(void **BUFFERS, void *CL_ARGS)'.  */
      `(void **BUFFERS, void *CL_ARGS)'.  */
 
 
-  tree build_parameters (tree wrapper_decl)
+  local_define (tree, build_parameters, (tree wrapper_decl))
   {
   {
     tree param1, param2;
     tree param1, param2;
 
 
@@ -1693,13 +1737,9 @@ build_codelet_wrapper_definition (tree task_impl)
     TREE_USED (param2) = true;
     TREE_USED (param2) = true;
 
 
     return chainon (param1, param2);
     return chainon (param1, param2);
-  }
-
-  tree wrapper_name, vars, result;
+  };
 
 
-  wrapper_name = build_codelet_wrapper_identifier (task_impl);
-  decl = build_decl (loc, FUNCTION_DECL, wrapper_name,
-		     build_codelet_wrapper_type ());
+  tree vars, result;
 
 
   vars = map (build_local_var,
   vars = map (build_local_var,
 	      list_remove (void_type_p,
 	      list_remove (void_type_p,
@@ -1749,7 +1789,7 @@ build_codelet_wrapper_definition (tree task_impl)
 static void
 static void
 define_codelet_wrappers (tree task)
 define_codelet_wrappers (tree task)
 {
 {
-  void define (tree task_impl)
+  local_define (void, define, (tree task_impl))
   {
   {
     tree wrapper_def;
     tree wrapper_def;
 
 
@@ -1759,7 +1799,7 @@ define_codelet_wrappers (tree task)
       tree_cons (get_identifier (task_implementation_wrapper_attribute_name),
       tree_cons (get_identifier (task_implementation_wrapper_attribute_name),
 		 wrapper_def,
 		 wrapper_def,
 		 DECL_ATTRIBUTES (task_impl));
 		 DECL_ATTRIBUTES (task_impl));
-  }
+  };
 
 
   for_each (define, task_implementation_list (task));
   for_each (define, task_implementation_list (task));
 }
 }
@@ -1779,7 +1819,7 @@ build_codelet_identifier (tree task_decl)
   id = DECL_NAME (task_decl);
   id = DECL_NAME (task_decl);
   task_name = IDENTIFIER_POINTER (id);
   task_name = IDENTIFIER_POINTER (id);
 
 
-  cl_name = alloca (IDENTIFIER_LENGTH (id) + strlen (suffix) + 1);
+  cl_name = (char *) alloca (IDENTIFIER_LENGTH (id) + strlen (suffix) + 1);
   memcpy (cl_name, task_name, IDENTIFIER_LENGTH (id));
   memcpy (cl_name, task_name, IDENTIFIER_LENGTH (id));
   strcpy (&cl_name[IDENTIFIER_LENGTH (id)], suffix);
   strcpy (&cl_name[IDENTIFIER_LENGTH (id)], suffix);
 
 
@@ -1840,7 +1880,7 @@ build_codelet_initializer (tree task_decl)
   fields = TYPE_FIELDS (codelet_type ());
   fields = TYPE_FIELDS (codelet_type ());
   gcc_assert (TREE_CODE (fields) == FIELD_DECL);
   gcc_assert (TREE_CODE (fields) == FIELD_DECL);
 
 
-  tree lookup_field (const char *name)
+  local_define (tree, lookup_field, (const char *name))
   {
   {
     tree fdecl, fname;
     tree fdecl, fname;
 
 
@@ -1855,9 +1895,9 @@ build_codelet_initializer (tree task_decl)
 
 
     /* Field NAME wasn't found.  */
     /* Field NAME wasn't found.  */
     gcc_assert (false);
     gcc_assert (false);
-  }
+  };
 
 
-  tree field_initializer (const char *name, tree value)
+  local_define (tree, field_initializer, (const char *name, tree value))
   {
   {
     tree field, init;
     tree field, init;
 
 
@@ -1872,15 +1912,15 @@ build_codelet_initializer (tree task_decl)
       TREE_VALUE (init) = value;
       TREE_VALUE (init) = value;
 
 
     return init;
     return init;
-  }
+  };
 
 
-  tree codelet_name ()
+  local_define (tree, codelet_name, ())
   {
   {
     const char *name = IDENTIFIER_POINTER (DECL_NAME (task_decl));
     const char *name = IDENTIFIER_POINTER (DECL_NAME (task_decl));
     return build_string_literal (strlen (name) + 1, name);
     return build_string_literal (strlen (name) + 1, name);
-  }
+  };
 
 
-  tree where_init (tree impls)
+  local_define (tree, where_init, (tree impls))
   {
   {
     tree impl;
     tree impl;
     int where_int = 0;
     int where_int = 0;
@@ -1903,9 +1943,9 @@ build_codelet_initializer (tree task_decl)
       }
       }
 
 
     return build_int_cstu (integer_type_node, where_int);
     return build_int_cstu (integer_type_node, where_int);
-  }
+  };
 
 
-  tree implementation_pointers (tree impls, int where)
+  local_define (tree, implementation_pointers, (tree impls, int where))
   {
   {
     size_t len;
     size_t len;
     tree impl, pointers;
     tree impl, pointers;
@@ -1943,17 +1983,17 @@ build_codelet_initializer (tree task_decl)
     return build_constructor_from_list (build_array_type (ptr_type_node,
     return build_constructor_from_list (build_array_type (ptr_type_node,
 							  index_type),
 							  index_type),
 					nreverse (pointers));
 					nreverse (pointers));
-  }
+  };
 
 
-  tree pointer_arg_count (void)
+  local_define (tree, pointer_arg_count, (void))
   {
   {
     size_t len;
     size_t len;
 
 
     len = list_length (task_pointer_parameter_types (task_decl));
     len = list_length (task_pointer_parameter_types (task_decl));
     return build_int_cstu (integer_type_node, len);
     return build_int_cstu (integer_type_node, len);
-  }
+  };
 
 
-  tree access_mode_array (void)
+  local_define (tree, access_mode_array, (void))
   {
   {
     const_tree type;
     const_tree type;
     tree modes;
     tree modes;
@@ -1975,7 +2015,7 @@ build_codelet_initializer (tree task_decl)
     return build_constructor_from_list (build_array_type (integer_type_node,
     return build_constructor_from_list (build_array_type (integer_type_node,
 							  index_type),
 							  index_type),
 					nreverse (modes));
 					nreverse (modes));
-  }
+  };
 
 
   if (verbose_output_p)
   if (verbose_output_p)
     inform (DECL_SOURCE_LOCATION (task_decl),
     inform (DECL_SOURCE_LOCATION (task_decl),
@@ -2044,7 +2084,7 @@ handle_pre_genericize (void *gcc_data, void *user_data)
 	  /* TASK lacks a body.  Declare its codelet, intantiate its codelet
 	  /* TASK lacks a body.  Declare its codelet, intantiate its codelet
 	     wrappers, and its body in this compilation unit.  */
 	     wrappers, and its body in this compilation unit.  */
 
 
-	  tree build_parameter (const_tree lst)
+	  local_define (tree, build_parameter, (const_tree lst))
 	  {
 	  {
 	    tree param, type;
 	    tree param, type;
 
 
@@ -2056,7 +2096,7 @@ handle_pre_genericize (void *gcc_data, void *user_data)
 	    DECL_CONTEXT (param) = task;
 	    DECL_CONTEXT (param) = task;
 
 
 	    return param;
 	    return param;
-	  }
+	  };
 
 
 	  /* Declare TASK's codelet.  It cannot be defined yet because the
 	  /* Declare TASK's codelet.  It cannot be defined yet because the
 	     complete list of tasks isn't available at this point.  */
 	     complete list of tasks isn't available at this point.  */
@@ -2380,9 +2420,10 @@ lower_starpu (void)
 
 
 static struct opt_pass pass_lower_starpu =
 static struct opt_pass pass_lower_starpu =
   {
   {
-    .type = GIMPLE_PASS,
-    .name = "pass_lower_starpu",
-    .execute = lower_starpu,
+    designated_field_init (type, GIMPLE_PASS),
+    designated_field_init (name, "pass_lower_starpu"),
+    designated_field_init (gate, NULL),
+    designated_field_init (execute, lower_starpu)
 
 
     /* The rest is zeroed.  */
     /* The rest is zeroed.  */
   };
   };
@@ -2432,10 +2473,10 @@ plugin_init (struct plugin_name_args *plugin_info,
 
 
   struct register_pass_info pass_info =
   struct register_pass_info pass_info =
     {
     {
-      .pass = &pass_lower_starpu,
-      .reference_pass_name = "*build_cgraph_edges",
-      .ref_pass_instance_number = 1,
-      .pos_op = PASS_POS_INSERT_AFTER
+      designated_field_init (pass, &pass_lower_starpu),
+      designated_field_init (reference_pass_name, "*build_cgraph_edges"),
+      designated_field_init (ref_pass_instance_number, 1),
+      designated_field_init (pos_op, PASS_POS_INSERT_AFTER)
     };
     };
 
 
   register_callback (plugin_name, PLUGIN_PASS_MANAGER_SETUP,
   register_callback (plugin_name, PLUGIN_PASS_MANAGER_SETUP,
@@ -2443,7 +2484,7 @@ plugin_init (struct plugin_name_args *plugin_info,
 
 
   include_dir = getenv ("STARPU_GCC_INCLUDE_DIR");
   include_dir = getenv ("STARPU_GCC_INCLUDE_DIR");
 
 
-  size_t arg;
+  int arg;
   for (arg = 0; arg < plugin_info->argc; arg++)
   for (arg = 0; arg < plugin_info->argc; arg++)
     {
     {
       if (strcmp (plugin_info->argv[arg].key, "include-dir") == 0)
       if (strcmp (plugin_info->argv[arg].key, "include-dir") == 0)

+ 1 - 1
include/starpu_config.h.in

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify

+ 1 - 1
include/starpu_data_interfaces.h

@@ -110,7 +110,7 @@ struct starpu_data_interface_ops
 	/* Free data of the interface on a given node. */
 	/* Free data of the interface on a given node. */
 	void (*free_data_on_node)(void *data_interface, uint32_t node);
 	void (*free_data_on_node)(void *data_interface, uint32_t node);
 	/* ram/cuda/spu/opencl synchronous and asynchronous transfer methods */
 	/* ram/cuda/spu/opencl synchronous and asynchronous transfer methods */
-	const struct starpu_data_copy_methods *copy_methods;
+	struct starpu_data_copy_methods *copy_methods;
 	/* Return the current pointer (if any) for the handle on the given node. */
 	/* Return the current pointer (if any) for the handle on the given node. */
 	void * (*handle_to_pointer)(starpu_data_handle_t handle, uint32_t node);
 	void * (*handle_to_pointer)(starpu_data_handle_t handle, uint32_t node);
 	/* Return an estimation of the size of data, for performance models */
 	/* Return an estimation of the size of data, for performance models */

+ 1 - 1
include/starpu_deprecated_api.h

@@ -23,7 +23,7 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
-#warning Your application is still using deprecated types. Please update to use the latest API, e.g. using tools/dev/rename.sh
+#warning "Your application is using former types. You may want to update to use the latest API, by using tools/dev/rename.sh"
 
 
 typedef starpu_data_handle_t starpu_data_handle;
 typedef starpu_data_handle_t starpu_data_handle;
 typedef struct starpu_block_interface starpu_block_interface_t;
 typedef struct starpu_block_interface starpu_block_interface_t;

+ 4 - 1
include/starpu_util.h

@@ -49,7 +49,10 @@ extern "C"
 #  endif
 #  endif
 #endif
 #endif
 
 
-#define STARPU_ABORT()		assert(0)
+#define STARPU_ABORT() do {                                          \
+	fprintf(stderr, "%s:%d %s\n", __FILE__, __LINE__, __func__); \
+	abort();                                                     \
+} while(0)
 
 
 #if defined(STARPU_HAVE_STRERROR_R)
 #if defined(STARPU_HAVE_STRERROR_R)
 #  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err < 0) { \
 #  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err < 0) { \

+ 12 - 0
m4/gcc.m4

@@ -149,6 +149,7 @@ AC_DEFUN([STARPU_GCC_PLUGIN_SUPPORT], [
     dnl   build_array_ref           -- present but undeclared in 4.6.1
     dnl   build_array_ref           -- present but undeclared in 4.6.1
     dnl   build_zero_cst            -- not in GCC 4.5.x; appears in 4.6
     dnl   build_zero_cst            -- not in GCC 4.5.x; appears in 4.6
     dnl   builtin_decl_explicit     -- new in 4.7, replaces `built_in_decls'
     dnl   builtin_decl_explicit     -- new in 4.7, replaces `built_in_decls'
+    dnl   .affects_type_identity    -- new field in 4.7
     _STARPU_WITH_GCC_PLUGIN_API([
     _STARPU_WITH_GCC_PLUGIN_API([
       AC_CHECK_DECLS([build_call_expr_loc_array, build_call_expr_loc_vec,
       AC_CHECK_DECLS([build_call_expr_loc_array, build_call_expr_loc_vec,
                       build_array_ref, build_zero_cst,
                       build_array_ref, build_zero_cst,
@@ -161,6 +162,14 @@ AC_DEFUN([STARPU_GCC_PLUGIN_SUPPORT], [
       AC_CHECK_HEADERS([c-common.h c-pragma.h c-family/c-common.h c-family/c-pragma.h],
       AC_CHECK_HEADERS([c-common.h c-pragma.h c-family/c-common.h c-family/c-pragma.h],
         [], [], [#include <gcc-plugin.h>
         [], [], [#include <gcc-plugin.h>
 	         #include <tree.h>])
 	         #include <tree.h>])
+
+      AC_CHECK_MEMBER([struct attribute_spec.affects_type_identity],
+        [AC_DEFINE([HAVE_ATTRIBUTE_SPEC_AFFECTS_TYPE_IDENTITY], [1],
+	  [Define to 1 when `struct attribute_spec' has the `affects_type_identity' field.])],
+	[],
+	[#include <gcc-plugin.h>
+	 #include <tree.h>])
+
     ])
     ])
 
 
 
 
@@ -175,6 +184,9 @@ AC_DEFUN([STARPU_GCC_PLUGIN_SUPPORT], [
     dnl Determine the corresponding Libtool tag.
     dnl Determine the corresponding Libtool tag.
     if test "$GCC_FOR_PLUGIN" = "$CXX"; then
     if test "$GCC_FOR_PLUGIN" = "$CXX"; then
       GCC_FOR_PLUGIN_LIBTOOL_TAG="CXX"
       GCC_FOR_PLUGIN_LIBTOOL_TAG="CXX"
+
+      # Require C++11, for lambdas and `auto'.
+      GCC_FOR_PLUGIN="$GCC_FOR_PLUGIN -std=c++11"
     else
     else
       GCC_FOR_PLUGIN_LIBTOOL_TAG="CC"
       GCC_FOR_PLUGIN_LIBTOOL_TAG="CC"
     fi
     fi

+ 2 - 0
src/core/workers.c

@@ -384,6 +384,8 @@ int starpu_conf_init(struct starpu_conf *conf)
 	     conf->single_combined_worker = 0;
 	     conf->single_combined_worker = 0;
 
 
 	conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
 	conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
+	if (conf->disable_asynchronous_copy == -1)
+		conf->disable_asynchronous_copy = 0;
 
 
 	return 0;
 	return 0;
 }
 }

+ 2 - 2
src/datawizard/interfaces/bcsr_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTR
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
 #endif
 #endif
 
 
-static const struct starpu_data_copy_methods bcsr_copy_data_methods_s =
+static struct starpu_data_copy_methods bcsr_copy_data_methods_s =
 {
 {
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 	.ram_to_spu = NULL,

+ 1 - 1
src/datawizard/interfaces/block_interface.c

@@ -42,7 +42,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 #endif
 #endif
 
 
-static const struct starpu_data_copy_methods block_copy_data_methods_s =
+static struct starpu_data_copy_methods block_copy_data_methods_s =
 {
 {
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 	.ram_to_spu = NULL,

+ 2 - 2
src/datawizard/interfaces/csr_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
@@ -42,7 +42,7 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #endif
 #endif
 
 
-static const struct starpu_data_copy_methods csr_copy_data_methods_s =
+static struct starpu_data_copy_methods csr_copy_data_methods_s =
 {
 {
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 	.ram_to_spu = NULL,

+ 6 - 12
src/datawizard/interfaces/data_interface.c

@@ -292,20 +292,14 @@ void starpu_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
 	if (STARPU_UNLIKELY(asynchronous_copy_disabled))
 	if (STARPU_UNLIKELY(asynchronous_copy_disabled))
 	{
 	{
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-	     if (ops->copy_methods->ram_to_cuda_async)
-		  ((struct starpu_data_copy_methods *)ops->copy_methods)->ram_to_cuda_async = NULL;
-	     if (ops->copy_methods->cuda_to_ram_async)
-		  ((struct starpu_data_copy_methods *)ops->copy_methods)->cuda_to_ram_async = NULL;
-	     if (ops->copy_methods->cuda_to_cuda_async)
-		  ((struct starpu_data_copy_methods *)ops->copy_methods)->cuda_to_cuda_async = NULL;
+		ops->copy_methods->ram_to_cuda_async = NULL;
+		ops->copy_methods->cuda_to_ram_async = NULL;
+		ops->copy_methods->cuda_to_cuda_async = NULL;
 #endif
 #endif
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
-	     if (ops->copy_methods->ram_to_opencl_async)
-		  ((struct starpu_data_copy_methods *)ops->copy_methods)->ram_to_opencl_async = NULL;
-	     if (ops->copy_methods->opencl_to_ram_async)
-		  ((struct starpu_data_copy_methods *)ops->copy_methods)->opencl_to_ram_async = NULL;
-	     if (ops->copy_methods->opencl_to_opencl_async)
-		  ((struct starpu_data_copy_methods *)ops->copy_methods)->opencl_to_opencl_async = NULL;
+		ops->copy_methods->ram_to_opencl_async = NULL;
+		ops->copy_methods->opencl_to_ram_async = NULL;
+		ops->copy_methods->opencl_to_opencl_async = NULL;
 #endif
 #endif
 	}
 	}
 
 

+ 1 - 1
src/datawizard/interfaces/matrix_interface.c

@@ -46,7 +46,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 #endif
 #endif
 
 
-static const struct starpu_data_copy_methods matrix_copy_data_methods_s =
+static struct starpu_data_copy_methods matrix_copy_data_methods_s =
 {
 {
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 	.ram_to_spu = NULL,

+ 1 - 1
src/datawizard/interfaces/multiformat_interface.c

@@ -41,7 +41,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 #endif
 
 
-static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
+static struct starpu_data_copy_methods multiformat_copy_data_methods_s =
 {
 {
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 	.ram_to_spu = NULL,

+ 1 - 1
src/datawizard/interfaces/variable_interface.c

@@ -42,7 +42,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 #endif
 #endif
 
 
-static const struct starpu_data_copy_methods variable_copy_data_methods_s =
+static struct starpu_data_copy_methods variable_copy_data_methods_s =
 {
 {
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 	.ram_to_spu = NULL,

+ 1 - 1
src/datawizard/interfaces/vector_interface.c

@@ -42,7 +42,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 #endif
 
 
-static const struct starpu_data_copy_methods vector_copy_data_methods_s =
+static struct starpu_data_copy_methods vector_copy_data_methods_s =
 {
 {
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 	.ram_to_spu = NULL,

+ 2 - 2
src/datawizard/interfaces/void_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
  * Copyright (C) 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -33,7 +33,7 @@ static int dummy_cuda_copy_async(void *src_interface, unsigned src_node, void *d
 static int dummy_opencl_copy_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
 static int dummy_opencl_copy_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 #endif
 
 
-static const struct starpu_data_copy_methods void_copy_data_methods_s =
+static struct starpu_data_copy_methods void_copy_data_methods_s =
 {
 {
 	.ram_to_ram = dummy_copy,
 	.ram_to_ram = dummy_copy,
 	.ram_to_spu = dummy_copy,
 	.ram_to_spu = dummy_copy,

+ 28 - 15
src/sched_policies/eager_central_priority_policy.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  INRIA
  * Copyright (C) 2011  INRIA
  *
  *
@@ -161,7 +161,7 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 
 
 	unsigned priolevel = task->priority - STARPU_MIN_PRIO;
 	unsigned priolevel = task->priority - STARPU_MIN_PRIO;
 
 
-	starpu_task_list_push_front(&taskq->taskq[priolevel], task);
+	starpu_task_list_push_back(&taskq->taskq[priolevel], task);
 	taskq->ntasks[priolevel]++;
 	taskq->ntasks[priolevel]++;
 	taskq->total_ntasks++;
 	taskq->total_ntasks++;
 
 
@@ -174,20 +174,19 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 
 
 static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 {
 {
-	/* XXX FIXME: should call starpu_worker_can_execute_task!! */
-	struct starpu_task *task = NULL;
+		struct starpu_task *chosen_task = NULL, *task;
+	unsigned workerid = starpu_worker_get_id();
+	int skipped = 0;
 
 
 	eager_central_prio_data *data = (eager_central_prio_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
 	eager_central_prio_data *data = (eager_central_prio_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
 	
 	
 	struct _starpu_priority_taskq *taskq = data->taskq;
 	struct _starpu_priority_taskq *taskq = data->taskq;
 
 
 	/* block until some event happens */
 	/* block until some event happens */
-	_STARPU_PTHREAD_MUTEX_LOCK(&data->sched_mutex);
 
 
 	if ((taskq->total_ntasks == 0) && _starpu_machine_is_running())
 	if ((taskq->total_ntasks == 0) && _starpu_machine_is_running())
 	{
 	{
 #ifdef STARPU_NON_BLOCKING_DRIVERS
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-		_STARPU_PTHREAD_MUTEX_UNLOCK(&data->sched_mutex);
 		return NULL;
 		return NULL;
 #else
 #else
 		_STARPU_PTHREAD_COND_WAIT(&data->sched_cond, &data->sched_mutex);
 		_STARPU_PTHREAD_COND_WAIT(&data->sched_cond, &data->sched_mutex);
@@ -201,20 +200,34 @@ static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 		{
 		{
 			if (taskq->ntasks[priolevel] > 0)
 			if (taskq->ntasks[priolevel] > 0)
 			{
 			{
-				/* there is some task that we can grab */
-				task = starpu_task_list_pop_back(&taskq->taskq[priolevel]);
-				taskq->ntasks[priolevel]--;
-				taskq->total_ntasks--;
-				_STARPU_TRACE_JOB_POP(task, 0);
+				for (task  = starpu_task_list_begin(&taskq->taskq[priolevel]);
+				     task != starpu_task_list_end(&taskq->taskq[priolevel]);
+				     task  = starpu_task_list_next(task)) {
+					unsigned nimpl;
+					for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+					{
+						if (starpu_worker_can_execute_task(workerid, task, nimpl))
+						{
+							/* there is some task that we can grab */
+							_starpu_get_job_associated_to_task(task)->nimpl = nimpl;
+							starpu_task_list_erase(&taskq->taskq[priolevel], task);
+							chosen_task = task;
+							taskq->ntasks[priolevel]--;
+							taskq->total_ntasks--;
+							_STARPU_TRACE_JOB_POP(task, 0);
+						} else skipped = 1;
+					}
+				}
 			}
 			}
 		}
 		}
-		while (!task && priolevel-- > 0);
+		while (!chosen_task && priolevel-- > 0);
 	}
 	}
-	STARPU_ASSERT_MSG(starpu_worker_can_execute_task(starpu_worker_get_id(), task, 0), "prio does not support \"can_execute\"");
 
 
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&data->sched_mutex);
+	if (!chosen_task && skipped)
+		/* Notify another worker to do that task */
+		_STARPU_PTHREAD_COND_SIGNAL(&data->sched_mutex);
 
 
-	return task;
+	return chosen_task;
 }
 }
 
 
 struct starpu_sched_policy _starpu_sched_prio_policy =
 struct starpu_sched_policy _starpu_sched_prio_policy =

+ 21 - 1
src/sched_policies/parallel_heft.c

@@ -43,6 +43,9 @@ typedef struct {
 	double beta;
 	double beta;
 	double _gamma;
 	double _gamma;
 	double idle_power;
 	double idle_power;
+* When we push a task on a combined worker we need all the cpu workers it contains
+ * to be locked at once */
+	pthread_mutex_t global_push_mutex;
 } pheft_data;
 } pheft_data;
 
 
 static double worker_exp_start[STARPU_NMAXWORKERS];
 static double worker_exp_start[STARPU_NMAXWORKERS];
@@ -86,6 +89,8 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 {
 {
 	/* make sure someone coule execute that task ! */
 	/* make sure someone coule execute that task ! */
 	STARPU_ASSERT(best_workerid != -1);
 	STARPU_ASSERT(best_workerid != -1);
+	
+	pheft_data *hd = (pheft_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
 
 
 	/* Is this a basic worker or a combined worker ? */
 	/* Is this a basic worker or a combined worker ? */
 	int nbasic_workers = starpu_get_nworkers_of_sched_ctx(sched_ctx_id);
 	int nbasic_workers = starpu_get_nworkers_of_sched_ctx(sched_ctx_id);
@@ -116,7 +121,13 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 		ntasks[best_workerid]++;
 		ntasks[best_workerid]++;
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 
 
+		/* We don't want it to interlace its task with a combined
+		 * worker's one */
+		_STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex);
+
 		ret = starpu_push_local_task(best_workerid, task, prio);
 		ret = starpu_push_local_task(best_workerid, task, prio);
+
+		_STARPU_PTHREAD_MUTEX_UNLOCK(&hd->global_push_mutex);
 	}
 	}
 	else
 	else
 	{
 	{
@@ -140,6 +151,9 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 		_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
 		_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
 		_STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
 		_STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
 
 
+		/* All cpu workers must be locked at once */
+		_STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex);
+
 		int i;
 		int i;
 		for (i = 0; i < worker_size; i++)
 		for (i = 0; i < worker_size; i++)
 		{
 		{
@@ -163,6 +177,10 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 			ret |= starpu_push_local_task(local_worker, alias, prio);
 			ret |= starpu_push_local_task(local_worker, alias, prio);
 		}
 		}
 
 
+		_STARPU_PTHREAD_MUTEX_UNLOCK(&hd->global_push_mutex);
+
+		//TODO : free task
+
 	}
 	}
 
 
 	return ret;
 	return ret;
@@ -555,7 +573,8 @@ static void initialize_parallel_heft_policy(unsigned sched_ctx_id)
 	const char *strval_idle_power = getenv("STARPU_IDLE_POWER");
 	const char *strval_idle_power = getenv("STARPU_IDLE_POWER");
 	if (strval_idle_power)
 	if (strval_idle_power)
 		hd->idle_power = atof(strval_idle_power);
 		hd->idle_power = atof(strval_idle_power);
-
+	
+	_STARPU_PTHREAD_MUTEX_INIT(&hd->global_push_mutex, NULL);
 
 
 }
 }
 
 
@@ -563,6 +582,7 @@ static void parallel_heft_deinit(unsigned sched_ctx_id)
 {
 {
 	pheft_data *hd = (pheft_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
 	pheft_data *hd = (pheft_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
 	starpu_delete_worker_collection_for_sched_ctx(sched_ctx_id);
 	starpu_delete_worker_collection_for_sched_ctx(sched_ctx_id);
+	_STARPU_PTHREAD_MUTEX_DESTROY(&hd->global_push_mutex);
 	free(hd);
 	free(hd);
 }
 }