Browse source

gcc: Support multiple implementations per target.

* gcc-plugin/src/starpu.c
  (build_codelet_initializer)[field_initializer]: Don't convert arrays.
  [implementation_pointer]: Rename to...
  [implementation_pointers]: ... this.  Always traverse all of IMPLS,
  and return an array initializer.
  Initialize both the plural `_funcs' fields and their singular
  counterparts.

* gcc-plugin/tests/base.c (my_other_task_cpu_bis): New function.
  (main): Remove outdated comment.

* gcc-plugin/tests/mocks.h (starpu_insert_task): Check both the plural
  and singular function pointers.

* doc/chapters/c-extensions.texi (Defining Tasks): Mention
  multi-implementation support.
Ludovic Courtès 13 years ago
parent
commit
07565d6ee6

+ 3 - 0
doc/chapters/c-extensions.texi

@@ -40,6 +40,9 @@ The StarPU GCC plug-in views @dfn{tasks} as ``extended'' C functions:
 tasks may have several implementations---e.g., one for CPUs, one written
 in OpenCL, one written in CUDA;
 @item
+tasks may have several implementations of the same target---e.g.,
+several CPU implementations;
+@item
 when a task is invoked, it may run in parallel, and StarPU is free to
 choose any of its implementations.
 @end enumerate

+ 35 - 15
gcc-plugin/src/starpu.c

@@ -1363,9 +1363,13 @@ build_codelet_initializer (tree task_decl)
     field = lookup_field (name);
     init = make_node (TREE_LIST);
     TREE_PURPOSE (init) = field;
-    TREE_VALUE (init) = fold_convert (TREE_TYPE (field), value);
     TREE_CHAIN (init) = NULL_TREE;
 
+    if (TREE_CODE (TREE_TYPE (value)) != ARRAY_TYPE)
+      TREE_VALUE (init) = fold_convert (TREE_TYPE (field), value);
+    else
+      TREE_VALUE (init) = value;
+
     return init;
   }
 
@@ -1391,11 +1395,12 @@ build_codelet_initializer (tree task_decl)
     return build_int_cstu (integer_type_node, where_int);
   }
 
-  tree implementation_pointer (tree impls, int where)
+  tree implementation_pointers (tree impls, int where)
   {
-    tree impl;
+    size_t len;
+    tree impl, pointers;
 
-    for (impl = impls;
+    for (impl = impls, pointers = NULL_TREE, len = 0;
 	 impl != NULL_TREE;
 	 impl = TREE_CHAIN (impl))
       {
@@ -1407,12 +1412,17 @@ build_codelet_initializer (tree task_decl)
 	    /* Return a pointer to the wrapper of IMPL_DECL.  */
 	    tree addr = build_addr (task_implementation_wrapper (impl_decl),
 				    NULL_TREE);
-	    return addr;
+	    pointers = tree_cons (size_int (len), addr, pointers);
+	    len++;
 	  }
       }
 
-    /* Default to a NULL pointer.  */
-    return build_int_cstu (build_pointer_type (void_type_node), 0);
+    /* Return a (potentially empty) array initializer.  */
+    tree index_type = build_index_type (size_int (list_length (pointers)));
+
+    return build_constructor_from_list (build_array_type (ptr_type_node,
+							  index_type),
+					nreverse (pointers));
   }
 
   tree pointer_arg_count (void)
@@ -1430,19 +1440,29 @@ build_codelet_initializer (tree task_decl)
 
   impls = task_implementation_list (task_decl);
 
+#define multiple_impls(x)						\
+  build_int_cstu (ptr_type_node,					\
+		  (uintptr_t) STARPU_MULTIPLE_ ## x ## _IMPLEMENTATIONS)
+
   inits =
     chain_trees (field_initializer ("where", where_init (impls)),
 		 field_initializer ("nbuffers", pointer_arg_count ()),
-		 field_initializer ("cpu_func",
-				    implementation_pointer (impls, STARPU_CPU)),
-		 field_initializer ("opencl_func",
-		 		    implementation_pointer (impls,
-		 					    STARPU_OPENCL)),
-		 field_initializer ("cuda_func",
-		 		    implementation_pointer (impls,
-		 					    STARPU_CUDA)),
+		 field_initializer ("cpu_funcs",
+				    implementation_pointers (impls,
+							     STARPU_CPU)),
+		 field_initializer ("opencl_funcs",
+		 		    implementation_pointers (impls,
+							     STARPU_OPENCL)),
+		 field_initializer ("cuda_funcs",
+		 		    implementation_pointers (impls,
+							     STARPU_CUDA)),
+		 field_initializer ("cpu_func", multiple_impls (CPU)),
+		 field_initializer ("cuda_func", multiple_impls (CUDA)),
+		 field_initializer ("opencl_func", multiple_impls (OPENCL)),
 		 NULL_TREE);
 
+#undef multiple_impls
+
   return build_constructor_from_unsorted_list (codelet_type (), inits);
 }
 

+ 8 - 3
gcc-plugin/tests/base.c

@@ -48,6 +48,8 @@ static void my_other_task (int x) __attribute__ ((task));
 
 static void my_other_task_cpu (int)
   __attribute__ ((task_implementation ("cpu", my_other_task)));
+static void my_other_task_cpu_bis (int)
+  __attribute__ ((task_implementation ("cpu", my_other_task)));
 
 static void
 my_other_task_cpu (int x)
@@ -55,6 +57,12 @@ my_other_task_cpu (int x)
   printf ("cpu\n");
 }
 
+static void
+my_other_task_cpu_bis (int x)
+{
+  printf ("second cpu implementation\n");
+}
+
 static void my_other_task_opencl (int)
   __attribute__ ((task_implementation ("opencl", my_other_task)));
 
@@ -104,9 +112,6 @@ main (int argc, char *argv[])
 
   my_scalar_task (42, 77, 99);
 
-  /* FIXME: Currently this one only works on little endian since we take the
-     address of `y_as_long_int' directly.  Instead a new `char' variable
-     should be introduced, and we should take its address.  */
   my_scalar_task (42, y_as_long_int, 99);
 
   struct insert_task_argument expected2[] =

+ 7 - 3
gcc-plugin/tests/mocks.h

@@ -61,9 +61,13 @@ starpu_insert_task (struct starpu_codelet *cl, ...)
   /* TODO: Call `cpu_func' & co. and check whether they do the right
      thing.  */
 
-  assert (cl->cpu_func != NULL);
-  assert (cl->opencl_func != NULL);
-  assert (cl->cuda_func == NULL);
+  assert (cl->cpu_funcs[0] != NULL);
+  assert (cl->opencl_funcs[0] != NULL);
+  assert (cl->cuda_funcs[0] == NULL);
+
+  assert (cl->cpu_func == STARPU_MULTIPLE_CPU_IMPLEMENTATIONS);
+  assert (cl->opencl_func == STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS);
+  assert (cl->cuda_func == STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS);
 
   va_list args;