|
@@ -17,6 +17,7 @@
|
|
|
* Performance model example::
|
|
|
* Theoretical lower bound on execution time::
|
|
|
* Insert Task Utility::
|
|
|
+* The multiformat interface::
|
|
|
* More examples:: More examples shipped with StarPU
|
|
|
* Debugging:: When things go wrong.
|
|
|
@end menu
|
|
@@ -522,9 +523,91 @@ gdb helpers are also provided to show the whole StarPU state:
|
|
|
(gdb) help starpu
|
|
|
@end smallexample
|
|
|
|
|
|
+@node The multiformat interface
|
|
|
+@section The multiformat interface
|
|
|
+It may be useful to represent the same piece of data using two different
|
|
|
+data structures: one that would only be used on CPUs, and one that would only
|
|
|
+be used on GPUs. This can be done by using the multiformat interface. StarPU
|
|
|
+will be able to convert data from one data structure to the other when needed.
|
|
|
+Note that the heft scheduler is the only one optimized for this interface. The
|
|
|
+user must provide StarPU with conversion codelets:
|
|
|
+
|
|
|
+@example
|
|
|
+#define NX 1024
|
|
|
+struct point array_of_structs[NX];
|
|
|
+starpu_data_handle_t handle;
|
|
|
+
|
|
|
+/*
|
|
|
+ * The conversion of a piece of data is itself a task, though it is created,
|
|
|
+ * submitted and destroyed by StarPU internals and not by the user. Therefore,
|
|
|
+ * we have to define two codelets.
|
|
|
+ * Note that for now the conversion from the CPU format to the GPU format has to
|
|
|
+ * be executed on the GPU, and the conversion from the GPU to the CPU has to be
|
|
|
+ * executed on the CPU.
|
|
|
+ */
|
|
|
+#ifdef STARPU_USE_OPENCL
|
|
|
+void cpu_to_opencl_opencl_func(void *buffers[], void *args);
|
|
|
+struct starpu_codelet cpu_to_opencl_cl = @{
|
|
|
+ .where = STARPU_OPENCL,
|
|
|
+ .opencl_funcs = @{ cpu_to_opencl_opencl_func, NULL @},
|
|
|
+ .nbuffers = 1
|
|
|
+@};
|
|
|
+
|
|
|
+void opencl_to_cpu_func(void *buffers[], void *args);
|
|
|
+struct starpu_codelet opencl_to_cpu_cl = @{
|
|
|
+ .where = STARPU_CPU,
|
|
|
+ .cpu_funcs = @{ opencl_to_cpu_func, NULL @},
|
|
|
+ .nbuffers = 1
|
|
|
+@};
|
|
|
+#endif
|
|
|
+
|
|
|
+struct starpu_multiformat_data_interface_ops format_ops = @{
|
|
|
+#ifdef STARPU_USE_OPENCL
|
|
|
+ .opencl_elemsize = 2 * sizeof(float),
|
|
|
+ .cpu_to_opencl_cl = &cpu_to_opencl_cl,
|
|
|
+ .opencl_to_cpu_cl = &opencl_to_cpu_cl,
|
|
|
+#endif
|
|
|
+ .cpu_elemsize = 2 * sizeof(float),
|
|
|
+ ...
|
|
|
+@};
|
|
|
+starpu_multiformat_data_register(&handle, 0, &array_of_structs, NX, &format_ops);
|
|
|
+@end example
|
|
|
+
|
|
|
+Kernels can be written almost the same way as for any other interface. Note that
|
|
|
+@code{STARPU_MULTIFORMAT_GET_PTR} shall only be used for CPU kernels. CUDA kernels
|
|
|
+must use @code{STARPU_MULTIFORMAT_GET_CUDA_PTR}, and OpenCL kernels must use
|
|
|
+@code{STARPU_MULTIFORMAT_GET_OPENCL_PTR}. @code{STARPU_MULTIFORMAT_GET_NX} may be used in any
|
|
|
+kind of kernel.
|
|
|
+@example
|
|
|
+static void
|
|
|
+multiformat_scal_cpu_func(void *buffers[], void *args)
|
|
|
+@{
|
|
|
+ struct point *aos;
|
|
|
+ unsigned int n;
|
|
|
+
|
|
|
+ aos = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
|
|
|
+ n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
|
|
|
+ ...
|
|
|
+@}
|
|
|
+
|
|
|
+extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args)
|
|
|
+@{
|
|
|
+ unsigned int n;
|
|
|
+ struct struct_of_arrays *soa;
|
|
|
+
|
|
|
+ soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
|
|
|
+ n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
|
|
|
+
|
|
|
+ ...
|
|
|
+@}
|
|
|
+@end example
|
|
|
+
|
|
|
+A full example may be found in @code{examples/basic_examples/multiformat.c}.
|
|
|
+
|
|
|
@node More examples
|
|
|
@section More examples
|
|
|
|
|
|
+
|
|
|
More examples are available in the StarPU sources in the @code{examples/}
|
|
|
directory. Simple examples include:
|
|
|
|