|
@@ -0,0 +1,88 @@
|
|
|
+@c -*-texinfo-*-
|
|
|
+
|
|
|
+@c This file is part of the StarPU Handbook.
|
|
|
+@c Copyright (C) 2009--2011 Universit@'e de Bordeaux 1
|
|
|
+@c Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
|
|
|
+@c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
|
|
|
+@c See the file starpu.texi for copying conditions.
|
|
|
+
|
|
|
+@node StarPU FFT support
|
|
|
+@chapter StarPU FFT support
|
|
|
+
|
|
|
+StarPU provides @code{libstarpufft}, a library whose design is very similar to
|
|
|
+both fftw and cufft, the difference being that it takes advantage of both CPUs
|
|
|
+and GPUs. It should however be noted that GPUs do not have the same precision as
|
|
|
+CPUs, so the results may differ by a negligible amount.
|
|
|
+
|
|
|
+@code{float}, @code{double} and @code{long double} precisions are available, with the fftw naming
|
|
|
+convention:
|
|
|
+
|
|
|
+@enumerate
|
|
|
+@item double precision structures and functions are named e.g. @code{starpufft_execute}
|
|
|
+@item float precision structures and functions are named e.g. @code{starpufftf_execute}
|
|
|
+@item long double precision structures and functions are named e.g. @code{starpufftl_execute}
|
|
|
+@end enumerate
|
|
|
+
|
|
|
+The documentation below uses names for double precision; replace
|
|
|
+@code{starpufft_} with @code{starpufftf_} or @code{starpufftl_} as appropriate.
|
|
|
+
|
|
|
+Only complex numbers are supported at the moment.
|
|
|
+
|
|
|
+The application has to call @code{starpu_init} before calling starpufft functions.
|
|
|
+
|
|
|
+@subsection Compilation
|
|
|
+
|
|
|
+The flags required to compile or link against the FFT library are accessible
|
|
|
+with the following commands:
|
|
|
+
|
|
|
+@example
|
|
|
+% pkg-config --cflags libstarpufft # options for the compiler
|
|
|
+% pkg-config --libs libstarpufft # options for the linker
|
|
|
+@end example
|
|
|
+
|
|
|
+@subsection Initialisation
|
|
|
+
|
|
|
+@deftypefun {void *} starpufft_malloc (size_t @var{n})
|
|
|
+Allocates memory for @var{n} bytes. This is preferred over @code{malloc}, since
|
|
|
+it allocates pinned memory, which allows overlapped transfers.
|
|
|
+@end deftypefun
|
|
|
+
|
|
|
+@deftypefun void starpufft_free (void *@var{p})
|
|
|
+Releases memory previously allocated with @code{starpufft_malloc}.
|
|
|
+@end deftypefun
|
|
|
+
|
|
|
+@deftypefun {struct starpufft_plan *} starpufft_plan_dft_1d (int @var{n}, int @var{sign}, unsigned @var{flags})
|
|
|
+Initializes a plan for 1D FFT of size @var{n}. @var{sign} can be
|
|
|
+@code{STARPUFFT_FORWARD} or @code{STARPUFFT_INVERSE}. @var{flags} must be 0.
|
|
|
+@end deftypefun
|
|
|
+
|
|
|
+@deftypefun {struct starpufft_plan *} starpufft_plan_dft_2d (int @var{n}, int @var{m}, int @var{sign}, unsigned @var{flags})
|
|
|
+Initializes a plan for 2D FFT of size (@var{n}, @var{m}). @var{sign} can be
|
|
|
+@code{STARPUFFT_FORWARD} or @code{STARPUFFT_INVERSE}. @var{flags} must be 0.
|
|
|
+@end deftypefun
|
|
|
+
|
|
|
+@deftypefun {struct starpu_task *} starpufft_start (starpufft_plan @var{p}, void *@var{in}, void *@var{out})
|
|
|
+Starts an FFT previously planned as @var{p}, using @var{in} and @var{out} as
|
|
|
+input and output. This only submits the task and does not wait for it.
|
|
|
+@end deftypefun
|
|
|
+
|
|
|
+@deftypefun {struct starpu_task *} starpufft_start_handle (starpufft_plan @var{p}, starpu_data_handle_t @var{in}, starpu_data_handle_t @var{out})
|
|
|
+Starts an FFT previously planned as @var{p}, using data handles @var{in} and
|
|
|
+@var{out} as input and output (assumed to be vectors of elements of the expected
|
|
|
+types). This only submits the task and does not wait for it.
|
|
|
+@end deftypefun
|
|
|
+
|
|
|
+@deftypefun void starpufft_execute (starpufft_plan @var{p}, void *@var{in}, void *@var{out})
|
|
|
+Executes an FFT previously planned as @var{p}, using @var{in} and @var{out} as
|
|
|
+input and output. This submits and waits for the task.
|
|
|
+@end deftypefun
|
|
|
+
|
|
|
+@deftypefun void starpufft_execute_handle (starpufft_plan @var{p}, starpu_data_handle_t @var{in}, starpu_data_handle_t @var{out})
|
|
|
+Executes an FFT previously planned as @var{p}, using data handles @var{in} and
|
|
|
+@var{out} as input and output (assumed to be vectors of elements of the expected
|
|
|
+types). This submits and waits for the task.
|
|
|
+@end deftypefun
|
|
|
+
|
|
|
+@deftypefun void starpufft_destroy_plan (starpufft_plan @var{p})
|
|
|
+Destroys plan @var{p}, i.e.@: releases all CPU (fftw) and GPU (cufft) resources.
|
|
|
+@end deftypefun
|