|
@@ -15,7 +15,7 @@ may run on the machine. For instance, a C++ computation class which is not
|
|
|
thread-safe by itself, but for which several instantiated objects of that class
|
|
|
can be used concurrently. This can be used in StarPU by initializing one such
|
|
|
object per worker. For instance, the libstarpufft example does the following to
|
|
|
-be able to use FFTW.
|
|
|
+be able to use FFTW on CPUs.
|
|
|
|
|
|
Some global array stores the instantiated objects:
|
|
|
|
|
@@ -49,21 +49,23 @@ static void fft(void *descr[], void *_args)
|
|
|
}
|
|
|
\endcode
|
|
|
|
|
|
-Another way to go which may be needed is to execute some code from the workers
|
|
|
-themselves thanks to starpu_execute_on_each_worker(). This may be required
|
|
|
-by CUDA to behave properly due to threading issues. For instance, StarPU's
|
|
|
-starpu_cublas_init() looks like the following to call
|
|
|
-<c>cublasInit</c> from the workers themselves:
|
|
|
+This, however, is not sufficient for FFT on CUDA: initialization has
|
|
|
+to be done from the workers themselves. This can be done thanks to
|
|
|
+starpu_execute_on_each_worker(). For instance, libstarpufft does the following:
|
|
|
|
|
|
\code{.c}
|
|
|
-static void init_cublas_func(void *args STARPU_ATTRIBUTE_UNUSED)
|
|
|
+static void fft_plan_gpu(void *args)
|
|
|
{
|
|
|
- cublasStatus cublasst = cublasInit();
|
|
|
- cublasSetKernelStream(starpu_cuda_get_local_stream());
|
|
|
+ plan plan = args;
|
|
|
+ int n2 = plan->n2[0];
|
|
|
+ int workerid = starpu_worker_get_id();
|
|
|
+
|
|
|
+ cufftPlan1d(&plan->plans[workerid].plan_cuda, n2, _CUFFT_C2C, 1);
|
|
|
+ cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream());
|
|
|
}
|
|
|
-void starpu_cublas_init(void)
|
|
|
+void starpufft_plan(void)
|
|
|
{
|
|
|
- starpu_execute_on_each_worker(init_cublas_func, NULL, STARPU_CUDA);
|
|
|
+ starpu_execute_on_each_worker(fft_plan_gpu, plan, STARPU_CUDA);
|
|
|
}
|
|
|
\endcode
|
|
|
|