浏览代码

starpufft: support 3D

Samuel Thibault 5 年之前
父节点
当前提交
ccb948172c
共有 6 个文件被更改,包括 228 次插入3 次删除
  1. 1 0
      ChangeLog
  2. 2 1
      starpufft/include/starpufft.h
  3. 1 0
      starpufft/src/Makefile.am
  4. 13 1
      starpufft/src/starpufftx.c
  5. 191 0
      starpufft/src/starpufftx3d.c
  6. 20 1
      starpufft/tests/testx.c

+ 1 - 0
ChangeLog

@@ -40,6 +40,7 @@ New features:
     STARPU_CALLBACK_WITH_ARG_NFREE, STARPU_PROLOGUE_CALLBACK_ARG_NFREE, and
     STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE which set the corresponding
     fields of starpu_task to 0.
+  * starpufft: Support 3D.
 
 Small features:
   * New starpu_task_insert() and alike parameter STARPU_TASK_WORKERIDS

+ 2 - 1
starpufft/include/starpufft.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010,2012,2014,2015,2017,2019            CNRS
- * Copyright (C) 2009,2011,2014                           Université de Bordeaux
+ * Copyright (C) 2009,2011,2014,2019                      Université de Bordeaux
  * Copyright (C) 2012                                     Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -43,6 +43,7 @@ typedef struct starpufft(plan) *starpufft(plan); \
 \
 starpufft(plan) starpufft(plan_dft_1d)(int n, int sign, unsigned flags); \
 starpufft(plan) starpufft(plan_dft_2d)(int n, int m, int sign, unsigned flags); \
+starpufft(plan) starpufft(plan_dft_3d)(int n, int m, int p, int sign, unsigned flags); \
 starpufft(plan) starpufft(plan_dft_r2c_1d)(int n, unsigned flags); \
 starpufft(plan) starpufft(plan_dft_c2r_1d)(int n, unsigned flags); \
 \

+ 1 - 0
starpufft/src/Makefile.am

@@ -28,6 +28,7 @@ EXTRA_DIST =			\
 	starpufftx.c		\
 	starpufftx1d.c		\
 	starpufftx2d.c		\
+	starpufftx3d.c		\
 	cuda_kernels.cu		\
 	cudaf_kernels.cu	\
 	cudax_kernels.cu

+ 13 - 1
starpufft/src/starpufftx.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2015,2017                           CNRS
  * Copyright (C) 2012,2013,2017                           Inria
- * Copyright (C) 2009-2012,2014,2017                      Université de Bordeaux
+ * Copyright (C) 2009-2012,2014,2017,2019                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -199,6 +199,7 @@ static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIB
 
 #include "starpufftx1d.c"
 #include "starpufftx2d.c"
+#include "starpufftx3d.c"
 
 struct starpu_task *
 STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
@@ -243,6 +244,17 @@ STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
 			}
 			task = STARPUFFT(start2dC2C)(plan, plan->in_handle, plan->out_handle);
 			break;
+		case 3:
+			starpu_vector_data_register(&plan->in_handle, STARPU_MAIN_RAM, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
+			if (!PARALLEL)
+				starpu_vector_data_register(&plan->out_handle, STARPU_MAIN_RAM, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
+			if (PARALLEL)
+			{
+				for (z = 0; z < plan->totsize1; z++)
+					plan->twist1_tasks[z]->handles[0] = plan->in_handle;
+			}
+			task = STARPUFFT(start3dC2C)(plan, plan->in_handle, plan->out_handle);
+			break;
 		default:
 			STARPU_ABORT();
 			break;

+ 191 - 0
starpufft/src/starpufftx3d.c

@@ -0,0 +1,191 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2013,2014,2019                           Université de Bordeaux
+ * Copyright (C) 2012,2013                                Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ *
+ * Sequential version
+ *
+ */
+
+#ifdef __STARPU_USE_CUDA
+/* Create the cuFFT plan of the calling worker for one 3D C2C fft of size
+ * n,m,p, and attach it to that worker's local CUDA stream.
+ * args is the STARPUFFT(plan) whose per-worker plan_cuda gets filled. */
+static void
+STARPUFFT(fft_3d_plan_gpu)(void *args)
+{
+	STARPUFFT(plan) plan = args;
+	cufftResult cures;
+	int n = plan->n[0];
+	int m = plan->n[1];
+	int p = plan->n[2];
+	int workerid = starpu_worker_get_id_check();
+
+	cures = cufftPlan3d(&plan->plans[workerid].plan_cuda, n, m, p, _CUFFT_C2C);
+	if (cures != CUFFT_SUCCESS)
+		STARPU_CUFFT_REPORT_ERROR(cures);
+	/* Keep the return value: otherwise the check below would only re-test
+	 * the result of cufftPlan3d() and never see a cufftSetStream() failure. */
+	cures = cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream());
+	if (cures != CUFFT_SUCCESS)
+		STARPU_CUFFT_REPORT_ERROR(cures);
+}
+
+/* CUDA implementation of the 3D fft codelet: execute the calling worker's
+ * cuFFT plan from the vector in descr[0] to the vector in descr[1], then
+ * wait for the worker's local CUDA stream.
+ * args is the STARPUFFT(plan) given as codelet argument. */
+static void
+STARPUFFT(fft_3d_kernel_gpu)(void *descr[], void *args)
+{
+	STARPUFFT(plan) plan = args;
+	cufftResult status;
+
+	_cufftComplex * restrict src = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict dst = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	int workerid = starpu_worker_get_id_check();
+
+	/* Count this task for the executing worker. */
+	task_per_worker[workerid]++;
+
+	/* A sign of -1 selects the forward transform, anything else the inverse one. */
+	int direction = (plan->sign == -1) ? CUFFT_FORWARD : CUFFT_INVERSE;
+
+	status = _cufftExecC2C(plan->plans[workerid].plan_cuda, src, dst, direction);
+	if (status != CUFFT_SUCCESS)
+		STARPU_CUFFT_REPORT_ERROR(status);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+#endif
+
+#ifdef STARPU_HAVE_FFTW
+/* CPU implementation of the 3D fft codelet: perform one fft of size n,m,p
+ * with the FFTW plan of the calling worker, reading the vector in descr[0]
+ * and writing the vector in descr[1].
+ * _args is the STARPUFFT(plan) given as codelet argument. */
+static void
+STARPUFFT(fft_3d_kernel_cpu)(void *descr[], void *_args)
+{
+	STARPUFFT(plan) plan = _args;
+	int workerid = starpu_worker_get_id_check();
+
+	/* Count this task for the executing worker. */
+	task_per_worker[workerid]++;
+
+	STARPUFFT(complex) * restrict src = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) * restrict dst = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	_FFTW(execute_dft)(plan->plans[workerid].plan_cpu, src, dst);
+}
+#endif
+
+/* History-based performance model attached to the 3D fft codelet. */
+static struct starpu_perfmodel STARPUFFT(fft_3d_model) =
+{
+	.symbol = TYPE"fft_3d",
+	.type = STARPU_HISTORY_BASED
+};
+
+/* Codelet computing one whole 3D fft as a single task: buffer 0 is read as
+ * input, buffer 1 is written as output. */
+static struct starpu_codelet STARPUFFT(fft_3d_codelet) = {
+	/* Only advertise the implementations that were compiled in. */
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+#ifdef STARPU_HAVE_FFTW
+		STARPU_CPU|
+#endif
+		0,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(fft_3d_kernel_gpu)},
+#endif
+#ifdef STARPU_HAVE_FFTW
+	.cpu_funcs = {STARPUFFT(fft_3d_kernel_cpu)},
+#endif
+	/* NOTE(review): CAN_EXECUTE is a macro defined outside this file;
+	 * presumably it installs the can_execute hook -- confirm. */
+	CAN_EXECUTE
+	.model = &STARPUFFT(fft_3d_model),
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W},
+	.name = "fft_3d_codelet"
+};
+
+/* Create a plan for a 3D complex-to-complex fft of size n,m,p.
+ * sign is stored in the plan and handed to FFTW as the transform sign;
+ * flags must currently be 0.
+ * Only the sequential version is implemented: the PARALLEL variant asserts.
+ * Returns the newly allocated plan. */
+STARPUFFT(plan)
+STARPUFFT(plan_dft_3d)(int n, int m, int p, int sign, unsigned flags)
+{
+	unsigned workerid;
+
+	if (PARALLEL)
+	{
+		/* TODO */
+		STARPU_ASSERT(0);
+	}
+
+	/* TODO: flags? Automatically set FFTW_MEASURE on calibration? */
+	STARPU_ASSERT(flags == 0);
+
+	STARPUFFT(plan) plan = malloc(sizeof(*plan));
+	STARPU_ASSERT(plan);
+	memset(plan, 0, sizeof(*plan));
+
+	plan->dim = 3;
+	plan->n = malloc(plan->dim * sizeof(*plan->n));
+	STARPU_ASSERT(plan->n);
+	plan->n[0] = n;
+	plan->n[1] = m;
+	plan->n[2] = p;
+
+	/* Element count of the whole n*m*p volume: the input and output
+	 * vectors are registered with totsize elements, so all three
+	 * dimensions have to be accounted for. */
+	plan->totsize = n * m * p;
+
+	plan->type = C2C;
+	plan->sign = sign;
+
+	/* Initialize per-worker working set */
+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
+		switch (starpu_worker_get_type(workerid)) {
+		case STARPU_CPU_WORKER:
+#ifdef STARPU_HAVE_FFTW
+			/* fft plan: one fft of size n, m, p. */
+			plan->plans[workerid].plan_cpu = _FFTW(plan_dft_3d)(n, m, p, NULL, (void*) 1, sign, _FFTW_FLAGS);
+			STARPU_ASSERT(plan->plans[workerid].plan_cpu);
+#else
+/* #warning libstarpufft can not work correctly if libfftw3 is not installed */
+#endif
+			break;
+		case STARPU_CUDA_WORKER:
+			/* Nothing here: CUDA plans are created below, on the
+			 * CUDA workers themselves. */
+			break;
+		default:
+			/* Do not care, we won't be executing anything there. */
+			break;
+		}
+	}
+#ifdef __STARPU_USE_CUDA
+	starpu_execute_on_each_worker(STARPUFFT(fft_3d_plan_gpu), plan, STARPU_CUDA);
+#endif
+
+	return plan;
+}
+
+/* Actually submit all the tasks: here a single, non-detached task running
+ * the 3D fft codelet with plan as argument, reading handle in and writing
+ * handle out.
+ * Returns the submitted task, or NULL if submission failed with -ENODEV.
+ * The PARALLEL variant is not implemented and asserts. */
+static struct starpu_task *
+STARPUFFT(start3dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out)
+{
+	struct starpu_task *task;
+	int ret;
+
+	STARPU_ASSERT(plan->type == C2C);
+
+	if (PARALLEL)
+	{
+		/* TODO */
+		STARPU_ASSERT(0);
+		/* Do not fall off the end of a non-void function should the
+		 * assertion above not abort. */
+		return NULL;
+	}
+
+	/* Create FFT task */
+	task = starpu_task_create();
+	task->detach = 0;
+	task->cl = &STARPUFFT(fft_3d_codelet);
+	task->handles[0] = in;
+	task->handles[1] = out;
+	task->cl_arg = plan;
+
+	ret = starpu_task_submit(task);
+	if (ret == -ENODEV) return NULL;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return task;
+}

+ 20 - 1
starpufft/tests/testx.c

@@ -111,7 +111,7 @@ int main(int argc, char *argv[])
 {
 	int i, ret;
 	int size;
-	int n = 0, m = 0;
+	int n = 0, m = 0, p = 0;
 	STARPUFFT(plan) plan;
 	starpu_data_handle_t in_handle, out_handle;
 #ifdef STARPU_HAVE_FFTW
@@ -157,6 +157,15 @@ int main(int argc, char *argv[])
 		/* 2D */
 		size = n * m;
 	}
+	else if (argc == 4)
+	{
+		n = atoi(argv[1]);
+		m = atoi(argv[2]);
+		p = atoi(argv[3]);
+
+		/* 3D */
+		size = n * m * p;
+	}
 	else
 	{
 		assert(0);
@@ -205,6 +214,16 @@ int main(int argc, char *argv[])
 		STARPU_ASSERT(cufftPlan2d(&cuda_plan, n, m, _CUFFT_C2C) == CUFFT_SUCCESS);
 #endif
 	}
+	else if (argc == 4)
+	{
+		plan = STARPUFFT(plan_dft_3d)(n, m, p, SIGN, 0);
+#ifdef STARPU_HAVE_FFTW
+		fftw_plan = _FFTW(plan_dft_3d)(n, m, p, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
+#endif
+#ifdef STARPU_USE_CUDA
+		STARPU_ASSERT(cufftPlan3d(&cuda_plan, n, m, p, _CUFFT_C2C) == CUFFT_SUCCESS);
+#endif
+	}
 	else
 	{
 		assert(0);