Browse Source

Add cpu-only and gpu-only task variants

Samuel Thibault 6 years ago
parent
commit
ab5b2737e9

+ 6 - 0
examples/cholesky/cholesky.h

@@ -165,6 +165,12 @@ extern struct starpu_perfmodel chol_model_22;
 extern struct starpu_codelet cl11;
 extern struct starpu_codelet cl21;
 extern struct starpu_codelet cl22;
+extern struct starpu_codelet cl11_gpu; /* *_gpu: variants of cl11/cl21/cl22 that only set CUDA implementations */
+extern struct starpu_codelet cl21_gpu;
+extern struct starpu_codelet cl22_gpu;
+extern struct starpu_codelet cl11_cpu; /* *_cpu: variants of cl11/cl21/cl22 that only set CPU implementations */
+extern struct starpu_codelet cl21_cpu;
+extern struct starpu_codelet cl22_cpu;
 
 void chol_cpu_codelet_update_u11(void **, void *);
 void chol_cpu_codelet_update_u21(void **, void *);

+ 23 - 8
examples/cholesky/cholesky_compil.c

@@ -59,8 +59,8 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	start = starpu_timing_now();
 
 #define A(i,j) starpu_data_get_sub_data(dataA, 2, j, i)
-#define POTRF(A, prio) do { \
-                int ret = starpu_task_insert(&cl11, \
+#define _POTRF(cl, A, prio) do { \
+		int ret = starpu_task_insert(cl, \
 					 STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int) (prio) : (int) STARPU_MAX_PRIO, \
 					 STARPU_RW, A, \
 					 STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), \
@@ -69,8 +69,8 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); \
 } while (0)
 
-#define TRSM(A, B, prio) do { \
-                int ret = starpu_task_insert(&cl21, \
+#define _TRSM(cl, A, B, prio) do { \
+		int ret = starpu_task_insert(cl, \
 					 STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int) (prio) : (int) STARPU_DEFAULT_PRIO, \
 					 STARPU_R, A, \
 					 STARPU_RW, B, \
@@ -81,8 +81,8 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 } while (0)
 
 /* TODO: use real SYRK */
-#define SYRK(A, C, prio) do { \
-                int ret = starpu_task_insert(&cl22, \
+#define _SYRK(cl, A, C, prio) do { \
+		int ret = starpu_task_insert(cl, \
 					 STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int) (prio) : (int) STARPU_DEFAULT_PRIO, \
 					 STARPU_R, A, \
 					 STARPU_R, A, \
@@ -93,8 +93,8 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); \
 } while (0)
 
-#define GEMM(A, B, C, prio) do { \
-                int ret = starpu_task_insert(&cl22, \
+#define _GEMM(cl, A, B, C, prio) do { \
+		int ret = starpu_task_insert(cl, \
 					 STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int) (prio) : (int) STARPU_DEFAULT_PRIO, \
 					 STARPU_R, A, \
 					 STARPU_R, B, \
@@ -105,6 +105,21 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); \
 } while (0)
 
+#define POTRF(A, prio)		_POTRF(&cl11, A, prio) /* default wrappers: same names as before, now forwarding to the codelet-parameterized _XXX macros */
+#define TRSM(A, B, prio)	_TRSM(&cl21, A, B, prio)
+#define SYRK(A, B, prio)	_SYRK(&cl22, A, B, prio) /* SYRK and GEMM both submit cl22 */
+#define GEMM(A, B, C, prio)	_GEMM(&cl22, A, B, C, prio)
+
+#define POTRF_GPU(A, prio)	_POTRF(&cl11_gpu, A, prio) /* *_GPU wrappers: submit the cl*_gpu codelets */
+#define TRSM_GPU(A, B, prio)	_TRSM(&cl21_gpu, A, B, prio)
+#define SYRK_GPU(A, B, prio)	_SYRK(&cl22_gpu, A, B, prio)
+#define GEMM_GPU(A, B, C, prio)	_GEMM(&cl22_gpu, A, B, C, prio)
+
+#define POTRF_CPU(A, prio)	_POTRF(&cl11_cpu, A, prio) /* *_CPU wrappers: submit the cl*_cpu codelets */
+#define TRSM_CPU(A, B, prio)	_TRSM(&cl21_cpu, A, B, prio)
+#define SYRK_CPU(A, B, prio)	_SYRK(&cl22_cpu, A, B, prio)
+#define GEMM_CPU(A, B, C, prio)	_GEMM(&cl22_cpu, A, B, C, prio)
+
 #include "cholesky_compiled.c"
 
 	starpu_task_wait_for_all();

+ 76 - 0
examples/cholesky/cholesky_kernels.c

@@ -354,3 +354,79 @@ struct starpu_codelet cl22 =
 	.model = &chol_model_22,
 	.color = 0x00ff00,
 };
+
+struct starpu_codelet cl11_gpu = /* GPU-only codelet for the u11 update: no cpu_funcs are set */
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {chol_cublas_codelet_update_u11},
+#elif defined(STARPU_SIMGRID)
+	.cuda_funcs = {(void*)1}, /* non-NULL placeholder; presumably never called under SimGrid -- TODO confirm */
+#endif /* NOTE(review): with neither macro defined, this codelet has no implementation function at all */
+	.nbuffers = 1, /* no cuda_flags here, unlike cl21_gpu and cl22_gpu */
+	.modes = { STARPU_RW }, /* the single buffer is accessed read-write */
+	.model = &chol_model_11,
+	.color = 0xffff00,
+};
+
+struct starpu_codelet cl21_gpu = /* GPU-only codelet for the u21 update: no cpu_funcs are set */
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {chol_cublas_codelet_update_u21},
+#elif defined(STARPU_SIMGRID)
+	.cuda_funcs = {(void*)1}, /* non-NULL placeholder; presumably never called under SimGrid -- TODO confirm */
+#endif /* NOTE(review): with neither macro defined, this codelet has no implementation function at all */
+	.cuda_flags = {STARPU_CUDA_ASYNC}, /* set unconditionally, outside the #ifdef above */
+	.nbuffers = 2,
+	.modes = { STARPU_R, STARPU_RW }, /* first buffer read-only, second read-write */
+	.model = &chol_model_21,
+	.color = 0x8080ff,
+};
+
+struct starpu_codelet cl22_gpu = /* GPU-only codelet for the u22 update: no cpu_funcs are set */
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {chol_cublas_codelet_update_u22},
+#elif defined(STARPU_SIMGRID)
+	.cuda_funcs = {(void*)1}, /* non-NULL placeholder; presumably never called under SimGrid -- TODO confirm */
+#endif /* NOTE(review): with neither macro defined, this codelet has no implementation function at all */
+	.cuda_flags = {STARPU_CUDA_ASYNC}, /* set unconditionally, outside the #ifdef above */
+	.nbuffers = 3,
+	.modes = { STARPU_R, STARPU_R, STARPU_RW }, /* two read-only inputs, third buffer read-write */
+	.model = &chol_model_22,
+	.color = 0x00ff00,
+};
+
+struct starpu_codelet cl11_cpu = /* CPU-only codelet for the u11 update: no cuda_funcs are set */
+{
+	.type = STARPU_SEQ,
+	.cpu_funcs = {chol_cpu_codelet_update_u11},
+	.cpu_funcs_name = {"chol_cpu_codelet_update_u11"}, /* string must stay in sync with the cpu_funcs entry above */
+	.nbuffers = 1,
+	.modes = { STARPU_RW }, /* the single buffer is accessed read-write */
+	.model = &chol_model_11, /* same perfmodel as cl11_gpu */
+	.color = 0xffff00,
+};
+
+struct starpu_codelet cl21_cpu = /* CPU-only codelet for the u21 update: no cuda_funcs are set */
+{
+	.type = STARPU_SEQ,
+	.cpu_funcs = {chol_cpu_codelet_update_u21},
+	.cpu_funcs_name = {"chol_cpu_codelet_update_u21"}, /* string must stay in sync with the cpu_funcs entry above */
+	.nbuffers = 2,
+	.modes = { STARPU_R, STARPU_RW }, /* first buffer read-only, second read-write */
+	.model = &chol_model_21, /* same perfmodel as cl21_gpu */
+	.color = 0x8080ff,
+};
+
+struct starpu_codelet cl22_cpu = /* CPU-only codelet for the u22 update: no cuda_funcs are set */
+{
+	.type = STARPU_SEQ,
+	.max_parallelism = INT_MAX, /* NOTE(review): set together with type STARPU_SEQ; only this *_cpu codelet sets it -- confirm it is intended */
+	.cpu_funcs = {chol_cpu_codelet_update_u22},
+	.cpu_funcs_name = {"chol_cpu_codelet_update_u22"}, /* string must stay in sync with the cpu_funcs entry above */
+	.nbuffers = 3,
+	.modes = { STARPU_R, STARPU_R, STARPU_RW }, /* two read-only inputs, third buffer read-write */
+	.model = &chol_model_22, /* same perfmodel as cl22_gpu */
+	.color = 0x00ff00,
+};
+