瀏覽代碼

Use adaptive problem sizes in cholesky and lu, according to ncpus and ncuda

Samuel Thibault 8 年之前
父節點
當前提交
c6576d1cfe

+ 25 - 7
examples/cholesky/cholesky.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2015  Université de Bordeaux
+ * Copyright (C) 2009-2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -113,15 +113,33 @@
 
 /* End of magma code */
 
+static unsigned size;
+static unsigned nblocks;
+static unsigned nbigblocks;
+
+static inline void init_sizes(void) {
+	int power = starpu_cpu_worker_get_count() + 32 * starpu_cuda_worker_get_count();
+	int power_sqrt = sqrt(power)/2;
+	if (power_sqrt < 1)
+		power_sqrt = 1;
+
 #ifdef STARPU_QUICK_CHECK
-static unsigned size = 320*4;
-static unsigned nblocks = 4;
-static unsigned nbigblocks = 2;
+	if (!size)
+		size = 320*2*power_sqrt;
+	if (!nblocks)
+		nblocks = 2*power_sqrt;
+	if (!nbigblocks)
+		nbigblocks = power_sqrt;
 #else
-static unsigned size = 960*16;
-static unsigned nblocks = 16;
-static unsigned nbigblocks = 8;
+	if (!size)
+		size = 960*8*power_sqrt;
+	if (!nblocks)
+		nblocks = 8*power_sqrt;
+	if (!nbigblocks)
+		nbigblocks = 4*power_sqrt;
 #endif
+}
+
 static unsigned pinned = 1;
 static unsigned noprio = 0;
 static unsigned check = 0;

+ 7 - 5
examples/cholesky/cholesky_grain_tag.c

@@ -264,7 +264,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 	}
 }
 
-static void initialize_system(float **A, unsigned dim, unsigned pinned)
+static void initialize_system(int argc, char **argv, float **A, unsigned pinned)
 {
 	int ret;
 	int flags = STARPU_MALLOC_SIMULATION_FOLDED;
@@ -278,6 +278,10 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 		exit(77);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	init_sizes();
+
+	parse_args(argc, argv);
+
 #ifdef STARPU_USE_CUDA
 	initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost);
 	initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost);
@@ -292,7 +296,7 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 
 	if (pinned)
 		flags |= STARPU_MALLOC_PINNED;
-	starpu_malloc_flags((void **)A, dim*dim*sizeof(float), flags);
+	starpu_malloc_flags((void **)A, size*size*sizeof(float), flags);
 }
 
 int cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned pinned)
@@ -338,10 +342,8 @@ int main(int argc, char **argv)
 
      	int ret;
 
-	parse_args(argc, argv);
-
 	float *mat = NULL;
-	initialize_system(&mat, size, pinned);
+	initialize_system(argc, argv, &mat, pinned);
 
 #ifndef STARPU_SIMGRID
 	unsigned i,j;

+ 9 - 7
examples/cholesky/cholesky_implicit.c

@@ -312,22 +312,24 @@ int main(int argc, char **argv)
 	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
 	 * */
 
-	parse_args(argc, argv);
-
-	if(with_ctxs || with_noctxs || chole1 || chole2)
-		parse_args_ctx(argc, argv);
-
 #ifdef STARPU_HAVE_MAGMA
 	magma_init();
 #endif
 
 	int ret;
 	ret = starpu_init(NULL);
-	//starpu_fxt_stop_profiling();
-
 	if (ret == -ENODEV) return 77;
         STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	//starpu_fxt_stop_profiling();
+
+	init_sizes();
+
+	parse_args(argc, argv);
+
+	if(with_ctxs || with_noctxs || chole1 || chole2)
+		parse_args_ctx(argc, argv);
+
 #ifdef STARPU_USE_CUDA
 	initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost);
 	initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost);

+ 7 - 5
examples/cholesky/cholesky_tag.c

@@ -227,7 +227,7 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, (flop/timing/1000.0f));
 }
 
-static int initialize_system(float **A, unsigned dim, unsigned pinned)
+static int initialize_system(int argc, char **argv, float **A, unsigned pinned)
 {
 	int ret;
 	int flags = STARPU_MALLOC_SIMULATION_FOLDED;
@@ -241,6 +241,10 @@ static int initialize_system(float **A, unsigned dim, unsigned pinned)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	init_sizes();
+
+	parse_args(argc, argv);
+
 #ifdef STARPU_USE_CUDA
 	initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost);
 	initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost);
@@ -255,7 +259,7 @@ static int initialize_system(float **A, unsigned dim, unsigned pinned)
 
 	if (pinned)
 		flags |= STARPU_MALLOC_PINNED;
-	starpu_malloc_flags((void **)A, dim*dim*sizeof(float), flags);
+	starpu_malloc_flags((void **)A, size*size*sizeof(float), flags);
 
 	return 0;
 }
@@ -308,10 +312,8 @@ int main(int argc, char **argv)
 	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
 	 * */
 
-	parse_args(argc, argv);
-
 	float *mat = NULL;
-	int ret = initialize_system(&mat, size, pinned);
+	int ret = initialize_system(argc, argv, &mat, pinned);
 	if (ret) return ret;
 
 #ifndef STARPU_SIMGRID

+ 7 - 5
examples/cholesky/cholesky_tile_tag.c

@@ -220,11 +220,6 @@ int main(int argc, char **argv)
 	unsigned x, y;
 	int ret;
 
-	parse_args(argc, argv);
-	assert(nblocks <= NMAXBLOCKS);
-
-	FPRINTF(stderr, "BLOCK SIZE = %d\n", size / nblocks);
-
 #ifdef STARPU_HAVE_MAGMA
 	magma_init();
 #endif
@@ -234,6 +229,13 @@ int main(int argc, char **argv)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	init_sizes();
+
+	parse_args(argc, argv);
+	assert(nblocks <= NMAXBLOCKS);
+
+	FPRINTF(stderr, "BLOCK SIZE = %d\n", size / nblocks);
+
 #ifdef STARPU_USE_CUDA
 	initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost);
 	initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost);

+ 21 - 14
examples/lu/lu_example.c

@@ -27,13 +27,8 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-#ifdef STARPU_QUICK_CHECK
-static unsigned long size = 320*4;
-static unsigned nblocks = 4;
-#else
-static unsigned long size = 960*16;
-static unsigned nblocks = 16;
-#endif
+static unsigned long size = 0;
+static unsigned nblocks = 0;
 static unsigned check = 0;
 static unsigned pivot = 0;
 static unsigned no_stride = 0;
@@ -313,18 +308,30 @@ int main(int argc, char **argv)
 {
 	int ret;
 
-#ifdef STARPU_QUICK_CHECK
-	size /= 4;
-	nblocks /= 4;
-#endif
-
-	parse_args(argc, argv);
-
 	ret = starpu_init(NULL);
 	if (ret == -ENODEV)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	int power = starpu_cpu_worker_get_count() + 32 * starpu_cuda_worker_get_count();
+	int power_sqrt = sqrt(power)/2;
+	if (power_sqrt < 1)
+		power_sqrt = 1;
+
+#ifdef STARPU_QUICK_CHECK
+	if (!size)
+		size = 320*2*power_sqrt;
+	if (!nblocks)
+		nblocks = 2*power_sqrt;
+#else
+	if (!size)
+		size = 960*8*power_sqrt;
+	if (!nblocks)
+		nblocks = 8*power_sqrt;
+#endif
+
+	parse_args(argc, argv);
+
 	starpu_cublas_init();
 
 	init_matrix();