Преглед изворног кода

double-buffer 12 and 21 contributions

Samuel Thibault пре 15 година
родитељ
комит
e9acd82e11
3 измењена фајла: 86 додато и 0 уклоњено
  1. +46 -0  mpi/examples/mpi_lu/plu_example.c
  2. +34 -0  mpi/examples/mpi_lu/pxlu.c
  3. +6 -0  mpi/examples/mpi_lu/pxlu.h

+ 46 - 0
mpi/examples/mpi_lu/plu_example.c

@@ -43,10 +43,17 @@ static TYPE *tmp_11_block;
 static starpu_data_handle *tmp_11_block_handles;
 static TYPE **tmp_11_block;
 #endif
+#ifdef SINGLE_TMP1221
 static starpu_data_handle *tmp_12_block_handles;
 static TYPE **tmp_12_block;
 static starpu_data_handle *tmp_21_block_handles;
 static TYPE **tmp_21_block;
+#else
+static starpu_data_handle *(tmp_12_block_handles[2]);	/* 2 pointers: one array of nblocks handles per buffer (allocated in init_matrix) */
+static TYPE **(tmp_12_block[2]);	/* 2 pointers: one array of nblocks block pointers per buffer */
+static starpu_data_handle *(tmp_21_block_handles[2]);	/* same layout as tmp_12_block_handles, for the 21 blocks */
+static TYPE **(tmp_21_block[2]);	/* same layout as tmp_12_block, for the 21 blocks */
+#endif
 
 static void parse_args(int argc, char **argv)
 {
@@ -111,6 +118,7 @@ starpu_data_handle STARPU_PLU(get_tmp_11_block_handle)(unsigned k)
 }
 #endif
 
+#ifdef SINGLE_TMP1221
 starpu_data_handle STARPU_PLU(get_tmp_12_block_handle)(unsigned j)
 {
 	return tmp_12_block_handles[j];
@@ -120,6 +128,17 @@ starpu_data_handle STARPU_PLU(get_tmp_21_block_handle)(unsigned i)
 {
 	return tmp_21_block_handles[i];
 }
+#else
+starpu_data_handle STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k)
+{
+	return tmp_12_block_handles[k%2][j]; /* parity of k picks one of the two handle arrays; j indexes within it */
+}
+
+starpu_data_handle STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k)
+{
+	return tmp_21_block_handles[k%2][i]; /* parity of k picks one of the two handle arrays; i indexes within it */
+}
+#endif
 
 static void init_matrix(int rank)
 {
@@ -194,13 +213,23 @@ static void init_matrix(int rank)
 #endif
 
 	/* tmp buffers 12 and 21 */
+#ifdef SINGLE_TMP1221
 	tmp_12_block_handles = calloc(nblocks, sizeof(starpu_data_handle));
 	tmp_21_block_handles = calloc(nblocks, sizeof(starpu_data_handle));
 	tmp_12_block = calloc(nblocks, sizeof(TYPE *));
 	tmp_21_block = calloc(nblocks, sizeof(TYPE *));
+#else
+	for (i = 0; i < 2; i++) {
+		tmp_12_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle));
+		tmp_21_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle));
+		tmp_12_block[i] = calloc(nblocks, sizeof(TYPE *));
+		tmp_21_block[i] = calloc(nblocks, sizeof(TYPE *));
+	}
+#endif
 	
 	for (k = 0; k < nblocks; k++)
 	{
+#ifdef SINGLE_TMP1221
 		starpu_malloc_pinned_if_possible((void **)&tmp_12_block[k], blocksize);
 		STARPU_ASSERT(tmp_12_block[k]);
 
@@ -214,6 +243,23 @@ static void init_matrix(int rank)
 		starpu_register_blas_data(&tmp_21_block_handles[k], 0,
 			(uintptr_t)tmp_21_block[k],
 			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+#else
+	for (i = 0; i < 2; i++) {
+		starpu_malloc_pinned_if_possible((void **)&tmp_12_block[i][k], blocksize);
+		STARPU_ASSERT(tmp_12_block[i][k]);
+
+		starpu_register_blas_data(&tmp_12_block_handles[i][k], 0,
+			(uintptr_t)tmp_12_block[i][k],
+			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+
+		starpu_malloc_pinned_if_possible((void **)&tmp_21_block[i][k], blocksize);
+		STARPU_ASSERT(tmp_21_block[i][k]);
+
+		starpu_register_blas_data(&tmp_21_block_handles[i][k], 0,
+			(uintptr_t)tmp_21_block[i][k],
+			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
+	}
+#endif
 	}
 
 	//display_all_blocks(nblocks, size/nblocks);

+ 34 - 0
mpi/examples/mpi_lu/pxlu.c

@@ -338,15 +338,28 @@ static void create_task_12_recv(unsigned k, unsigned j)
 	unsigned ndeps = 0;
 	starpu_tag_t tag_array[nblocks];
 	
+#ifdef SINGLE_TMP1221
 	if (k > 0)
 	for (i = (k-1)+1; i < nblocks; i++)
+#else
+	if (k > 1)
+	for (i = (k-2)+1; i < nblocks; i++)
+#endif
 	{
 		if (rank == get_block_rank(i, j))
+#ifdef SINGLE_TMP1221
 			tag_array[ndeps++] = TAG22(k-1, i, j);
+#else
+			tag_array[ndeps++] = TAG22(k-2, i, j);
+#endif
 	}
 	
 	int source = get_block_rank(k, j);
+#ifdef SINGLE_TMP1221
 	starpu_data_handle block_handle = STARPU_PLU(get_tmp_12_block_handle)(j);
+#else
+	starpu_data_handle block_handle = STARPU_PLU(get_tmp_12_block_handle)(j,k);
+#endif
 	int mpi_tag = MPI_TAG12(k, j);
 	starpu_tag_t partial_tag = TAG12_SAVE_PARTIAL(k, j);
 	starpu_tag_t unlocked_tag = TAG12_SAVE(k, j);
@@ -497,15 +510,28 @@ static void create_task_21_recv(unsigned k, unsigned i)
 	unsigned ndeps = 0;
 	starpu_tag_t tag_array[nblocks];
 	
+#ifdef SINGLE_TMP1221
 	if (k > 0)
 	for (j = (k-1)+1; j < nblocks; j++)
+#else
+	if (k > 1)
+	for (j = (k-2)+1; j < nblocks; j++)
+#endif
 	{
 		if (rank == get_block_rank(i, j))
+#ifdef SINGLE_TMP1221
 			tag_array[ndeps++] = TAG22(k-1, i, j);
+#else
+			tag_array[ndeps++] = TAG22(k-2, i, j);
+#endif
 	}
 
 	int source = get_block_rank(i, k);
+#ifdef SINGLE_TMP1221
 	starpu_data_handle block_handle = STARPU_PLU(get_tmp_21_block_handle)(i);
+#else
+	starpu_data_handle block_handle = STARPU_PLU(get_tmp_21_block_handle)(i, k);
+#endif
 	int mpi_tag = MPI_TAG21(k, i);
 	starpu_tag_t partial_tag = TAG21_SAVE_PARTIAL(k, i);
 	starpu_tag_t unlocked_tag = TAG21_SAVE(k, i);
@@ -666,7 +692,11 @@ static void create_task_22_real(unsigned k, unsigned i, unsigned j)
 	}
 	else 
 	{
+#ifdef SINGLE_TMP1221
 		block21 = STARPU_PLU(get_tmp_21_block_handle)(i);
+#else
+		block21 = STARPU_PLU(get_tmp_21_block_handle)(i, k);
+#endif
 		tag_21_dep = TAG21_SAVE(k, i);
 	}
 
@@ -683,7 +713,11 @@ static void create_task_22_real(unsigned k, unsigned i, unsigned j)
 	}
 	else 
 	{
+#ifdef SINGLE_TMP1221
 		block12 = STARPU_PLU(get_tmp_12_block_handle)(j);
+#else
+		block12 = STARPU_PLU(get_tmp_12_block_handle)(j, k);
+#endif
 		tag_12_dep = TAG12_SAVE(k, j);
 	}
 

+ 6 - 0
mpi/examples/mpi_lu/pxlu.h

@@ -29,6 +29,7 @@
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
 
 //#define SINGLE_TMP11	1
+//#define SINGLE_TMP1221	1
 
 struct debug_info {
 	unsigned i;
@@ -52,8 +53,13 @@ starpu_data_handle STARPU_PLU(get_tmp_11_block_handle)(void);
 #else
 starpu_data_handle STARPU_PLU(get_tmp_11_block_handle)(unsigned k);
 #endif
+#ifdef SINGLE_TMP1221
 starpu_data_handle STARPU_PLU(get_tmp_12_block_handle)(unsigned j);
 starpu_data_handle STARPU_PLU(get_tmp_21_block_handle)(unsigned i);
+#else
+starpu_data_handle STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k); /* two-argument form used when SINGLE_TMP1221 is not defined */
+starpu_data_handle STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k); /* two-argument form used when SINGLE_TMP1221 is not defined */
+#endif
 
 void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize);