|
@@ -1,7 +1,7 @@
|
|
|
/* StarPU --- Runtime system for heterogeneous multicore architectures.
|
|
|
*
|
|
|
- * Copyright (C) 2009, 2010 Université de Bordeaux 1
|
|
|
- * Copyright (C) 2010 Centre National de la Recherche Scientifique
|
|
|
+ * Copyright (C) 2009, 2010-2011 Université de Bordeaux 1
|
|
|
+ * Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
|
|
|
*
|
|
|
* StarPU is free software; you can redistribute it and/or modify
|
|
|
* it under the terms of the GNU Lesser General Public License as published by
|
|
@@ -46,18 +46,17 @@ static struct starpu_task *create_task(starpu_tag_t id)
|
|
|
return task;
|
|
|
}
|
|
|
|
|
|
-static void create_task_pivot(starpu_data_handle *dataAp, unsigned lu_nblocks,
|
|
|
+static void create_task_pivot(starpu_data_handle *dataAp, unsigned nblocks,
|
|
|
struct piv_s *piv_description,
|
|
|
unsigned k, unsigned i,
|
|
|
- starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned),
|
|
|
- struct starpu_sched_ctx *sched_ctx)
|
|
|
+ starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned))
|
|
|
{
|
|
|
struct starpu_task *task = create_task(PIVOT(k, i));
|
|
|
|
|
|
task->cl = &cl_pivot;
|
|
|
|
|
|
/* which sub-data is manipulated ? */
|
|
|
- task->buffers[0].handle = get_block(dataAp, lu_nblocks, k, i);
|
|
|
+ task->buffers[0].handle = get_block(dataAp, nblocks, k, i);
|
|
|
task->buffers[0].mode = STARPU_RW;
|
|
|
|
|
|
task->cl_arg = &piv_description[k];
|
|
@@ -76,24 +75,24 @@ static void create_task_pivot(starpu_data_handle *dataAp, unsigned lu_nblocks,
|
|
|
starpu_tag_declare_deps(PIVOT(k, i), 2, TAG11(k), TAG22(k-1, i, k));
|
|
|
}
|
|
|
else {
|
|
|
- starpu_tag_t *tags = malloc((lu_nblocks - k)*sizeof(starpu_tag_t));
|
|
|
+ starpu_tag_t *tags = malloc((nblocks - k)*sizeof(starpu_tag_t));
|
|
|
|
|
|
tags[0] = TAG11(k);
|
|
|
unsigned ind, ind2;
|
|
|
- for (ind = k + 1, ind2 = 0; ind < lu_nblocks; ind++, ind2++)
|
|
|
+ for (ind = k + 1, ind2 = 0; ind < nblocks; ind++, ind2++)
|
|
|
{
|
|
|
tags[1 + ind2] = TAG22(k-1, ind, k);
|
|
|
}
|
|
|
|
|
|
/* perhaps we could do better ... :/ */
|
|
|
- starpu_tag_declare_deps_array(PIVOT(k, i), (lu_nblocks-k), tags);
|
|
|
+ starpu_tag_declare_deps_array(PIVOT(k, i), (nblocks-k), tags);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- starpu_task_submit_to_ctx(task, sched_ctx);
|
|
|
+ starpu_task_submit(task);
|
|
|
}
|
|
|
|
|
|
-static struct starpu_task *create_task_11_pivot(starpu_data_handle *dataAp, unsigned lu_nblocks,
|
|
|
+static struct starpu_task *create_task_11_pivot(starpu_data_handle *dataAp, unsigned nblocks,
|
|
|
unsigned k, struct piv_s *piv_description,
|
|
|
starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned))
|
|
|
{
|
|
@@ -104,7 +103,7 @@ static struct starpu_task *create_task_11_pivot(starpu_data_handle *dataAp, unsi
|
|
|
task->cl_arg = &piv_description[k];
|
|
|
|
|
|
/* which sub-data is manipulated ? */
|
|
|
- task->buffers[0].handle = get_block(dataAp, lu_nblocks, k, k);
|
|
|
+ task->buffers[0].handle = get_block(dataAp, nblocks, k, k);
|
|
|
task->buffers[0].mode = STARPU_RW;
|
|
|
|
|
|
/* this is an important task */
|
|
@@ -119,11 +118,10 @@ static struct starpu_task *create_task_11_pivot(starpu_data_handle *dataAp, unsi
|
|
|
return task;
|
|
|
}
|
|
|
|
|
|
-static void create_task_12(starpu_data_handle *dataAp, unsigned lu_nblocks, unsigned k, unsigned j,
|
|
|
- starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned),
|
|
|
- struct starpu_sched_ctx *sched_ctx)
|
|
|
+static void create_task_12(starpu_data_handle *dataAp, unsigned nblocks, unsigned k, unsigned j,
|
|
|
+ starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned))
|
|
|
{
|
|
|
-// printf("task 12 k,i = %d,%d TAG = %llx\n", k,i, TAG12(k,i));
|
|
|
+/* printf("task 12 k,i = %d,%d TAG = %llx\n", k,i, TAG12(k,i)); */
|
|
|
|
|
|
struct starpu_task *task = create_task(TAG12(k, j));
|
|
|
|
|
@@ -132,9 +130,9 @@ static void create_task_12(starpu_data_handle *dataAp, unsigned lu_nblocks, unsi
|
|
|
task->cl_arg = (void *)(task->tag_id);
|
|
|
|
|
|
/* which sub-data is manipulated ? */
|
|
|
- task->buffers[0].handle = get_block(dataAp, lu_nblocks, k, k);
|
|
|
+ task->buffers[0].handle = get_block(dataAp, nblocks, k, k);
|
|
|
task->buffers[0].mode = STARPU_R;
|
|
|
- task->buffers[1].handle = get_block(dataAp, lu_nblocks, j, k);
|
|
|
+ task->buffers[1].handle = get_block(dataAp, nblocks, j, k);
|
|
|
task->buffers[1].mode = STARPU_RW;
|
|
|
|
|
|
if (!no_prio && (j == k+1)) {
|
|
@@ -152,21 +150,20 @@ static void create_task_12(starpu_data_handle *dataAp, unsigned lu_nblocks, unsi
|
|
|
starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
|
|
|
}
|
|
|
|
|
|
- starpu_task_submit_to_ctx(task, sched_ctx);
|
|
|
+ starpu_task_submit(task);
|
|
|
}
|
|
|
|
|
|
-static void create_task_21(starpu_data_handle *dataAp, unsigned lu_nblocks, unsigned k, unsigned i,
|
|
|
- starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned),
|
|
|
- struct starpu_sched_ctx *sched_ctx)
|
|
|
+static void create_task_21(starpu_data_handle *dataAp, unsigned nblocks, unsigned k, unsigned i,
|
|
|
+ starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned))
|
|
|
{
|
|
|
struct starpu_task *task = create_task(TAG21(k, i));
|
|
|
|
|
|
task->cl = &cl21;
|
|
|
|
|
|
/* which sub-data is manipulated ? */
|
|
|
- task->buffers[0].handle = get_block(dataAp, lu_nblocks, k, k);
|
|
|
+ task->buffers[0].handle = get_block(dataAp, nblocks, k, k);
|
|
|
task->buffers[0].mode = STARPU_R;
|
|
|
- task->buffers[1].handle = get_block(dataAp, lu_nblocks, k, i);
|
|
|
+ task->buffers[1].handle = get_block(dataAp, nblocks, k, i);
|
|
|
task->buffers[1].mode = STARPU_RW;
|
|
|
|
|
|
if (!no_prio && (i == k+1)) {
|
|
@@ -178,14 +175,13 @@ static void create_task_21(starpu_data_handle *dataAp, unsigned lu_nblocks, unsi
|
|
|
/* enforce dependencies ... */
|
|
|
starpu_tag_declare_deps(TAG21(k, i), 1, PIVOT(k, i));
|
|
|
|
|
|
- starpu_task_submit_to_ctx(task, sched_ctx);
|
|
|
+ starpu_task_submit(task);
|
|
|
}
|
|
|
|
|
|
-static void create_task_22(starpu_data_handle *dataAp, unsigned lu_nblocks, unsigned k, unsigned i, unsigned j,
|
|
|
- starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned),
|
|
|
- struct starpu_sched_ctx *sched_ctx)
|
|
|
+static void create_task_22(starpu_data_handle *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
|
|
|
+ starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned))
|
|
|
{
|
|
|
-// printf("task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG22(k,i,j));
|
|
|
+/* printf("task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG22(k,i,j)); */
|
|
|
|
|
|
struct starpu_task *task = create_task(TAG22(k, i, j));
|
|
|
|
|
@@ -194,11 +190,11 @@ static void create_task_22(starpu_data_handle *dataAp, unsigned lu_nblocks, unsi
|
|
|
task->cl_arg = (void *)(task->tag_id);
|
|
|
|
|
|
/* which sub-data is manipulated ? */
|
|
|
- task->buffers[0].handle = get_block(dataAp, lu_nblocks, k, i); /* produced by TAG21(k, i) */
|
|
|
+ task->buffers[0].handle = get_block(dataAp, nblocks, k, i); /* produced by TAG21(k, i) */
|
|
|
task->buffers[0].mode = STARPU_R;
|
|
|
- task->buffers[1].handle = get_block(dataAp, lu_nblocks, j, k); /* produced by TAG12(k, j) */
|
|
|
+ task->buffers[1].handle = get_block(dataAp, nblocks, j, k); /* produced by TAG12(k, j) */
|
|
|
task->buffers[1].mode = STARPU_R;
|
|
|
- task->buffers[2].handle = get_block(dataAp, lu_nblocks, j, i); /* produced by TAG22(k-1, i, j) */
|
|
|
+ task->buffers[2].handle = get_block(dataAp, nblocks, j, i); /* produced by TAG22(k-1, i, j) */
|
|
|
task->buffers[2].mode = STARPU_RW;
|
|
|
|
|
|
if (!no_prio && (i == k + 1) && (j == k +1) ) {
|
|
@@ -213,7 +209,7 @@ static void create_task_22(starpu_data_handle *dataAp, unsigned lu_nblocks, unsi
|
|
|
starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
|
|
|
}
|
|
|
|
|
|
- starpu_task_submit_to_ctx(task, sched_ctx);
|
|
|
+ starpu_task_submit(task);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -222,9 +218,8 @@ static void create_task_22(starpu_data_handle *dataAp, unsigned lu_nblocks, unsi
|
|
|
|
|
|
static double dw_codelet_facto_pivot(starpu_data_handle *dataAp,
|
|
|
struct piv_s *piv_description,
|
|
|
- unsigned lu_nblocks,
|
|
|
- starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned),
|
|
|
- struct starpu_sched_ctx *sched_ctx)
|
|
|
+ unsigned nblocks,
|
|
|
+ starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned))
|
|
|
{
|
|
|
struct timeval start;
|
|
|
struct timeval end;
|
|
@@ -234,46 +229,46 @@ static double dw_codelet_facto_pivot(starpu_data_handle *dataAp,
|
|
|
/* create all the DAG nodes */
|
|
|
unsigned i,j,k;
|
|
|
|
|
|
- for (k = 0; k < lu_nblocks; k++)
|
|
|
+ for (k = 0; k < nblocks; k++)
|
|
|
{
|
|
|
- struct starpu_task *task = create_task_11_pivot(dataAp, lu_nblocks, k, piv_description, get_block);
|
|
|
+ struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
|
|
|
|
|
|
/* we defer the launch of the first task */
|
|
|
if (k == 0) {
|
|
|
entry_task = task;
|
|
|
}
|
|
|
else {
|
|
|
- starpu_task_submit_to_ctx(task, sched_ctx);
|
|
|
+ starpu_task_submit(task);
|
|
|
}
|
|
|
|
|
|
- for (i = 0; i < lu_nblocks; i++)
|
|
|
+ for (i = 0; i < nblocks; i++)
|
|
|
{
|
|
|
if (i != k)
|
|
|
- create_task_pivot(dataAp, lu_nblocks, piv_description, k, i, get_block, sched_ctx);
|
|
|
+ create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
|
|
|
}
|
|
|
|
|
|
- for (i = k+1; i<lu_nblocks; i++)
|
|
|
+ for (i = k+1; i<nblocks; i++)
|
|
|
{
|
|
|
- create_task_12(dataAp, lu_nblocks, k, i, get_block, sched_ctx);
|
|
|
- create_task_21(dataAp, lu_nblocks, k, i, get_block, sched_ctx);
|
|
|
+ create_task_12(dataAp, nblocks, k, i, get_block);
|
|
|
+ create_task_21(dataAp, nblocks, k, i, get_block);
|
|
|
}
|
|
|
|
|
|
- for (i = k+1; i<lu_nblocks; i++)
|
|
|
+ for (i = k+1; i<nblocks; i++)
|
|
|
{
|
|
|
- for (j = k+1; j<lu_nblocks; j++)
|
|
|
+ for (j = k+1; j<nblocks; j++)
|
|
|
{
|
|
|
- create_task_22(dataAp, lu_nblocks, k, i, j, get_block, sched_ctx);
|
|
|
+ create_task_22(dataAp, nblocks, k, i, j, get_block);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /* we wait the last task (TAG11(lu_nblocks - 1)) and all the pivot tasks */
|
|
|
- starpu_tag_t *tags = malloc(lu_nblocks*lu_nblocks*sizeof(starpu_tag_t));
|
|
|
+ /* we wait the last task (TAG11(nblocks - 1)) and all the pivot tasks */
|
|
|
+ starpu_tag_t *tags = malloc(nblocks*nblocks*sizeof(starpu_tag_t));
|
|
|
unsigned ndeps = 0;
|
|
|
|
|
|
- tags[ndeps++] = TAG11(lu_nblocks - 1);
|
|
|
+ tags[ndeps++] = TAG11(nblocks - 1);
|
|
|
|
|
|
- for (j = 0; j < lu_nblocks; j++)
|
|
|
+ for (j = 0; j < nblocks; j++)
|
|
|
{
|
|
|
for (i = 0; i < j; i++)
|
|
|
{
|
|
@@ -283,17 +278,16 @@ static double dw_codelet_facto_pivot(starpu_data_handle *dataAp,
|
|
|
|
|
|
/* schedule the codelet */
|
|
|
gettimeofday(&start, NULL);
|
|
|
- int ret = starpu_task_submit_to_ctx(entry_task, sched_ctx);
|
|
|
+ int ret = starpu_task_submit(entry_task);
|
|
|
if (STARPU_UNLIKELY(ret == -ENODEV))
|
|
|
{
|
|
|
- fprintf(stderr, "No worker may execute this task\n");
|
|
|
+ FPRINTF(stderr, "No worker may execute this task\n");
|
|
|
exit(-1);
|
|
|
}
|
|
|
|
|
|
/* stall the application until the end of computations */
|
|
|
starpu_tag_wait_array(ndeps, tags);
|
|
|
- printf("lu pivot finish waiting for %d blocks \n", lu_nblocks);
|
|
|
-// starpu_task_wait_for_all();
|
|
|
+/* starpu_task_wait_for_all(); */
|
|
|
|
|
|
gettimeofday(&end, NULL);
|
|
|
|
|
@@ -302,14 +296,14 @@ static double dw_codelet_facto_pivot(starpu_data_handle *dataAp,
|
|
|
}
|
|
|
|
|
|
starpu_data_handle get_block_with_striding(starpu_data_handle *dataAp,
|
|
|
- unsigned lu_nblocks __attribute__((unused)), unsigned j, unsigned i)
|
|
|
+ unsigned nblocks __attribute__((unused)), unsigned j, unsigned i)
|
|
|
{
|
|
|
/* we use filters */
|
|
|
return starpu_data_get_sub_data(*dataAp, 2, j, i);
|
|
|
}
|
|
|
|
|
|
|
|
|
-double STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned lu_nblocks, struct starpu_sched_ctx *sched_ctx)
|
|
|
+void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
|
|
|
{
|
|
|
starpu_data_handle dataA;
|
|
|
|
|
@@ -322,11 +316,11 @@ double STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned si
|
|
|
|
|
|
struct starpu_data_filter f;
|
|
|
f.filter_func = starpu_vertical_block_filter_func;
|
|
|
- f.nchildren = lu_nblocks;
|
|
|
+ f.nchildren = nblocks;
|
|
|
|
|
|
struct starpu_data_filter f2;
|
|
|
f2.filter_func = starpu_block_filter_func;
|
|
|
- f2.nchildren = lu_nblocks;
|
|
|
+ f2.nchildren = nblocks;
|
|
|
|
|
|
starpu_data_map_filters(dataA, 2, &f, &f2);
|
|
|
|
|
@@ -334,88 +328,89 @@ double STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned si
|
|
|
for (i = 0; i < size; i++)
|
|
|
ipiv[i] = i;
|
|
|
|
|
|
- struct piv_s *piv_description = malloc(lu_nblocks*sizeof(struct piv_s));
|
|
|
+ struct piv_s *piv_description = malloc(nblocks*sizeof(struct piv_s));
|
|
|
unsigned block;
|
|
|
- for (block = 0; block < lu_nblocks; block++)
|
|
|
+ for (block = 0; block < nblocks; block++)
|
|
|
{
|
|
|
piv_description[block].piv = ipiv;
|
|
|
- piv_description[block].first = block * (size / lu_nblocks);
|
|
|
- piv_description[block].last = (block + 1) * (size / lu_nblocks);
|
|
|
+ piv_description[block].first = block * (size / nblocks);
|
|
|
+ piv_description[block].last = (block + 1) * (size / nblocks);
|
|
|
}
|
|
|
|
|
|
#if 0
|
|
|
unsigned j;
|
|
|
- for (j = 0; j < lu_nblocks; j++)
|
|
|
- for (i = 0; i < lu_nblocks; i++)
|
|
|
+ for (j = 0; j < nblocks; j++)
|
|
|
+ for (i = 0; i < nblocks; i++)
|
|
|
{
|
|
|
- printf("BLOCK %d %d %p\n", i, j, &matA[i*(size/lu_nblocks) + j * (size/lu_nblocks)*ld]);
|
|
|
+ printf("BLOCK %d %d %p\n", i, j, &matA[i*(size/nblocks) + j * (size/nblocks)*ld]);
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
double timing;
|
|
|
- timing = dw_codelet_facto_pivot(&dataA, piv_description, lu_nblocks, get_block_with_striding, sched_ctx);
|
|
|
+ timing = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding);
|
|
|
|
|
|
- fprintf(stderr, "Computation took (in ms)\n");
|
|
|
- fprintf(stderr, "%2.2f\n", timing/1000);
|
|
|
+ FPRINTF(stderr, "Computation took (in ms)\n");
|
|
|
+ FPRINTF(stderr, "%2.2f\n", timing/1000);
|
|
|
|
|
|
unsigned n = starpu_matrix_get_nx(dataA);
|
|
|
double flop = (2.0f*n*n*n)/3.0f;
|
|
|
- double gflops = flop/timing/1000.0f;
|
|
|
+ FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
|
|
|
|
|
|
/* gather all the data */
|
|
|
starpu_data_unpartition(dataA, 0);
|
|
|
- return gflops;
|
|
|
}
|
|
|
|
|
|
|
|
|
-starpu_data_handle get_block_with_no_striding(starpu_data_handle *dataAp, unsigned lu_nblocks, unsigned j, unsigned i)
|
|
|
+starpu_data_handle get_block_with_no_striding(starpu_data_handle *dataAp, unsigned nblocks, unsigned j, unsigned i)
|
|
|
{
|
|
|
/* dataAp is an array of data handle */
|
|
|
- return dataAp[i+j*lu_nblocks];
|
|
|
+ return dataAp[i+j*nblocks];
|
|
|
}
|
|
|
|
|
|
-double STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned lu_nblocks, struct starpu_sched_ctx *sched_ctx)
|
|
|
+void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
|
|
|
{
|
|
|
- starpu_data_handle *dataAp = malloc(lu_nblocks*lu_nblocks*sizeof(starpu_data_handle));
|
|
|
+ starpu_data_handle *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle));
|
|
|
|
|
|
/* monitor and partition the A matrix into blocks :
|
|
|
* one block is now determined by 2 unsigned (i,j) */
|
|
|
unsigned bi, bj;
|
|
|
- for (bj = 0; bj < lu_nblocks; bj++)
|
|
|
- for (bi = 0; bi < lu_nblocks; bi++)
|
|
|
+ for (bj = 0; bj < nblocks; bj++)
|
|
|
+ for (bi = 0; bi < nblocks; bi++)
|
|
|
{
|
|
|
- starpu_matrix_data_register(&dataAp[bi+lu_nblocks*bj], 0,
|
|
|
- (uintptr_t)matA[bi+lu_nblocks*bj], size/lu_nblocks,
|
|
|
- size/lu_nblocks, size/lu_nblocks, sizeof(TYPE));
|
|
|
+ starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
|
|
|
+ (uintptr_t)matA[bi+nblocks*bj], size/nblocks,
|
|
|
+ size/nblocks, size/nblocks, sizeof(TYPE));
|
|
|
|
|
|
/* We already enforce deps by hand */
|
|
|
- starpu_data_set_sequential_consistency_flag(dataAp[bi+lu_nblocks*bj], 0);
|
|
|
+ starpu_data_set_sequential_consistency_flag(dataAp[bi+nblocks*bj], 0);
|
|
|
}
|
|
|
|
|
|
unsigned i;
|
|
|
for (i = 0; i < size; i++)
|
|
|
ipiv[i] = i;
|
|
|
|
|
|
- struct piv_s *piv_description = malloc(lu_nblocks*sizeof(struct piv_s));
|
|
|
+ struct piv_s *piv_description = malloc(nblocks*sizeof(struct piv_s));
|
|
|
unsigned block;
|
|
|
- for (block = 0; block < lu_nblocks; block++)
|
|
|
+ for (block = 0; block < nblocks; block++)
|
|
|
{
|
|
|
piv_description[block].piv = ipiv;
|
|
|
- piv_description[block].first = block * (size / lu_nblocks);
|
|
|
- piv_description[block].last = (block + 1) * (size / lu_nblocks);
|
|
|
+ piv_description[block].first = block * (size / nblocks);
|
|
|
+ piv_description[block].last = (block + 1) * (size / nblocks);
|
|
|
}
|
|
|
|
|
|
double timing;
|
|
|
- timing = dw_codelet_facto_pivot(dataAp, piv_description, lu_nblocks, get_block_with_no_striding, sched_ctx);
|
|
|
+ timing = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding);
|
|
|
+
|
|
|
+ FPRINTF(stderr, "Computation took (in ms)\n");
|
|
|
+ FPRINTF(stderr, "%2.2f\n", timing/1000);
|
|
|
|
|
|
- unsigned n = starpu_matrix_get_nx(dataAp[0])*lu_nblocks;
|
|
|
+ unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
|
|
|
double flop = (2.0f*n*n*n)/3.0f;
|
|
|
- double gflops = flop/timing/1000.0f;
|
|
|
+ FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
|
|
|
|
|
|
- for (bj = 0; bj < lu_nblocks; bj++)
|
|
|
- for (bi = 0; bi < lu_nblocks; bi++)
|
|
|
+ for (bj = 0; bj < nblocks; bj++)
|
|
|
+ for (bi = 0; bi < nblocks; bi++)
|
|
|
{
|
|
|
- starpu_data_unregister(dataAp[bi+lu_nblocks*bj]);
|
|
|
+ starpu_data_unregister(dataAp[bi+nblocks*bj]);
|
|
|
}
|
|
|
- return gflops;
|
|
|
}
|