Browse Source

Add a barrier so that the 2 scheduling contexts (ctxs) start working at the same time

Andra Hugo 14 years ago
parent
commit
14ba4ba56f

+ 6 - 6
examples/cholesky_and_lu/cholesky/cholesky.h

@@ -54,8 +54,8 @@
 #define BLAS3_FLOP(n1,n2,n3)    \
 #define BLAS3_FLOP(n1,n2,n3)    \
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
 
 
-static unsigned size = 4*1024;
-static unsigned nblocks = 16;
+//static unsigned size = 4*1024;
+//static unsigned nblocks = 16;
 static unsigned nbigblocks = 8;
 static unsigned nbigblocks = 8;
 static unsigned pinned = 0;
 static unsigned pinned = 0;
 static unsigned noprio = 0;
 static unsigned noprio = 0;
@@ -72,7 +72,7 @@ void chol_cublas_codelet_update_u22(void *descr[], void *_args);
 #endif
 #endif
 
 
 int run_cholesky_grain_tag(struct starpu_sched_ctx *sched_ctx, int argc, char **argv);
 int run_cholesky_grain_tag(struct starpu_sched_ctx *sched_ctx, int argc, char **argv);
-double run_cholesky_implicit(struct starpu_sched_ctx *sched_ctx, int argc, char **argv, double *timing);
+double run_cholesky_implicit(struct starpu_sched_ctx *sched_ctx, int argc, char **argv, double *timing, pthread_barrier_t *barrier);
 int run_cholesky_tag(struct starpu_sched_ctx *sched_ctx, int argc, char **argv);
 int run_cholesky_tag(struct starpu_sched_ctx *sched_ctx, int argc, char **argv);
 double run_cholesky_tile_tag(struct starpu_sched_ctx *sched_ctx, int argc, char **argv);
 double run_cholesky_tile_tag(struct starpu_sched_ctx *sched_ctx, int argc, char **argv);
 
 
@@ -80,18 +80,18 @@ extern struct starpu_perfmodel_t chol_model_11;
 extern struct starpu_perfmodel_t chol_model_21;
 extern struct starpu_perfmodel_t chol_model_21;
 extern struct starpu_perfmodel_t chol_model_22;
 extern struct starpu_perfmodel_t chol_model_22;
 
 
-static void __attribute__((unused)) parse_args(int argc, char **argv)
+static void __attribute__((unused)) parse_args(int argc, char **argv, unsigned *size, unsigned *nblocks)
 {
 {
 	int i;
 	int i;
 	for (i = 1; i < argc; i++) {
 	for (i = 1; i < argc; i++) {
 		if (strcmp(argv[i], "-size") == 0) {
 		if (strcmp(argv[i], "-size") == 0) {
 		        char *argptr;
 		        char *argptr;
-			size = strtol(argv[++i], &argptr, 10);
+			(*size) = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
 		if (strcmp(argv[i], "-nblocks") == 0) {
 		if (strcmp(argv[i], "-nblocks") == 0) {
 		        char *argptr;
 		        char *argptr;
-			nblocks = strtol(argv[++i], &argptr, 10);
+			(*nblocks) = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
 		if (strcmp(argv[i], "-nbigblocks") == 0) {
 		if (strcmp(argv[i], "-nbigblocks") == 0) {

+ 6 - 6
examples/cholesky_and_lu/cholesky/cholesky_implicit.c

@@ -101,7 +101,6 @@ static double _cholesky(starpu_data_handle dataA, unsigned nblocks, struct starp
 		for (j = k+1; j<nblocks; j++)
 		for (j = k+1; j<nblocks; j++)
 		{
 		{
                         starpu_data_handle sdatakj = starpu_data_get_sub_data(dataA, 2, k, j);
                         starpu_data_handle sdatakj = starpu_data_get_sub_data(dataA, 2, k, j);
-
 			if(sched_ctx != NULL)
 			if(sched_ctx != NULL)
 				starpu_insert_task_to_ctx(sched_ctx, &cl21,
 				starpu_insert_task_to_ctx(sched_ctx, &cl21,
 							  STARPU_PRIORITY, (j == k+1)?prio_level:STARPU_DEFAULT_PRIO,
 							  STARPU_PRIORITY, (j == k+1)?prio_level:STARPU_DEFAULT_PRIO,
@@ -179,18 +178,19 @@ static double cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks
 		f2.get_child_ops = NULL;
 		f2.get_child_ops = NULL;
 
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 	starpu_data_map_filters(dataA, 2, &f, &f2);
-
 	return _cholesky(dataA, nblocks, sched_ctx, timing);
 	return _cholesky(dataA, nblocks, sched_ctx, timing);
 }
 }
 
 
-double run_cholesky_implicit(struct starpu_sched_ctx *sched_ctx, int argc, char **argv, double *timing)
+double run_cholesky_implicit(struct starpu_sched_ctx *sched_ctx, int argc, char **argv, double *timing, pthread_barrier_t *barrier)
 {
 {
 	/* create a simple symmetric positive definite matrix example
 	/* create a simple symmetric positive definite matrix example
 	 *
 	 *
 	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
 	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
 	 * */
 	 * */
 
 
-	parse_args(argc, argv);
+	unsigned size = 4 * 1024;
+	unsigned nblocks = 16;
+	parse_args(argc, argv, &size, &nblocks);
 
 
 	//	starpu_init(NULL);
 	//	starpu_init(NULL);
 
 
@@ -198,7 +198,6 @@ double run_cholesky_implicit(struct starpu_sched_ctx *sched_ctx, int argc, char
 
 
 	float *mat;
 	float *mat;
 	starpu_data_malloc_pinned_if_possible((void **)&mat, (size_t)size*size*sizeof(float));
 	starpu_data_malloc_pinned_if_possible((void **)&mat, (size_t)size*size*sizeof(float));
-
 	unsigned i,j;
 	unsigned i,j;
 	for (i = 0; i < size; i++)
 	for (i = 0; i < size; i++)
 	{
 	{
@@ -227,7 +226,8 @@ double run_cholesky_implicit(struct starpu_sched_ctx *sched_ctx, int argc, char
 		printf("\n");
 		printf("\n");
 	}
 	}
 #endif
 #endif
-
+	if(barrier != NULL)
+	  pthread_barrier_wait(barrier);
 	double gflops = cholesky(mat, size, size, nblocks, sched_ctx, timing);
 	double gflops = cholesky(mat, size, size, nblocks, sched_ctx, timing);
 
 
 #ifdef PRINT_OUTPUT
 #ifdef PRINT_OUTPUT

+ 36 - 34
examples/cholesky_and_lu/cholesky_and_lu.c

@@ -17,13 +17,11 @@ struct starpu_sched_ctx sched_ctx;
 struct starpu_sched_ctx sched_ctx2;
 struct starpu_sched_ctx sched_ctx2;
 struct starpu_sched_ctx sched_ctx3;
 struct starpu_sched_ctx sched_ctx3;
 struct starpu_sched_ctx sched_ctx4;
 struct starpu_sched_ctx sched_ctx4;
+pthread_barrier_t barrier;
 
 
 void* func_cholesky(void *val){
 void* func_cholesky(void *val){
   params *p = (params*)val;
   params *p = (params*)val;
 
 
-  int procs[] = {1, 2, 3, 4, 5, 6};
-  starpu_create_sched_ctx(&sched_ctx, "heft", procs, 6, "cholesky1");
-
   int i;
   int i;
   retvals *rv  = (retvals*)malloc(sizeof(retvals));
   retvals *rv  = (retvals*)malloc(sizeof(retvals));
   rv->flops = 0;
   rv->flops = 0;
@@ -31,7 +29,7 @@ void* func_cholesky(void *val){
   double timing = 0;
   double timing = 0;
   for(i = 0; i < NSAMPLES; i++)
   for(i = 0; i < NSAMPLES; i++)
     {
     {
-      rv->flops += run_cholesky_implicit(&sched_ctx, p->argc, p->argv, &timing);
+      rv->flops += run_cholesky_implicit(&sched_ctx, p->argc, p->argv, &timing, &barrier);
       rv->avg_timing += timing;
       rv->avg_timing += timing;
     }
     }
 
 
@@ -43,9 +41,6 @@ void* func_cholesky(void *val){
 void* func_cholesky2(void *val){
 void* func_cholesky2(void *val){
   params *p = (params*)val;
   params *p = (params*)val;
 
 
-  int procs[] = {0, 7, 8, 9, 10, 11};
-  starpu_create_sched_ctx(&sched_ctx2, "heft", procs, 6, "cholesky2");
-
   int i;
   int i;
   retvals *rv  = (retvals*)malloc(sizeof(retvals));
   retvals *rv  = (retvals*)malloc(sizeof(retvals));
   rv->flops = 0;
   rv->flops = 0;
@@ -54,7 +49,7 @@ void* func_cholesky2(void *val){
 
 
   for(i = 0; i < NSAMPLES; i++)
   for(i = 0; i < NSAMPLES; i++)
     {
     {
-      rv->flops += run_cholesky_implicit(&sched_ctx2, p->argc, p->argv, &timing);
+      rv->flops += run_cholesky_implicit(&sched_ctx2, p->argc, p->argv, &timing, &barrier);
       rv->avg_timing += timing;
       rv->avg_timing += timing;
     }
     }
 
 
@@ -74,7 +69,7 @@ void* func_cholesky3(void *val){
 
 
   for(i = 0; i < NSAMPLES; i++)
   for(i = 0; i < NSAMPLES; i++)
     {
     {
-      rv->flops += run_cholesky_implicit(NULL, p->argc, p->argv, &timing);
+      rv->flops += run_cholesky_implicit(NULL, p->argc, p->argv, &timing, NULL);
       rv->avg_timing += timing;
       rv->avg_timing += timing;
     }
     }
 
 
@@ -89,7 +84,14 @@ void cholesky_vs_cholesky(params *p){
   starpu_init(NULL);
   starpu_init(NULL);
   starpu_helper_cublas_init();
   starpu_helper_cublas_init();
 
 
+  int procs[] = {1, 2, 3, 4, 5, 6};
+  starpu_create_sched_ctx(&sched_ctx, "heft", procs, 6, "cholesky1");
+
+  int procs2[] = {0, 7, 8, 9, 10, 11};
+  starpu_create_sched_ctx(&sched_ctx2, "heft", procs2, 6, "cholesky2");
+
   pthread_t tid[2];
   pthread_t tid[2];
+  pthread_barrier_init(&barrier, NULL, 2);
 
 
   pthread_create(&tid[0], NULL, (void*)func_cholesky, (void*)p);
   pthread_create(&tid[0], NULL, (void*)func_cholesky, (void*)p);
   pthread_create(&tid[1], NULL, (void*)func_cholesky2, (void*)p);
   pthread_create(&tid[1], NULL, (void*)func_cholesky2, (void*)p);
@@ -103,43 +105,43 @@ void cholesky_vs_cholesky(params *p){
   starpu_helper_cublas_shutdown();
   starpu_helper_cublas_shutdown();
   starpu_shutdown();
   starpu_shutdown();
 
 
-  /* 1 cholesky all alone on the whole machine */
-  starpu_init(NULL);
-  starpu_helper_cublas_init();
+  /* /\* 1 cholesky all alone on the whole machine *\/ */
+  /* starpu_init(NULL); */
+  /* starpu_helper_cublas_init(); */
 
 
-  void *gflops_cholesky3 = func_cholesky3(p);
+  /* void *gflops_cholesky3 = func_cholesky3(p); */
 
 
-  starpu_helper_cublas_shutdown();
-  starpu_shutdown();
+  /* starpu_helper_cublas_shutdown(); */
+  /* starpu_shutdown(); */
 
 
 
 
-  /* 2 cholesky in a single ctx */
-  starpu_init(NULL);
-  starpu_helper_cublas_init();
+  /* /\* 2 cholesky in a single ctx *\/ */
+  /* starpu_init(NULL); */
+  /* starpu_helper_cublas_init(); */
 
 
-  pthread_t tid2[2];
+  /* pthread_t tid2[2]; */
 
 
-  pthread_create(&tid2[0], NULL, (void*)func_cholesky3, (void*)p);
-  pthread_create(&tid2[1], NULL, (void*)func_cholesky3, (void*)p);
+  /* pthread_create(&tid2[0], NULL, (void*)func_cholesky3, (void*)p); */
+  /* pthread_create(&tid2[1], NULL, (void*)func_cholesky3, (void*)p); */
 
 
-  void *gflops_cholesky4;
-  void *gflops_cholesky5;
+  /* void *gflops_cholesky4; */
+  /* void *gflops_cholesky5; */
  
  
-  pthread_join(tid2[0], &gflops_cholesky4);
-  pthread_join(tid2[1], &gflops_cholesky5);
+  /* pthread_join(tid2[0], &gflops_cholesky4); */
+  /* pthread_join(tid2[1], &gflops_cholesky5); */
 
 
-  starpu_helper_cublas_shutdown();
-  starpu_shutdown();
+  /* starpu_helper_cublas_shutdown(); */
+  /* starpu_shutdown(); */
 
 
-  printf("%2.2f %2.2f %2.2f %2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops, ((retvals*)gflops_cholesky3)->flops, ((retvals*)gflops_cholesky4)->flops, ((retvals*)gflops_cholesky5)->flops);
+  /* printf("%2.2f %2.2f %2.2f %2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops, ((retvals*)gflops_cholesky3)->flops, ((retvals*)gflops_cholesky4)->flops, ((retvals*)gflops_cholesky5)->flops); */
 
 
-  printf("%2.2f %2.2f %2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, ((retvals*)gflops_cholesky3)->avg_timing, ((retvals*)gflops_cholesky4)->avg_timing, ((retvals*)gflops_cholesky5)->avg_timing);
+  /* printf("%2.2f %2.2f %2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, ((retvals*)gflops_cholesky3)->avg_timing, ((retvals*)gflops_cholesky4)->avg_timing, ((retvals*)gflops_cholesky5)->avg_timing); */
 
 
-  free(gflops_cholesky1);
-  free(gflops_cholesky2);
-  free(gflops_cholesky3);
-  free(gflops_cholesky4);
-  free(gflops_cholesky5);
+  /* free(gflops_cholesky1); */
+  /* free(gflops_cholesky2); */
+  /* free(gflops_cholesky3); */
+  /* free(gflops_cholesky4); */
+  /* free(gflops_cholesky5); */
 }
 }
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)

+ 0 - 10
src/core/sched_policy.c

@@ -316,11 +316,6 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 
 
 	_starpu_profiling_set_task_push_end_time(task);
 	_starpu_profiling_set_task_push_end_time(task);
 
 
- 	/* if(task) */
-	/*   { */
-	/*     printf("task %s pushed with strateg %s\n", task->name, task->sched_ctx->sched_policy->policy_name); */
-	/*   } */
-
         _STARPU_LOG_OUT();
         _STARPU_LOG_OUT();
         return ret;
         return ret;
 }
 }
@@ -354,11 +349,6 @@ struct starpu_task *_starpu_pop_task(struct starpu_worker_s *worker)
 		  }
 		  }
 	  }
 	  }
 
 
- 	/* if(task) */
-	/*   { */
-	/*     printf("task %s poped by th %d for %d  with strateg %s\n", task->name, worker->workerid, worker->arch, task->sched_ctx->sched_policy->policy_name); */
-	/*   } */
-
 	/* Note that we may get a NULL task in case the scheduler was unlocked
 	/* Note that we may get a NULL task in case the scheduler was unlocked
 	 * for some reason. */
 	 * for some reason. */
 	if (profiling && task)
 	if (profiling && task)

+ 0 - 1
src/drivers/cpu/driver_cpu.c

@@ -183,7 +183,6 @@ void *_starpu_cpu_worker(void *arg)
 
 
 		PTHREAD_MUTEX_UNLOCK(sched_mutex);	
 		PTHREAD_MUTEX_UNLOCK(sched_mutex);	
 
 
-
 		STARPU_ASSERT(task);
 		STARPU_ASSERT(task);
 		j = _starpu_get_job_associated_to_task(task);
 		j = _starpu_get_job_associated_to_task(task);
 	
 	

+ 1 - 1
tests/cholesky_and_lu/sched.sh

@@ -36,7 +36,7 @@ do
 
 
     echo "$ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS"
     echo "$ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS"
 
 
-    val=`STARPU_NCUDA=2 $ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS`
+    val=`$ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS`
 
 
     echo "$size $val"
     echo "$size $val"
     echo "$size $val" >> $filename
     echo "$size $val" >> $filename