Browse Source

Perf changes

Andra Hugo 14 years ago
parent
commit
4ba5968806

+ 5 - 2
examples/cholesky_and_lu/cholesky/cholesky_implicit.c

@@ -135,8 +135,11 @@ static double _cholesky(starpu_data_handle dataA, unsigned nblocks, unsigned sch
 	unsigned long n = starpu_matrix_get_nx(dataA);
 
 	double flop = (1.0f*n*n*n)/3.0f;
-	
-	return (flop/(*timing)/1000.0f);
+
+	double gflops = (flop/(*timing)/1000.0f);
+	(*timing) /= 1000000.0f; /* scale by 1e-6; the result is in seconds per the original "sec" note */
+	(*timing) /= 60.0f; /* seconds -> minutes; *timing is rewritten only after gflops was derived from it */
+	return gflops;
 }
 
 static double cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned sched_ctx, double *timing)

+ 51 - 16
examples/cholesky_and_lu/cholesky_and_lu.c

@@ -13,7 +13,7 @@ typedef struct {
   double avg_timing;
 } retvals;
 
-#define NSAMPLES 1
+#define NSAMPLES 5
 
 pthread_barrier_t barrier;
 
@@ -37,28 +37,40 @@ void* func_cholesky(void *val){
   return (void*)rv;
 }
 
-void cholesky_vs_cholesky(params *p1, params *p2, params *p3){
+void cholesky_vs_cholesky(params *p1, params *p2, params *p3, int ncpus1, int ncpus2){
   /* 2 cholesky in different ctxs */
   starpu_init(NULL);
   starpu_helper_cublas_init();
 
-  int procs[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
-		 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
-		 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
-		 39, 40, 41, 42, 43, 44, 45, 46, 47,
-		 48, 49, 50, 51, 52, 53, 54,
-		 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
-		 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,  78,
-		 79, 80, 81};
-  p1->ctx = starpu_create_sched_ctx("heft", procs, 82, "cholesky1");
+  int procs[ncpus1]; /* ids handed to the "cholesky1" context; the array length comes straight from the caller */
+  int i;
+  for(i = 0; i < ncpus1; i++)
+    procs[i] = i; /* first context gets ids 0 .. ncpus1-1 */
+
+  p1->ctx = starpu_create_sched_ctx("heft", procs, ncpus1, "cholesky1");
+
+  int procs2[ncpus2]; /* ids for the second context */
+
+  for(i = 0; i < ncpus2; i++)
+    procs2[i] = ncpus1+i; /* continues at ncpus1, so the two id ranges do not overlap */
 
-  int procs2[] =  { 82, 83, 84, 85, 86, 87, 88, 89, 90,
-		   91, 92, 93, 94, 95};
-  p2->ctx = starpu_create_sched_ctx("heft", procs2, 14, "cholesky2");
+  p2->ctx = starpu_create_sched_ctx("heft", procs2, ncpus2, "cholesky2");
+
+/*   int procs[] = {0, 2, 3, 4, 5, 6, 7, 8, 9, 10}; */
+/*   p1->ctx = starpu_create_sched_ctx("heft", procs, 10, "cholesky1"); */
+
+/*   int procs2[] =  {1, 11}; */
+/*   p2->ctx = starpu_create_sched_ctx("heft", procs2, 2, "cholesky2"); */
 
   pthread_t tid[2];
   pthread_barrier_init(&barrier, NULL, 2);
 
+  struct timeval start;
+  struct timeval end;
+
+  gettimeofday(&start, NULL);
+
+
   pthread_create(&tid[0], NULL, (void*)func_cholesky, (void*)p1);
   pthread_create(&tid[1], NULL, (void*)func_cholesky, (void*)p2);
 
@@ -68,8 +80,17 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3){
   pthread_join(tid[0], &gflops_cholesky1);
   pthread_join(tid[1], &gflops_cholesky2);
 
+  gettimeofday(&end, NULL);
+
   starpu_helper_cublas_shutdown();
   starpu_shutdown();
+  
+  double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); /* microseconds between the two gettimeofday() samples */
+  timing /= 1000000; /* -> seconds */
+  timing /= 60; /* -> minutes */
+
+  printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops); /* output fields 1-2: flops of each run */
+  printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, timing); /* fields 3-5: avg_timing of each run, then the overall elapsed minutes */
 
   /* /\* 1 cholesky all alone on the whole machine *\/ */
   /* starpu_init(NULL); */
@@ -113,6 +134,20 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3){
 int main(int argc, char **argv)
 {
   //  printf("argc = %d\n", argc);
+  int ncpus1, ncpus2; /* NOTE(review): left uninitialized when the matching flag is absent, yet both are passed on below */
+  int i;
+  for (i = 9; i < argc; i++) { /* the scan starts at argv[9] */
+    if (strcmp(argv[i], "-ncpus1") == 0) {
+      char *argptr;
+      ncpus1 = strtol(argv[++i], &argptr, 10); /* NOTE(review): no check that a value follows the flag; argptr is never inspected */
+    }
+    
+    if (strcmp(argv[i], "-ncpus2") == 0) { /* NOTE(review): i may already have been advanced by the branch above */
+      char *argptr;
+      ncpus2 = strtol(argv[++i], &argptr, 10);
+    }    
+  }
+  //  printf("%d %d\n", ncpus1, ncpus2);
   params p1;
   p1.start = 1;
   p1.argc = 5;
@@ -120,14 +155,14 @@ int main(int argc, char **argv)
 
   params p2;
   p2.start = 5;
-  p2.argc = argc;
+  p2.argc = 9;
   p2.argv = argv;
 
   params p3;
   p3.argc = argc;
   p3.argv = argv;
   p3.ctx = 0;
-  cholesky_vs_cholesky(&p1, &p2,&p3);
+  cholesky_vs_cholesky(&p1, &p2,&p3, ncpus1, ncpus2);
 
   return 0;
 }

+ 1 - 1
src/sched_policies/heft.c

@@ -166,7 +166,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 						double *local_data_penalty,
 						double *local_power, int *forced_best,
 						struct starpu_task_bundle *bundle,
-						 struct starpu_sched_ctx *sched_ctx )
+						struct starpu_sched_ctx *sched_ctx )
 {
   int calibrating = 0;
   double max_exp_end = DBL_MIN;

+ 74 - 13
tests/cholesky_and_lu/sched.sh

@@ -20,29 +20,90 @@
 DIR=$PWD
 ROOTDIR=$DIR/../..
 TIMINGDIR=$DIR/timings-sched/
-mkdir -p $TIMINGDIR
+#mkdir -p $TIMINGDIR
 BENCH_NAME=cholesky_and_lu
-ns=10
+nsamples=5
 
 filename=$TIMINGDIR/$BENCH_NAME
-    
-for blocks in `seq 10 2 24`
+
+nmaxcpus=96
+nmincpus1=40
+nmincpus2=30
+
+blocks1=60
+blocks2=40
+
+size1=$(($blocks1*1024))
+size2=$(($blocks2*1024))
+
+for i in `seq $nmincpus1 2 $(($nmaxcpus-1))`
 do
-    size=$(($blocks*1024))
-    
-    echo "size : $size"
+    if [ $i -gt $(($nmaxcpus-$nmincpus2)) ]   # would leave fewer than nmincpus2 cpus for the second run
+    then
+	break
+    fi
+
+    ncpus1=$i
+    ncpus2=$(($nmaxcpus-$i))    # the rest of the nmaxcpus budget
     
-    OPTIONS="-pin -nblocks $blocks -size $size"
+    OPTIONS="-pin -nblocks $blocks1 -size $size1 -nblocks $blocks2 -size $size2 -ncpus1 $ncpus1 -ncpus2 $ncpus2"
 
-    echo "$ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS"
+    gflops1_avg=0
+    gflops2_avg=0
 
-    val=`$ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS`
+    t1_avg=0
+    t2_avg=0
+    t_total_avg=0
 
-    echo "$size $val"
-    echo "$size $val" >> $filename
-done
+    for s in `seq 1 $nsamples`
+    do
+	echo "$ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS"
+	
+	val=`$ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS`
+
+	echo "$val"
 
+	val=`echo $val|tr " " "\n"`   # put each space-separated field on its own line
+	
+	i=0   # field counter; reuses the outer loop variable's name (ncpus1 was already taken from it)
+	for x in $val   # fields are consumed in the order: gflops1 gflops2 t1 t2 t_total
+	do
+	    if [ $i -eq 0 ]
+	    then
+		gflops1_avg=$(echo "$gflops1_avg + $x"|bc -l)
+	    fi
+	    if [ $i -eq 1 ]
+	    then
+		gflops2_avg=$(echo "$gflops2_avg+$x"|bc -l)
+	    fi
+	    if [ $i -eq 2 ]
+	    then
+		t1_avg=$(echo "$t1_avg+$x"|bc -l)
+	    fi
+	    
+	    if [ $i -eq 3 ]
+	    then
+		t2_avg=$(echo "$t2_avg+$x"|bc -l)
+	    fi
 
+	    if [ $i -eq 4 ]
+	    then
+		t_total_avg=$(echo "$t_total_avg+$x"|bc -l)
+	    fi
+	    i=$(($i+1))
+	done
+	
+    done
 
+    gflops1_avg=$(echo "$gflops1_avg / $nsamples"|bc -l)
+    gflops2_avg=$(echo "$gflops2_avg / $nsamples"|bc -l)
+    t1_avg=$(echo "$t1_avg / $nsamples"|bc -l)
+    t2_avg=$(echo "$t2_avg / $nsamples"|bc -l)
+    t_total_avg=$(echo "$t_total_avg / $nsamples"|bc -l)
+
+    echo "$ncpus1 $ncpus2 `printf '%2.2f %2.2f %2.2f %2.2f %2.2f' $gflops1_avg $gflops2_avg $t1_avg $t2_avg $t_total_avg`"
+    echo "$ncpus1 $ncpus2 `printf '%2.2f %2.2f %2.2f %2.2f %2.2f' $gflops1_avg $gflops2_avg $t1_avg $t2_avg $t_total_avg`" >> $filename
+
+done