Browse Source

small modifs

Andra Hugo 14 years ago
parent
commit
fbb9f8dfd7

+ 2 - 1
examples/cholesky_and_lu/cholesky/cholesky_implicit.c

@@ -136,9 +136,10 @@ static double _cholesky(starpu_data_handle dataA, unsigned nblocks, unsigned sch
 
 	double flop = (1.0f*n*n*n)/3.0f;
 
+	//	printf("n= %2.2f\n", flop);
 	double gflops = (flop/(*timing)/1000.0f);
 	(*timing) /= 1000000.0f; //sec
-	(*timing) /= 60.0f; //min
+	//	(*timing) /= 60.0f; //min
 	return gflops;
 }
 

+ 67 - 31
examples/cholesky_and_lu/cholesky_and_lu.c

@@ -16,7 +16,7 @@ typedef struct {
   double avg_timing;
 } retvals;
 
-#define NSAMPLES 5
+#define NSAMPLES 3
 int first = 1;
 pthread_mutex_t mut;
 
@@ -40,11 +40,10 @@ void* func_cholesky(void *val){
 
 
   pthread_mutex_lock(&mut);
-  if(first)
-    {
+  if(first){
       starpu_delete_sched_ctx(p->ctx, the_other_ctx);
-      //      starpu_add_workers_to_sched_ctx(p->procs, p->ncpus, the_other_ctx);
-    }
+  }
+
   first = 0;
   pthread_mutex_unlock(&mut);
  
@@ -55,10 +54,13 @@ void* func_cholesky(void *val){
 }
 
 void cholesky_vs_cholesky(params *p1, params *p2, params *p3, 
-			  unsigned cpu_start1, unsigned cpu_start2,
-			  unsigned cpu_end1, unsigned cpu_end2){
-  int ncpus1 = cpu_end1 - cpu_start1;
-  int ncpus2 = cpu_end2 - cpu_start2;
+			  //			  unsigned cpu_start1, unsigned cpu_start2,
+			  //			  unsigned cpu_end1, unsigned cpu_end2,
+			  unsigned cpu1, unsigned cpu2,
+			  unsigned gpu, unsigned gpu1, unsigned gpu2){
+
+  int ncpus1 = cpu1 + gpu + gpu1;
+  int ncpus2 = cpu2 + gpu + gpu2;
 
   /* 2 cholesky in different ctxs */
   starpu_init(NULL);
@@ -67,12 +69,26 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3,
   int procs[ncpus1];
   int i;
   int k = 0;
-  for(i = cpu_start1; i < cpu_end1; i++)
+
+  for(i = 0; i < gpu; i++)
     {
-      printf("%d ", i);
       procs[k++] = i;
+      //      printf("%d ", i);
     }
-  printf("\n");
+
+  for(i = gpu; i < gpu + gpu1; i++)
+    {
+      procs[k++] = i;
+      //      printf("%d ", i);
+    }
+
+  for(i = 3; i < 3 + cpu1; i++)
+    {
+      procs[k++] = i;
+      //  printf("%d ", i);
+    }
+  //  printf("\n");
+
   p1->ctx = starpu_create_sched_ctx("heft", procs, ncpus1, "cholesky1");
   p2->the_other_ctx = (int)p1->ctx;
   p1->procs = procs;
@@ -80,12 +96,23 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3,
   int procs2[ncpus2];
 
   k = 0;
-  for(i = cpu_start2; i < cpu_end2; i++){
-    printf("%d ", i);
+
+  for(i = 0; i < gpu; i++){
+    procs2[k++] = i;
+    //printf("%d ", i);
+  }
+
+  for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++){
+    procs2[k++] = i;
+    //    printf("%d ", i);
+  }
+
+  for(i = 3  + cpu1; i < 3 + cpu1 + cpu2; i++){
     procs2[k++] = i;
+    //    printf("%d ", i);
   }
 
-  printf("\n");
+  //   printf("\n");
 
   p2->ctx = starpu_create_sched_ctx("heft", procs2, ncpus2, "cholesky2");
   p1->the_other_ctx = (int)p2->ctx;
@@ -119,41 +146,50 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3,
   
   double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
   timing /= 1000000;
-  timing /= 60;
+  //  timing /= 60;
 
   printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops);
   printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, timing);
-
+  //printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, 0.0 );    
+  //  printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, 0.0, timing);
 
 }
 
 int main(int argc, char **argv)
 {
-  unsigned cpu_start1 = 0, cpu_end1 = 0, cpu_start2 = 0, cpu_end2 = 0;
+  unsigned cpu1 = 0, cpu2 = 0;
 
+  unsigned gpu = 0, gpu1 = 0, gpu2 = 0;
   int i;
   
   for (i = 9; i < argc; i++) {
-    if (strcmp(argv[i], "-cpu_start1") == 0) {
+
+    if (strcmp(argv[i], "-cpu1") == 0) {
       char *argptr;
-      cpu_start1 = strtol(argv[++i], &argptr, 10);
-    }
-    
-    if (strcmp(argv[i], "-cpu_start2") == 0) {
+      cpu1 = strtol(argv[++i], &argptr, 10);
+    }    
+
+    if (strcmp(argv[i], "-cpu2") == 0) {
       char *argptr;
-      cpu_start2 = strtol(argv[++i], &argptr, 10);
+      cpu2 = strtol(argv[++i], &argptr, 10);
     }    
 
-    if (strcmp(argv[i], "-cpu_end1") == 0) {
+    if (strcmp(argv[i], "-gpu") == 0) {
       char *argptr;
-      cpu_end1 = strtol(argv[++i], &argptr, 10);
-    }
-    
-    if (strcmp(argv[i], "-cpu_end2") == 0) {
+      gpu = strtol(argv[++i], &argptr, 10);
+    }    
+
+    if (strcmp(argv[i], "-gpu1") == 0) {
       char *argptr;
-      cpu_end2 = strtol(argv[++i], &argptr, 10);
+      gpu1 = strtol(argv[++i], &argptr, 10);
     }    
 
+    if (strcmp(argv[i], "-gpu2") == 0) {
+      char *argptr;
+      gpu2 = strtol(argv[++i], &argptr, 10);
+    }    
+
+
   }
 
   params p1;
@@ -170,7 +206,7 @@ int main(int argc, char **argv)
   p3.argc = argc;
   p3.argv = argv;
   p3.ctx = 0;
-  cholesky_vs_cholesky(&p1, &p2,&p3, cpu_start1, cpu_start2, cpu_end1, cpu_end2);
+  cholesky_vs_cholesky(&p1, &p2,&p3, cpu1, cpu2, gpu, gpu1, gpu2);
 
   return 0;
 }

+ 41 - 15
tests/cholesky_and_lu/sched.sh

@@ -24,31 +24,47 @@ mkdir -p $TIMINGDIR
 BENCH_NAME=cholesky_and_lu
 nsamples=3
 
-filename=$TIMINGDIR/$BENCH_NAME
+filename=$TIMINGDIR/${BENCH_NAME}_$1  # braces required: "$BENCH_NAME_" would name a different (unset) variable
+
+gpu=${2:-0}   # default each GPU count to 0 when the argument is omitted,
+gpu1=${3:-0}  # so the later "[ ... -gt 0 ]" tests stay well-formed and
+gpu2=${4:-0}  # every -gpu* flag in OPTIONS is followed by a value
+
+nmaxcpus=9
 
-nmaxcpus=12
 nmincpus1=1
 nmincpus2=1
 
+if [ $gpu1 -gt 0 ]
+then
+    nmincpus1=0
+fi
+
+if [ $gpu2 -gt 0 ]
+then
+    nmincpus2=0
+fi
+
+
 blocks1=40
-blocks2=20
+blocks2=40
 
 size1=20000
 size2=10000
 #size1=$(($blocks1*1024))
 #size2=$(($blocks2*1024))
 
-for i in `seq $nmincpus1 1 $(($nmaxcpus-1))`
+for j in `seq $nmincpus1 1 $(($nmaxcpus-1))`
 do
-    if [ $i -gt $(($nmaxcpus-$nmincpus2)) ]
+    if [ $j -gt $(($nmaxcpus-$nmincpus2)) ]
     then
 	break
     fi
 
-    ncpus1=$i
-    ncpus2=$(($nmaxcpus-$i))    
+    ncpus1=$j
+    ncpus2=$(($nmaxcpus-$j))    
     
-    OPTIONS="-pin -nblocks $blocks1 -size $size1 -nblocks $blocks2 -size $size2 -ncpus1 $ncpus1 -ncpus2 $ncpus2"
+    OPTIONS="-pin -nblocks $blocks1 -size $size1 -nblocks $blocks2 -size $size2 -gpu $gpu -gpu1 $gpu1 -gpu2 $gpu2 -cpu1 $ncpus1 -cpu2 $ncpus2"
 
     gflops1_avg=0
     gflops2_avg=0
@@ -57,6 +73,8 @@ do
     t2_avg=0
     t_total_avg=0
 
+    exec_nsamples=$nsamples
+
     for s in `seq 1 $nsamples`
     do
 	echo "$ROOTDIR/examples/$BENCH_NAME/$BENCH_NAME $OPTIONS"
@@ -94,17 +112,25 @@ do
 	    fi
 	    i=$(($i+1))
 	done
+
+
+	# if [ "$val" == "" ]
+	# then
+	#     echo "no val"
+	#     exec_nsamples=$(($exec_nsamples-1))
+	# fi
 	
     done
 
-    gflops1_avg=$(echo "$gflops1_avg / $nsamples"|bc -l)
-    gflops2_avg=$(echo "$gflops2_avg / $nsamples"|bc -l)
-    t1_avg=$(echo "$t1_avg / $nsamples"|bc -l)
-    t2_avg=$(echo "$t2_avg / $nsamples"|bc -l)
-    t_total_avg=$(echo "$t_total_avg / $nsamples"|bc -l)
+    gflops1_avg=$(echo "$gflops1_avg / $exec_nsamples"|bc -l)
+    gflops2_avg=$(echo "$gflops2_avg / $exec_nsamples"|bc -l)
+    t1_avg=$(echo "$t1_avg / $exec_nsamples"|bc -l)
+    t2_avg=$(echo "$t2_avg / $exec_nsamples"|bc -l)
+    t_total_avg=$(echo "$t_total_avg / $exec_nsamples"|bc -l)
 
-    echo "$ncpus1 $ncpus2 `printf '%2.2f %2.2f %2.2f %2.2f %2.2f' $gflops1_avg $gflops2_avg $t1_avg $t2_avg $t_total_avg`"
-    echo "$ncpus1 $ncpus2 `printf '%2.2f %2.2f %2.2f %2.2f %2.2f' $gflops1_avg $gflops2_avg $t1_avg $t2_avg $t_total_avg`" >> $filename
+    echo "$exec_nsamples"
+    echo "$gpu $gpu1 $gpu2 $ncpus1 $ncpus2 `printf '%2.2f %2.2f %2.2f %2.2f %2.2f' $gflops1_avg $gflops2_avg $t1_avg $t2_avg $t_total_avg`"
+    echo "$gpu $gpu1 $gpu2 $ncpus1 $ncpus2 `printf '%2.2f %2.2f %2.2f %2.2f %2.2f' $gflops1_avg $gflops2_avg $t1_avg $t2_avg $t_total_avg`" >> $filename
 
 done