Prechádzať zdrojové kódy

modified redimention

Andra Hugo 14 rokov pred
rodič
commit
3afb6621cb

+ 2 - 2
examples/cholesky_and_lu/cholesky/cholesky_implicit.c

@@ -211,8 +211,8 @@ double run_cholesky_implicit(unsigned sched_ctx, int start, int argc, char **arg
 		printf("\n");
 	}
 #endif
- 	if(barrier != NULL)
-	  pthread_barrier_wait(barrier);
+	// 	if(barrier != NULL)
+	//	  pthread_barrier_wait(barrier);
 	double gflops = cholesky(mat, size, size, nblocks, sched_ctx, timing);
 
 #ifdef PRINT_OUTPUT

+ 28 - 45
examples/cholesky_and_lu/cholesky_and_lu.c

@@ -6,6 +6,9 @@ typedef struct {
   int argc;
   char **argv;
   unsigned ctx;
+  int the_other_ctx;
+  int *procs;
+  int ncpus;
 } params;
 
 typedef struct {
@@ -13,13 +16,16 @@ typedef struct {
   double avg_timing;
 } retvals;
 
-#define NSAMPLES 5
+#define NSAMPLES 3
+int first = 1;
+pthread_mutex_t mut;
 
 pthread_barrier_t barrier;
 
 void* func_cholesky(void *val){
   params *p = (params*)val;
   unsigned sched_ctx = p->ctx;
+  int the_other_ctx = p->the_other_ctx;
 
   int i;
   retvals *rv  = (retvals*)malloc(sizeof(retvals));
@@ -32,6 +38,17 @@ void* func_cholesky(void *val){
       rv->avg_timing += timing;
     }
 
+
+  pthread_mutex_lock(&mut);
+  if(first)
+    {
+      starpu_delete_sched_ctx(p->ctx);
+      starpu_add_workers_to_sched_ctx(p->procs, p->ncpus, the_other_ctx);
+    }
+  first = 0;
+  pthread_mutex_unlock(&mut);
+ 
+
   rv->flops /= NSAMPLES;
   rv->avg_timing /= NSAMPLES;
   return (void*)rv;
@@ -48,22 +65,22 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3, int ncpus1, int nc
     procs[i] = i;
 
   p1->ctx = starpu_create_sched_ctx("heft", procs, ncpus1, "cholesky1");
-
+  p2->the_other_ctx = (int)p1->ctx;
+  p1->procs = procs;
+  p1->ncpus = ncpus1;
   int procs2[ncpus2];
 
   for(i = 0; i < ncpus2; i++)
     procs2[i] = ncpus1+i;
 
   p2->ctx = starpu_create_sched_ctx("heft", procs2, ncpus2, "cholesky2");
-
-/*   int procs[] = {0, 2, 3, 4, 5, 6, 7, 8, 9, 10}; */
-/*   p1->ctx = starpu_create_sched_ctx("heft", procs, 10, "cholesky1"); */
-
-/*   int procs2[] =  {1, 11}; */
-/*   p2->ctx = starpu_create_sched_ctx("heft", procs2, 2, "cholesky2"); */
+  p1->the_other_ctx = (int)p2->ctx;
+  p2->procs = procs2;
+  p2->ncpus = ncpus2;
 
   pthread_t tid[2];
   pthread_barrier_init(&barrier, NULL, 2);
+  pthread_mutex_init(&mut, NULL);
 
   struct timeval start;
   struct timeval end;
@@ -82,6 +99,7 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3, int ncpus1, int nc
 
   gettimeofday(&end, NULL);
 
+  pthread_mutex_destroy(&mut);
   starpu_helper_cublas_shutdown();
   starpu_shutdown();
   
@@ -92,50 +110,15 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3, int ncpus1, int nc
   printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops);
   printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, timing);
 
-  /* /\* 1 cholesky all alone on the whole machine *\/ */
-  /* starpu_init(NULL); */
-  /* starpu_helper_cublas_init(); */
-
-  /* void *gflops_cholesky3 = func_cholesky((void*)p3); */
-
-  /* starpu_helper_cublas_shutdown(); */
-  /* starpu_shutdown(); */
 
-
-  /* /\* 2 cholesky in a single ctx *\/ */
-  /* starpu_init(NULL); */
-  /* starpu_helper_cublas_init(); */
-
-  /* pthread_t tid2[2]; */
-
-  /* pthread_create(&tid2[0], NULL, (void*)func_cholesky, (void*)p3); */
-  /* pthread_create(&tid2[1], NULL, (void*)func_cholesky, (void*)p3); */
-
-  /* void *gflops_cholesky4; */
-  /* void *gflops_cholesky5; */
- 
-  /* pthread_join(tid2[0], &gflops_cholesky4); */
-  /* pthread_join(tid2[1], &gflops_cholesky5); */
-
-  /* starpu_helper_cublas_shutdown(); */
-  /* starpu_shutdown(); */
-
-  /* printf("%2.2f %2.2f %2.2f %2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops, ((retvals*)gflops_cholesky3)->flops, ((retvals*)gflops_cholesky4)->flops, ((retvals*)gflops_cholesky5)->flops); */
-
-  /* printf("%2.2f %2.2f %2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, ((retvals*)gflops_cholesky3)->avg_timing, ((retvals*)gflops_cholesky4)->avg_timing, ((retvals*)gflops_cholesky5)->avg_timing); */
-
-  /* free(gflops_cholesky1); */
-  /* free(gflops_cholesky2); */
-  /* free(gflops_cholesky3); */
-  /* free(gflops_cholesky4); */
-  /* free(gflops_cholesky5); */
 }
 
 int main(int argc, char **argv)
 {
   //  printf("argc = %d\n", argc);
-  int ncpus1, ncpus2;
+  int ncpus1=0, ncpus2=0;
   int i;
+  
   for (i = 9; i < argc; i++) {
     if (strcmp(argv[i], "-ncpus1") == 0) {
       char *argptr;

+ 3 - 0
include/starpu_scheduler.h

@@ -66,6 +66,9 @@ struct starpu_sched_policy_s {
 	/* Initialize the scheduling policy. */
 	void (*init_sched)(unsigned);
 
+	/* Initialize the scheduling policy only for certain workers. */
+	void (*init_sched_for_workers)(unsigned, int);
+
 	/* Cleanup the scheduling policy. */
 	void (*deinit_sched)(unsigned);
 

+ 38 - 17
src/core/sched_ctx.c

@@ -199,6 +199,7 @@ static void _starpu_remove_sched_ctx_from_worker(struct starpu_worker_s *workera
 		  {
 			workerarg->sched_ctx[i] = NULL;
 			to_remove = 1;
+			break;
 		  }
 	  }
 	
@@ -208,27 +209,38 @@ static void _starpu_remove_sched_ctx_from_worker(struct starpu_worker_s *workera
 	return;
 }
 
+static void _starpu_manage_delete_sched_ctx(struct starpu_sched_ctx *sched_ctx)
+{
+	int nworkers = sched_ctx->nworkers_in_ctx;
+	int workerid;
+	int i;
+	for(i = 0; i < nworkers; i++)
+	  {
+	    workerid = sched_ctx->workerid[i];
+	    struct starpu_worker_s *workerarg = _starpu_get_worker_struct(workerid);
+	    _starpu_remove_sched_ctx_from_worker(workerarg, sched_ctx);
+	  }
+	
+	free(sched_ctx->sched_policy);
+	free(sched_ctx->sched_mutex);
+	free(sched_ctx->sched_cond);
+	sched_ctx->sched_policy = NULL;
+	struct starpu_machine_config_s *config = _starpu_get_machine_config();
+	config->topology.nsched_ctxs--;
+
+}
 void starpu_delete_sched_ctx(unsigned sched_ctx_id)
 {
 	if(!starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id))
 	  {
-		struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
 
-		int nworkers = sched_ctx->nworkers_in_ctx;
-		int workerid;
+		struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
 
-		int i;
-		for(i = 0; i < nworkers; i++)
-		  {
-			workerid = sched_ctx->workerid[i];
-			struct starpu_worker_s *workerarg = _starpu_get_worker_struct(workerid);
-			_starpu_remove_sched_ctx_from_worker(workerarg, sched_ctx);
-		  }
-	
-		free(sched_ctx->sched_policy);
-		free(sched_ctx->sched_mutex);
-		free(sched_ctx->sched_cond);
-		sched_ctx->sched_policy = NULL;
+		/* block the workers until the contex is switched */
+		set_changing_ctx_flag(STATUS_CHANGING_CTX, sched_ctx->nworkers_in_ctx, sched_ctx->workerid);
+		_starpu_manage_delete_sched_ctx(sched_ctx);
+		/* also wait the workers to wake up before using the context */
+		set_changing_ctx_flag(STATUS_UNKNOWN, sched_ctx->nworkers_in_ctx, sched_ctx->workerid);
 	  }		
 	return;	
 }
@@ -241,7 +253,13 @@ void _starpu_delete_all_sched_ctxs()
 
 	for(i = 0; i < nsched_ctxs; i++)
 	  {
-	    starpu_delete_sched_ctx((int)i);
+	    if(!starpu_wait_for_all_tasks_of_sched_ctx(i))
+	      {
+
+		struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(i);
+
+		_starpu_manage_delete_sched_ctx(sched_ctx);
+	      }		
 	  }
 	return;
 }
@@ -350,8 +368,11 @@ static void _starpu_add_workers_to_sched_ctx(int *workerids_in_ctx, int nworkeri
 				  }
 			  }
 		  }
-		sched_ctx->nworkers_in_ctx = nworkerids_in_ctx;
+		sched_ctx->nworkers_in_ctx += nworkerids_in_ctx;
 	  }
+
+	sched_ctx->sched_policy->init_sched_for_workers(sched_ctx->sched_ctx_id, nworkerids_in_ctx);
+
 	return;
 }
 

+ 1 - 0
src/core/sched_policy.c

@@ -100,6 +100,7 @@ static void load_sched_policy(struct starpu_sched_policy_s *sched_policy, struct
 	policy->pop_every_task = sched_policy->pop_every_task;
 	policy->push_task_notify = sched_policy->push_task_notify;
 	policy->policy_name = sched_policy->policy_name;
+	policy->init_sched_for_workers = sched_policy->init_sched_for_workers;
 }
 
 static struct starpu_sched_policy_s *find_sched_policy_from_name(const char *policy_name)

+ 3 - 0
src/core/topology.c

@@ -328,6 +328,7 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 		config->workers[topology->nworkers + cudagpu].devid = devid;
 		config->workers[topology->nworkers + cudagpu].perf_arch = arch; 
 		config->workers[topology->nworkers + cudagpu].worker_mask = STARPU_CUDA;
+		config->workers[topology->nworkers + cudagpu].sched_ctx = (struct starpu_sched_ctx**)malloc(sizeof(struct starpu_sched_ctx*));
 		config->worker_mask |= STARPU_CUDA;
 
                 uint32_t key = _starpu_crc32_be(devid, 0);
@@ -394,6 +395,7 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 		config->workers[topology->nworkers + openclgpu].devid = devid;
 		config->workers[topology->nworkers + openclgpu].perf_arch = arch; 
 		config->workers[topology->nworkers + openclgpu].worker_mask = STARPU_OPENCL;
+		config->workers[topology->nworkers + openclgpu].sched_ctx = (struct starpu_sched_ctx**)malloc(sizeof(struct starpu_sched_ctx*));
 		config->worker_mask |= STARPU_OPENCL;
 	}
 
@@ -428,6 +430,7 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 		config->workers[topology->nworkers + spu].id = spu;
 		config->workers[topology->nworkers + spu].worker_is_running = 0;
 		config->workers[topology->nworkers + spu].worker_mask = STARPU_GORDON;
+		config->workers[topology->nworkers + spu].sched_ctx = (struct starpu_sched_ctx**)malloc(sizeof(struct starpu_sched_ctx*));
 		config->worker_mask |= STARPU_GORDON;
 	}
 

+ 47 - 5
src/sched_policies/heft.c

@@ -37,6 +37,29 @@ typedef struct {
 	double *ntasks;
 } heft_data;
 
+static void heft_init_for_workers(unsigned sched_ctx_id, int nnew_workers)
+{
+	struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
+
+	unsigned nworkers_ctx = sched_ctx->nworkers_in_ctx;
+	heft_data *hd = (heft_data*)sched_ctx->policy_data;
+
+	unsigned initial_nworkers = nworkers_ctx - nnew_workers;
+
+	unsigned workerid_ctx;
+	for (workerid_ctx = initial_nworkers; workerid_ctx < nworkers_ctx; workerid_ctx++)
+	  {
+	    hd->exp_start[workerid_ctx] = starpu_timing_now();
+	    hd->exp_len[workerid_ctx] = 0.0;
+	    hd->exp_end[workerid_ctx] = hd->exp_start[workerid_ctx]; 
+	    hd->ntasks[workerid_ctx] = 0;
+	    	    
+	    sched_ctx->sched_mutex[workerid_ctx] = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
+	    sched_ctx->sched_cond[workerid_ctx] = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
+	    PTHREAD_MUTEX_INIT(sched_ctx->sched_mutex[workerid_ctx], NULL);
+	    PTHREAD_COND_INIT(sched_ctx->sched_cond[workerid_ctx], NULL);
+	  }
+}
 static void heft_init(unsigned sched_ctx_id)
 {
 	heft_data *hd = (heft_data*)malloc(sizeof(heft_data));
@@ -50,10 +73,10 @@ static void heft_init(unsigned sched_ctx_id)
 	unsigned nworkers = sched_ctx->nworkers_in_ctx;
 	sched_ctx->policy_data = (void*)hd;
 
-	hd->exp_start = (double*)malloc(nworkers*sizeof(double));
-	hd->exp_end = (double*)malloc(nworkers*sizeof(double));
-	hd->exp_len = (double*)malloc(nworkers*sizeof(double));
-	hd->ntasks = (double*)malloc(nworkers*sizeof(double));
+	hd->exp_start = (double*)malloc(STARPU_NMAXWORKERS*sizeof(double));
+	hd->exp_end = (double*)malloc(STARPU_NMAXWORKERS*sizeof(double));
+	hd->exp_len = (double*)malloc(STARPU_NMAXWORKERS*sizeof(double));
+	hd->ntasks = (double*)malloc(STARPU_NMAXWORKERS*sizeof(double));
 
 	const char *strval_alpha = getenv("STARPU_SCHED_ALPHA");
 	if (strval_alpha)
@@ -399,6 +422,23 @@ static void heft_deinit(unsigned sched_ctx_id)
 	free(ht);
 }
 
+
+static void heft_deinit_for_workers(unsigned sched_ctx_id) 
+{
+  //TODO: solve pb with indexes before
+	struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
+	int workerid_in_ctx;
+	int nworkers = sched_ctx->nworkers_in_ctx;
+
+	for (workerid_in_ctx = 0; workerid_in_ctx < nworkers; workerid_in_ctx++){
+		PTHREAD_MUTEX_DESTROY(sched_ctx->sched_mutex[workerid_in_ctx]);
+		PTHREAD_COND_DESTROY(sched_ctx->sched_cond[workerid_in_ctx]);
+		free(sched_ctx->sched_mutex[workerid_in_ctx]);
+		free(sched_ctx->sched_cond[workerid_in_ctx]);
+	}
+
+}
+
 struct starpu_sched_policy_s heft_policy = {
 	.init_sched = heft_init,
 	.deinit_sched = heft_deinit,
@@ -409,5 +449,7 @@ struct starpu_sched_policy_s heft_policy = {
 	.pop_every_task = NULL,
 	.post_exec_hook = heft_post_exec_hook,
 	.policy_name = "heft",
-	.policy_description = "Heterogeneous Earliest Finish Task"
+	.policy_description = "Heterogeneous Earliest Finish Task",
+	.init_sched_for_workers = heft_init_for_workers
+	
 };

+ 12 - 10
tests/cholesky_and_lu/sched.sh

@@ -20,23 +20,25 @@
 DIR=$PWD
 ROOTDIR=$DIR/../..
 TIMINGDIR=$DIR/timings-sched/
-#mkdir -p $TIMINGDIR
+mkdir -p $TIMINGDIR
 BENCH_NAME=cholesky_and_lu
-nsamples=5
+nsamples=3
 
 filename=$TIMINGDIR/$BENCH_NAME
 
-nmaxcpus=96
-nmincpus1=40
-nmincpus2=30
+nmaxcpus=12
+nmincpus1=1
+nmincpus2=1
 
-blocks1=60
-blocks2=40
+blocks1=40
+blocks2=20
 
-size1=$(($blocks1*1024))
-size2=$(($blocks2*1024))
+size1=20000
+size2=10000
+#size1=$(($blocks1*1024))
+#size2=$(($blocks2*1024))
 
-for i in `seq $nmincpus1 2 $(($nmaxcpus-1))`
+for i in `seq $nmincpus1 1 $(($nmaxcpus-1))`
 do
     if [ $i -gt $(($nmaxcpus-$nmincpus2)) ]
     then