14 rokov pred · 3afb6621cb
--- a/examples/cholesky_and_lu/cholesky/cholesky_implicit.c
+++ b/examples/cholesky_and_lu/cholesky/cholesky_implicit.c
@@ -211,8 +211,8 @@ double run_cholesky_implicit(unsigned sched_ctx, int start, int argc, char **arg
 
				 		printf("\n");
			
 
				 	}
			
 
				 #endif
			
 
				- 	if(barrier != NULL)
			
 
				-	  pthread_barrier_wait(barrier);
			
 
				+	// 	if(barrier != NULL)
			
 
				+	//	  pthread_barrier_wait(barrier);
			
 
				 	double gflops = cholesky(mat, size, size, nblocks, sched_ctx, timing);
			
 
				 
			
 
				 #ifdef PRINT_OUTPUT
			
--- a/examples/cholesky_and_lu/cholesky_and_lu.c
+++ b/examples/cholesky_and_lu/cholesky_and_lu.c
@@ -6,6 +6,9 @@ typedef struct {
 
				   int argc;
			
 
				   char **argv;
			
 
				   unsigned ctx;
			
 
				+  int the_other_ctx;
			
 
				+  int *procs;
			
 
				+  int ncpus;
			
 
				 } params;
			
 
				 
			
 
				 typedef struct {
			
@@ -13,13 +16,16 @@ typedef struct {
 
				   double avg_timing;
			
 
				 } retvals;
			
 
				 
			
 
				-#define NSAMPLES 5
			
 
				+#define NSAMPLES 3
			
 
				+int first = 1;
			
 
				+pthread_mutex_t mut;
			
 
				 
			
 
				 pthread_barrier_t barrier;
			
 
				 
			
 
				 void* func_cholesky(void *val){
			
 
				   params *p = (params*)val;
			
 
				   unsigned sched_ctx = p->ctx;
			
 
				+  int the_other_ctx = p->the_other_ctx;
			
 
				 
			
 
				   int i;
			
 
				   retvals *rv  = (retvals*)malloc(sizeof(retvals));
			
@@ -32,6 +38,17 @@ void* func_cholesky(void *val){
 
				       rv->avg_timing += timing;
			
 
				     }
			
 
				 
			
 
				+
			
 
				+  pthread_mutex_lock(&mut);
			
 
				+  if(first)
			
 
				+    {
			
 
				+      starpu_delete_sched_ctx(p->ctx);
			
 
				+      starpu_add_workers_to_sched_ctx(p->procs, p->ncpus, the_other_ctx);
			
 
				+    }
			
 
				+  first = 0;
			
 
				+  pthread_mutex_unlock(&mut);
			
 
				+ 
			
 
				+
			
 
				   rv->flops /= NSAMPLES;
			
 
				   rv->avg_timing /= NSAMPLES;
			
 
				   return (void*)rv;
			
@@ -48,22 +65,22 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3, int ncpus1, int nc
 
				     procs[i] = i;
			
 
				 
			
 
				   p1->ctx = starpu_create_sched_ctx("heft", procs, ncpus1, "cholesky1");
			
 
				-
			
 
				+  p2->the_other_ctx = (int)p1->ctx;
			
 
				+  p1->procs = procs;
			
 
				+  p1->ncpus = ncpus1;
			
 
				   int procs2[ncpus2];
			
 
				 
			
 
				   for(i = 0; i < ncpus2; i++)
			
 
				     procs2[i] = ncpus1+i;
			
 
				 
			
 
				   p2->ctx = starpu_create_sched_ctx("heft", procs2, ncpus2, "cholesky2");
			
 
				-
			
 
				-/*   int procs[] = {0, 2, 3, 4, 5, 6, 7, 8, 9, 10}; */
			
 
				-/*   p1->ctx = starpu_create_sched_ctx("heft", procs, 10, "cholesky1"); */
			
 
				-
			
 
				-/*   int procs2[] =  {1, 11}; */
			
 
				-/*   p2->ctx = starpu_create_sched_ctx("heft", procs2, 2, "cholesky2"); */
			
 
				+  p1->the_other_ctx = (int)p2->ctx;
			
 
				+  p2->procs = procs2;
			
 
				+  p2->ncpus = ncpus2;
			
 
				 
			
 
				   pthread_t tid[2];
			
 
				   pthread_barrier_init(&barrier, NULL, 2);
			
 
				+  pthread_mutex_init(&mut, NULL);
			
 
				 
			
 
				   struct timeval start;
			
 
				   struct timeval end;
			
@@ -82,6 +99,7 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3, int ncpus1, int nc
 
				 
			
 
				   gettimeofday(&end, NULL);
			
 
				 
			
 
				+  pthread_mutex_destroy(&mut);
			
 
				   starpu_helper_cublas_shutdown();
			
 
				   starpu_shutdown();
			
 
				   
			
@@ -92,50 +110,15 @@ void cholesky_vs_cholesky(params *p1, params *p2, params *p3, int ncpus1, int nc
 
				   printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops);
			
 
				   printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, timing);
			
 
				 
			
 
				-  /* /\* 1 cholesky all alone on the whole machine *\/ */
			
 
				-  /* starpu_init(NULL); */
			
 
				-  /* starpu_helper_cublas_init(); */
			
 
				-
			
 
				-  /* void *gflops_cholesky3 = func_cholesky((void*)p3); */
			
 
				-
			
 
				-  /* starpu_helper_cublas_shutdown(); */
			
 
				-  /* starpu_shutdown(); */
			
 
				 
			
 
				-
			
 
				-  /* /\* 2 cholesky in a single ctx *\/ */
			
 
				-  /* starpu_init(NULL); */
			
 
				-  /* starpu_helper_cublas_init(); */
			
 
				-
			
 
				-  /* pthread_t tid2[2]; */
			
 
				-
			
 
				-  /* pthread_create(&tid2[0], NULL, (void*)func_cholesky, (void*)p3); */
			
 
				-  /* pthread_create(&tid2[1], NULL, (void*)func_cholesky, (void*)p3); */
			
 
				-
			
 
				-  /* void *gflops_cholesky4; */
			
 
				-  /* void *gflops_cholesky5; */
			
 
				- 
			
 
				-  /* pthread_join(tid2[0], &gflops_cholesky4); */
			
 
				-  /* pthread_join(tid2[1], &gflops_cholesky5); */
			
 
				-
			
 
				-  /* starpu_helper_cublas_shutdown(); */
			
 
				-  /* starpu_shutdown(); */
			
 
				-
			
 
				-  /* printf("%2.2f %2.2f %2.2f %2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops, ((retvals*)gflops_cholesky3)->flops, ((retvals*)gflops_cholesky4)->flops, ((retvals*)gflops_cholesky5)->flops); */
			
 
				-
			
 
				-  /* printf("%2.2f %2.2f %2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, ((retvals*)gflops_cholesky3)->avg_timing, ((retvals*)gflops_cholesky4)->avg_timing, ((retvals*)gflops_cholesky5)->avg_timing); */
			
 
				-
			
 
				-  /* free(gflops_cholesky1); */
			
 
				-  /* free(gflops_cholesky2); */
			
 
				-  /* free(gflops_cholesky3); */
			
 
				-  /* free(gflops_cholesky4); */
			
 
				-  /* free(gflops_cholesky5); */
			
 
				 }
			
 
				 
			
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				   //  printf("argc = %d\n", argc);
			
 
				-  int ncpus1, ncpus2;
			
 
				+  int ncpus1=0, ncpus2=0;
			
 
				   int i;
			
 
				+  
			
 
				   for (i = 9; i < argc; i++) {
			
 
				     if (strcmp(argv[i], "-ncpus1") == 0) {
			
 
				       char *argptr;
			
--- a/include/starpu_scheduler.h
+++ b/include/starpu_scheduler.h
@@ -66,6 +66,9 @@ struct starpu_sched_policy_s {
 
				 	/* Initialize the scheduling policy. */
			
 
				 	void (*init_sched)(unsigned);
			
 
				 
			
 
				+	/* Initialize the scheduling policy only for certain workers. */
			
 
				+	void (*init_sched_for_workers)(unsigned, int);
			
 
				+
			
 
				 	/* Cleanup the scheduling policy. */
			
 
				 	void (*deinit_sched)(unsigned);
			
 
				 
			
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -199,6 +199,7 @@ static void _starpu_remove_sched_ctx_from_worker(struct starpu_worker_s *workera
 
				 		  {
			
 
				 			workerarg->sched_ctx[i] = NULL;
			
 
				 			to_remove = 1;
			
 
				+			break;
			
 
				 		  }
			
 
				 	  }
			
 
				 	
			
@@ -208,27 +209,38 @@ static void _starpu_remove_sched_ctx_from_worker(struct starpu_worker_s *workera
 
				 	return;
			
 
				 }
			
 
				 
			
 
				+static void _starpu_manage_delete_sched_ctx(struct starpu_sched_ctx *sched_ctx)
			
 
				+{
			
 
				+	int nworkers = sched_ctx->nworkers_in_ctx;
			
 
				+	int workerid;
			
 
				+	int i;
			
 
				+	for(i = 0; i < nworkers; i++)
			
 
				+	  {
			
 
				+	    workerid = sched_ctx->workerid[i];
			
 
				+	    struct starpu_worker_s *workerarg = _starpu_get_worker_struct(workerid);
			
 
				+	    _starpu_remove_sched_ctx_from_worker(workerarg, sched_ctx);
			
 
				+	  }
			
 
				+	
			
 
				+	free(sched_ctx->sched_policy);
			
 
				+	free(sched_ctx->sched_mutex);
			
 
				+	free(sched_ctx->sched_cond);
			
 
				+	sched_ctx->sched_policy = NULL;
			
 
				+	struct starpu_machine_config_s *config = _starpu_get_machine_config();
			
 
				+	config->topology.nsched_ctxs--;
			
 
				+
			
 
				+}
			
 
				 void starpu_delete_sched_ctx(unsigned sched_ctx_id)
			
 
				 {
			
 
				 	if(!starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id))
			
 
				 	  {
			
 
				-		struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
			
 
				 
			
 
				-		int nworkers = sched_ctx->nworkers_in_ctx;
			
 
				-		int workerid;
			
 
				+		struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
			
 
				 
			
 
				-		int i;
			
 
				-		for(i = 0; i < nworkers; i++)
			
 
				-		  {
			
 
				-			workerid = sched_ctx->workerid[i];
			
 
				-			struct starpu_worker_s *workerarg = _starpu_get_worker_struct(workerid);
			
 
				-			_starpu_remove_sched_ctx_from_worker(workerarg, sched_ctx);
			
 
				-		  }
			
 
				-	
			
 
				-		free(sched_ctx->sched_policy);
			
 
				-		free(sched_ctx->sched_mutex);
			
 
				-		free(sched_ctx->sched_cond);
			
 
				-		sched_ctx->sched_policy = NULL;
			
 
				+		/* block the workers until the contex is switched */
			
 
				+		set_changing_ctx_flag(STATUS_CHANGING_CTX, sched_ctx->nworkers_in_ctx, sched_ctx->workerid);
			
 
				+		_starpu_manage_delete_sched_ctx(sched_ctx);
			
 
				+		/* also wait the workers to wake up before using the context */
			
 
				+		set_changing_ctx_flag(STATUS_UNKNOWN, sched_ctx->nworkers_in_ctx, sched_ctx->workerid);
			
 
				 	  }		
			
 
				 	return;	
			
 
				 }
			
@@ -241,7 +253,13 @@ void _starpu_delete_all_sched_ctxs()
 
				 
			
 
				 	for(i = 0; i < nsched_ctxs; i++)
			
 
				 	  {
			
 
				-	    starpu_delete_sched_ctx((int)i);
			
 
				+	    if(!starpu_wait_for_all_tasks_of_sched_ctx(i))
			
 
				+	      {
			
 
				+
			
 
				+		struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(i);
			
 
				+
			
 
				+		_starpu_manage_delete_sched_ctx(sched_ctx);
			
 
				+	      }		
			
 
				 	  }
			
 
				 	return;
			
 
				 }
			
@@ -350,8 +368,11 @@ static void _starpu_add_workers_to_sched_ctx(int *workerids_in_ctx, int nworkeri
 
				 				  }
			
 
				 			  }
			
 
				 		  }
			
 
				-		sched_ctx->nworkers_in_ctx = nworkerids_in_ctx;
			
 
				+		sched_ctx->nworkers_in_ctx += nworkerids_in_ctx;
			
 
				 	  }
			
 
				+
			
 
				+	sched_ctx->sched_policy->init_sched_for_workers(sched_ctx->sched_ctx_id, nworkerids_in_ctx);
			
 
				+
			
 
				 	return;
			
 
				 }
			
 
				 
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -100,6 +100,7 @@ static void load_sched_policy(struct starpu_sched_policy_s *sched_policy, struct
 
				 	policy->pop_every_task = sched_policy->pop_every_task;
			
 
				 	policy->push_task_notify = sched_policy->push_task_notify;
			
 
				 	policy->policy_name = sched_policy->policy_name;
			
 
				+	policy->init_sched_for_workers = sched_policy->init_sched_for_workers;
			
 
				 }
			
 
				 
			
 
				 static struct starpu_sched_policy_s *find_sched_policy_from_name(const char *policy_name)
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -328,6 +328,7 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
				 		config->workers[topology->nworkers + cudagpu].devid = devid;
			
 
				 		config->workers[topology->nworkers + cudagpu].perf_arch = arch; 
			
 
				 		config->workers[topology->nworkers + cudagpu].worker_mask = STARPU_CUDA;
			
 
				+		config->workers[topology->nworkers + cudagpu].sched_ctx = (struct starpu_sched_ctx**)malloc(sizeof(struct starpu_sched_ctx*));
			
 
				 		config->worker_mask |= STARPU_CUDA;
			
 
				 
			
 
				                 uint32_t key = _starpu_crc32_be(devid, 0);
			
@@ -394,6 +395,7 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
				 		config->workers[topology->nworkers + openclgpu].devid = devid;
			
 
				 		config->workers[topology->nworkers + openclgpu].perf_arch = arch; 
			
 
				 		config->workers[topology->nworkers + openclgpu].worker_mask = STARPU_OPENCL;
			
 
				+		config->workers[topology->nworkers + openclgpu].sched_ctx = (struct starpu_sched_ctx**)malloc(sizeof(struct starpu_sched_ctx*));
			
 
				 		config->worker_mask |= STARPU_OPENCL;
			
 
				 	}
			
 
				 
			
@@ -428,6 +430,7 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
				 		config->workers[topology->nworkers + spu].id = spu;
			
 
				 		config->workers[topology->nworkers + spu].worker_is_running = 0;
			
 
				 		config->workers[topology->nworkers + spu].worker_mask = STARPU_GORDON;
			
 
				+		config->workers[topology->nworkers + spu].sched_ctx = (struct starpu_sched_ctx**)malloc(sizeof(struct starpu_sched_ctx*));
			
 
				 		config->worker_mask |= STARPU_GORDON;
			
 
				 	}
			
 
				 
			
--- a/src/sched_policies/heft.c
+++ b/src/sched_policies/heft.c
@@ -37,6 +37,29 @@ typedef struct {
 
				 	double *ntasks;
			
 
				 } heft_data;
			
 
				 
			
 
				+static void heft_init_for_workers(unsigned sched_ctx_id, int nnew_workers)
			
 
				+{
			
 
				+	struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
			
 
				+
			
 
				+	unsigned nworkers_ctx = sched_ctx->nworkers_in_ctx;
			
 
				+	heft_data *hd = (heft_data*)sched_ctx->policy_data;
			
 
				+
			
 
				+	unsigned initial_nworkers = nworkers_ctx - nnew_workers;
			
 
				+
			
 
				+	unsigned workerid_ctx;
			
 
				+	for (workerid_ctx = initial_nworkers; workerid_ctx < nworkers_ctx; workerid_ctx++)
			
 
				+	  {
			
 
				+	    hd->exp_start[workerid_ctx] = starpu_timing_now();
			
 
				+	    hd->exp_len[workerid_ctx] = 0.0;
			
 
				+	    hd->exp_end[workerid_ctx] = hd->exp_start[workerid_ctx]; 
			
 
				+	    hd->ntasks[workerid_ctx] = 0;
			
 
				+	    	    
			
 
				+	    sched_ctx->sched_mutex[workerid_ctx] = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
			
 
				+	    sched_ctx->sched_cond[workerid_ctx] = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
			
 
				+	    PTHREAD_MUTEX_INIT(sched_ctx->sched_mutex[workerid_ctx], NULL);
			
 
				+	    PTHREAD_COND_INIT(sched_ctx->sched_cond[workerid_ctx], NULL);
			
 
				+	  }
			
 
				+}
			
 
				 static void heft_init(unsigned sched_ctx_id)
			
 
				 {
			
 
				 	heft_data *hd = (heft_data*)malloc(sizeof(heft_data));
			
@@ -50,10 +73,10 @@ static void heft_init(unsigned sched_ctx_id)
 
				 	unsigned nworkers = sched_ctx->nworkers_in_ctx;
			
 
				 	sched_ctx->policy_data = (void*)hd;
			
 
				 
			
 
				-	hd->exp_start = (double*)malloc(nworkers*sizeof(double));
			
 
				-	hd->exp_end = (double*)malloc(nworkers*sizeof(double));
			
 
				-	hd->exp_len = (double*)malloc(nworkers*sizeof(double));
			
 
				-	hd->ntasks = (double*)malloc(nworkers*sizeof(double));
			
 
				+	hd->exp_start = (double*)malloc(STARPU_NMAXWORKERS*sizeof(double));
			
 
				+	hd->exp_end = (double*)malloc(STARPU_NMAXWORKERS*sizeof(double));
			
 
				+	hd->exp_len = (double*)malloc(STARPU_NMAXWORKERS*sizeof(double));
			
 
				+	hd->ntasks = (double*)malloc(STARPU_NMAXWORKERS*sizeof(double));
			
 
				 
			
 
				 	const char *strval_alpha = getenv("STARPU_SCHED_ALPHA");
			
 
				 	if (strval_alpha)
			
@@ -399,6 +422,23 @@ static void heft_deinit(unsigned sched_ctx_id)
 
				 	free(ht);
			
 
				 }
			
 
				 
			
 
				+
			
 
				+static void heft_deinit_for_workers(unsigned sched_ctx_id) 
			
 
				+{
			
 
				+  //TODO: solve pb with indexes before
			
 
				+	struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
			
 
				+	int workerid_in_ctx;
			
 
				+	int nworkers = sched_ctx->nworkers_in_ctx;
			
 
				+
			
 
				+	for (workerid_in_ctx = 0; workerid_in_ctx < nworkers; workerid_in_ctx++){
			
 
				+		PTHREAD_MUTEX_DESTROY(sched_ctx->sched_mutex[workerid_in_ctx]);
			
 
				+		PTHREAD_COND_DESTROY(sched_ctx->sched_cond[workerid_in_ctx]);
			
 
				+		free(sched_ctx->sched_mutex[workerid_in_ctx]);
			
 
				+		free(sched_ctx->sched_cond[workerid_in_ctx]);
			
 
				+	}
			
 
				+
			
 
				+}
			
 
				+
			
 
				 struct starpu_sched_policy_s heft_policy = {
			
 
				 	.init_sched = heft_init,
			
 
				 	.deinit_sched = heft_deinit,
			
@@ -409,5 +449,7 @@ struct starpu_sched_policy_s heft_policy = {
 
				 	.pop_every_task = NULL,
			
 
				 	.post_exec_hook = heft_post_exec_hook,
			
 
				 	.policy_name = "heft",
			
 
				-	.policy_description = "Heterogeneous Earliest Finish Task"
			
 
				+	.policy_description = "Heterogeneous Earliest Finish Task",
			
 
				+	.init_sched_for_workers = heft_init_for_workers
			
 
				+	
			
 
				 };
			
--- a/tests/cholesky_and_lu/sched.sh
+++ b/tests/cholesky_and_lu/sched.sh
@@ -20,23 +20,25 @@
 
				 DIR=$PWD
			
 
				 ROOTDIR=$DIR/../..
			
 
				 TIMINGDIR=$DIR/timings-sched/
			
 
				-#mkdir -p $TIMINGDIR
			
 
				+mkdir -p $TIMINGDIR
			
 
				 BENCH_NAME=cholesky_and_lu
			
 
				-nsamples=5
			
 
				+nsamples=3
			
 
				 
			
 
				 filename=$TIMINGDIR/$BENCH_NAME
			
 
				 
			
 
				-nmaxcpus=96
			
 
				-nmincpus1=40
			
 
				-nmincpus2=30
			
 
				+nmaxcpus=12
			
 
				+nmincpus1=1
			
 
				+nmincpus2=1
			
 
				 
			
 
				-blocks1=60
			
 
				-blocks2=40
			
 
				+blocks1=40
			
 
				+blocks2=20
			
 
				 
			
 
				-size1=$(($blocks1*1024))
			
 
				-size2=$(($blocks2*1024))
			
 
				+size1=20000
			
 
				+size2=10000
			
 
				+#size1=$(($blocks1*1024))
			
 
				+#size2=$(($blocks2*1024))
			
 
				 
			
 
				-for i in `seq $nmincpus1 2 $(($nmaxcpus-1))`
			
 
				+for i in `seq $nmincpus1 1 $(($nmaxcpus-1))`
			
 
				 do
			
 
				     if [ $i -gt $(($nmaxcpus-$nmincpus2)) ]
			
 
				     then