Browse Source

mic: merge trunk

Thibaud Lambert 12 years ago
parent
commit
b2615f92ee
41 changed files with 94118 additions and 356591 deletions
  1. 93452 356126
      doc/doxygen/chapters/data_trace.eps
  2. BIN
      doc/doxygen/chapters/data_trace.pdf
  3. BIN
      doc/doxygen/chapters/data_trace.png
  4. 6 4
      doc/doxygen/chapters/performance_feedback.doxy
  5. 1 1
      sc_hypervisor/examples/lp_test/lp_resize_test.c
  6. 1 1
      sc_hypervisor/examples/lp_test/lp_test.c
  7. 3 0
      sc_hypervisor/include/sc_hypervisor.h
  8. 4 0
      sc_hypervisor/include/sc_hypervisor_lp.h
  9. 1 1
      sc_hypervisor/src/hypervisor_policies/app_driven_policy.c
  10. 1 1
      sc_hypervisor/src/hypervisor_policies/debit_lp_policy.c
  11. 4 7
      sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c
  12. 2 1
      sc_hypervisor/src/hypervisor_policies/gflops_rate_policy.c
  13. 5 219
      sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c
  14. 3 7
      sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c
  15. 3 2
      sc_hypervisor/src/policies_utils/dichotomy.c
  16. 216 1
      sc_hypervisor/src/policies_utils/lp_programs.c
  17. 47 26
      sc_hypervisor/src/policies_utils/lp_tools.c
  18. 5 3
      sc_hypervisor/src/policies_utils/policy_tools.c
  19. 9 5
      sc_hypervisor/src/policies_utils/speed.c
  20. 25 6
      sc_hypervisor/src/sc_hypervisor.c
  21. 2 0
      sc_hypervisor/src/sc_hypervisor_intern.h
  22. 2 0
      src/Makefile.am
  23. 26 79
      src/core/sched_ctx.c
  24. 1 6
      src/core/sched_ctx.h
  25. 86 0
      src/core/sched_ctx_list.c
  26. 32 0
      src/core/sched_ctx_list.h
  27. 33 54
      src/core/sched_policy.c
  28. 0 4
      src/core/topology.c
  29. 1 1
      src/core/workers.c
  30. 2 2
      src/core/workers.h
  31. 60 13
      src/debug/traces/starpu_fxt.c
  32. 4 4
      src/drivers/driver_common/driver_common.c
  33. 5 3
      src/drivers/mp_common/sink_common.c
  34. 2 1
      src/drivers/mp_common/source_common.c
  35. 2 2
      src/sched_policies/deque_modeling_policy_data_aware.c
  36. 1 1
      src/sched_policies/parallel_heft.c
  37. 2 0
      tests/datawizard/acquire_cb_insert.c
  38. 6 2
      tests/datawizard/commute.c
  39. 2 0
      tests/datawizard/data_invalidation.c
  40. 0 1
      tests/errorcheck/invalid_blocking_calls.c
  41. 61 7
      tools/starpu_fxt_data_trace.c

File diff suppressed because it is too large
+ 93452 - 356126
doc/doxygen/chapters/data_trace.eps


BIN
doc/doxygen/chapters/data_trace.pdf


BIN
doc/doxygen/chapters/data_trace.png


+ 6 - 4
doc/doxygen/chapters/performance_feedback.doxy

@@ -585,11 +585,13 @@ Synthetic GFlops : 44.21
 \section DataTrace Data trace and tasks length
 It is possible to get statistics about tasks length and data size by using :
 \verbatim
-$starpu_fxt_data_trace filename
+$starpu_fxt_data_trace filename [codelet1 codelet2 ... codeletn]
 \endverbatim
-Where filename is the FxT trace file. This will create 2 files : <c>data_total.txt</c> which
-shows each task length and total data size and <c>data_trace.gp</c> which can be plotted to 
-get a .eps image of these results. On the image, each point represents a task.
+Where filename is the FxT trace file and codeletX are the names of the codelets you 
+want to profile (if no names are specified, starpu_fxt_data_trace will use them all). 
+This will create a file, <c>data_trace.gp</c>, which
+can be plotted to get a .eps image of these results. On the image, each point represents a 
+task, and each color corresponds to a codelet.
 
 \image html data_trace.png
 \image latex data_trace.eps "" width=\textwidth

+ 1 - 1
sc_hypervisor/examples/lp_test/lp_resize_test.c

@@ -73,7 +73,7 @@ void* submit_tasks_thread(void *arg)
 	}
 
 	starpu_task_wait_for_all();
-	return;
+	return NULL;
 }
 
 int main()

+ 1 - 1
sc_hypervisor/examples/lp_test/lp_test.c

@@ -72,7 +72,7 @@ void* submit_tasks_thread(void *arg)
 	}
 
 	starpu_task_wait_for_all();
-	return;
+	return NULL;
 }
 
 int main()

+ 3 - 0
sc_hypervisor/include/sc_hypervisor.h

@@ -131,6 +131,9 @@ void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ct
 /* change dynamically the total number of flops of a context, move the deadline of the finishing time of the context */
 void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops);
 
+/* change dynamically the number of the elapsed flops in a context, modify the past in order to better compute the speed */
+void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_task_flops);
+
 #ifdef __cplusplus
 }
 #endif

+ 4 - 0
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -72,6 +72,10 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 					       double times[nw][nt], unsigned is_integer, double tmax, unsigned *in_sched_ctxs,
 					       struct sc_hypervisor_policy_task_pool *tmp_task_pools);
 
+/* linear program that simulates a distribution of flops over the workers on a particular sample of the execution
+   of the application such that the entire sample would finish in a minimum amount of time */
+double sc_hypervisor_lp_simulate_distrib_flops_on_sample(int ns, int nw, double final_w_in_s[ns][nw], unsigned is_integer, double tmax, 
+							 double **speed, double flops[ns], double **final_flops_on_w);
 #endif // STARPU_HAVE_GLPK_H
 
 #ifdef __cplusplus

+ 1 - 1
sc_hypervisor/src/hypervisor_policies/app_driven_policy.c

@@ -15,7 +15,7 @@
  */
 #include <sc_hypervisor_policy.h>
 
-static void app_driven_handle_post_exec_hook(unsigned sched_ctx, int task_tag)
+static void app_driven_handle_post_exec_hook(unsigned sched_ctx, __attribute__((unused)) int task_tag)
 {
 	sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 1);
 }

+ 1 - 1
sc_hypervisor/src/hypervisor_policies/debit_lp_policy.c

@@ -56,7 +56,7 @@ static unsigned _compute_max_speed(int ns, int nw, double w_in_s[ns][nw], unsign
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
 	long diff_us = end_time.tv_usec  - start_time.tv_usec;
 
-	float timing = (float)(diff_s*1000000 + diff_us)/1000;
+	__attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000;
 
 	if(res > 0.0)
 		return 1;

+ 4 - 7
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -46,13 +46,14 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
 	long diff_us = end_time.tv_usec  - start_time.tv_usec;
 	
-	float timing = (float)(diff_s*1000000 + diff_us)/1000;
+	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000;
 	
 	if(vmax != 0.0)
 	{
 		int nworkers_per_ctx_rounded[nsched_ctxs][nw];
 		sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_ctx, nworkers_per_ctx_rounded);
-		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw);
+//		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw);
+		sc_hypervisor_lp_distribute_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, workers, curr_nworkers, tw);
 	}
 }
 
@@ -142,12 +143,8 @@ static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 		unsigned criteria = sc_hypervisor_get_resize_criteria();
 		if(criteria != SC_NOTHING && criteria == SC_IDLE)
 		{
-			
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
-			{
 				_try_resizing(NULL, -1, NULL, -1);
-//				sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);
-			}
 		}
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
@@ -182,7 +179,7 @@ struct sc_hypervisor_policy feft_lp_policy = {
 	.resize_ctxs = feft_lp_resize_ctxs,
 	.handle_poped_task = feft_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
-	.handle_idle_cycle = feft_lp_handle_idle_cycle, //NULL,
+	.handle_idle_cycle = feft_lp_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
 	.handle_submitted_job = NULL,

+ 2 - 1
sc_hypervisor/src/hypervisor_policies/gflops_rate_policy.c

@@ -289,7 +289,8 @@ static void gflops_rate_resize(unsigned sched_ctx)
 	}
 }
 
-static void gflops_rate_handle_poped_task(unsigned sched_ctx, int worker)
+static void gflops_rate_handle_poped_task(unsigned sched_ctx, __attribute__((unused)) int worker, 
+					  __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
 	gflops_rate_resize(sched_ctx);
 }

+ 5 - 219
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -28,13 +28,9 @@ struct ispeed_lp_data
 	int *workers;
 };
 
-/*
- * GNU Linear Programming Kit backend
- */
 #ifdef STARPU_HAVE_GLPK_H
-#include <glpk.h>
-static double _glp_resolve (int ns, int nw, double final_w_in_s[ns][nw],
-			    unsigned is_integer, double tmax, void *specific_data)
+static double _compute_workers_distrib(int ns, int nw, double final_w_in_s[ns][nw],
+					unsigned is_integer, double tmax, void *specific_data)
 {
 	struct ispeed_lp_data *sd = (struct ispeed_lp_data *)specific_data;
 
@@ -43,220 +39,11 @@ static double _glp_resolve (int ns, int nw, double final_w_in_s[ns][nw],
 	
 	double **final_flops_on_w = sd->flops_on_w;
 	
-	double w_in_s[ns][nw];
-	double flops_on_w[ns][nw];
-
-	int w, s;
-	glp_prob *lp;
-
-//	printf("try with tmax %lf\n", tmax);
-	lp = glp_create_prob();
-	glp_set_prob_name(lp, "StarPU theoretical bound");
-	glp_set_obj_dir(lp, GLP_MAX);
-	glp_set_obj_name(lp, "total execution time");
-
-	{
-		int ne = 5 * ns * nw /* worker execution time */
-			+ 1; /* glp dumbness */
-		int n = 1;
-		int ia[ne], ja[ne];
-		double ar[ne];
-
-
-		/* Variables: number of flops assigned to worker w in context s, and 
-		 the acknwoledgment that the worker w belongs to the context s */
-		glp_add_cols(lp, 2*nw*ns);
-#define colnum(w, s) ((s)*nw+(w)+1)
-		for(s = 0; s < ns; s++)
-			for(w = 0; w < nw; w++)
-				glp_set_obj_coef(lp, nw*ns+colnum(w,s), 1.);
-		
-		for(s = 0; s < ns; s++)
-			for(w = 0; w < nw; w++)
-			{
-				char name[32];
-				snprintf(name, sizeof(name), "flopsw%ds%dn", w, s);
-				glp_set_col_name(lp, colnum(w,s), name);
-				glp_set_col_bnds(lp, colnum(w,s), GLP_LO, 0., 0.);
-
-				snprintf(name, sizeof(name), "w%ds%dn", w, s);
-				glp_set_col_name(lp, nw*ns+colnum(w,s), name);
-				if (is_integer)
-				{
-                                        glp_set_col_kind(lp, nw*ns+colnum(w, s), GLP_IV);
-					glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0, 1);
-				}
-				else
-					glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0.0, 1.0);
-
-			}
-
-
-		int curr_row_idx = 0;
-		/* Total worker execution time */
-		glp_add_rows(lp, nw*ns);
-
-		/*nflops[s][w]/v[s][w] < x[s][w]*tmax */
-		for(s = 0; s < ns; s++)
-		{
-			for (w = 0; w < nw; w++)
-			{
-				char name[32], title[64];
-				starpu_worker_get_name(w, name, sizeof(name));
-				snprintf(title, sizeof(title), "worker %s", name);
-				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
-
-				/* nflosp[s][w] */
-				ia[n] = curr_row_idx+s*nw+w+1;
-				ja[n] = colnum(w, s);
-				ar[n] = 1 / speed[s][w];
-
-				n++;
-				
-				/* x[s][w] = 1 | 0 */
-				ia[n] = curr_row_idx+s*nw+w+1;
-				ja[n] = nw*ns+colnum(w,s);
-				ar[n] = (-1) * tmax;
-				n++;
-				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
-			}
-		}
-
-		curr_row_idx += nw*ns;
-
-		/* sum(flops[s][w]) = flops[s] */
-		glp_add_rows(lp, ns);
-		for (s = 0; s < ns; s++)
-		{
-			char name[32], title[64];
-			starpu_worker_get_name(w, name, sizeof(name));
-			snprintf(title, sizeof(title), "flops %lf ctx%d", flops[s], s);
-			glp_set_row_name(lp, curr_row_idx+s+1, title);
-			for (w = 0; w < nw; w++)
-			{
-				ia[n] = curr_row_idx+s+1;
-				ja[n] = colnum(w, s);
-				ar[n] = 1;
-				n++;
-			}
-			glp_set_row_bnds(lp, curr_row_idx+s+1, GLP_FX, flops[s], flops[s]);
-		}
-
-		curr_row_idx += ns;
-
-		/* sum(x[s][w]) = 1 */
-		glp_add_rows(lp, nw);
-		for (w = 0; w < nw; w++)
-		{
-			char name[32], title[64];
-			starpu_worker_get_name(w, name, sizeof(name));
-			snprintf(title, sizeof(title), "w%x", w);
-			glp_set_row_name(lp, curr_row_idx+w+1, title);
-			for(s = 0; s < ns; s++)
-			{
-				ia[n] = curr_row_idx+w+1;
-				ja[n] = nw*ns+colnum(w,s);
-				ar[n] = 1;
-				n++;
-			}
-			if(is_integer)				
-				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
-			else
-				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
-		}
-
-		curr_row_idx += nw;
-
-		/* sum(nflops[s][w]) > 0*/
-		glp_add_rows(lp, nw);
-		for (w = 0; w < nw; w++)
-		{
-			char name[32], title[64];
-			starpu_worker_get_name(w, name, sizeof(name));
-			snprintf(title, sizeof(title), "flopsw%x", w);
-			glp_set_row_name(lp, curr_row_idx+w+1, title);
-			for(s = 0; s < ns; s++)
-			{
-				ia[n] = curr_row_idx+w+1;
-				ja[n] = colnum(w,s);
-				ar[n] = 1;
-				n++;
-			}
-
-			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_LO, 0.1, 0.);
-		}
-
-		if(n != ne)
-			printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne);
-		STARPU_ASSERT(n == ne);
-
-		glp_load_matrix(lp, ne-1, ia, ja, ar);
-	}
-
-	glp_smcp parm;
-	glp_init_smcp(&parm);
-	parm.msg_lev = GLP_MSG_OFF;
-	int ret = glp_simplex(lp, &parm);
-	if (ret)
-	{
-		glp_delete_prob(lp);
-		lp = NULL;
-		return 0.0;
-	}
-
-        if (is_integer)
-        {
-                glp_iocp iocp;
-                glp_init_iocp(&iocp);
-                iocp.msg_lev = GLP_MSG_OFF;
-                glp_intopt(lp, &iocp);
-		int stat = glp_mip_status(lp);
-		/* if we don't have a solution return */
-		if(stat == GLP_NOFEAS)
-		{
-			glp_delete_prob(lp);
-			lp = NULL;
-			return 0.0;
-		}
-        }
-
-	int stat = glp_get_prim_stat(lp);
-	/* if we don't have a solution return */
-	if(stat == GLP_NOFEAS)
-	{
-		glp_delete_prob(lp);
-		lp = NULL;
-		return 0.0;
-	}
-
-	double res = glp_get_obj_val(lp);
-
-	for(s = 0; s < ns; s++)
-		for(w = 0; w < nw; w++)
-		{
-			flops_on_w[s][w] = glp_get_col_prim(lp, colnum(w, s));
-			if (is_integer)
-				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*ns+colnum(w, s));
-			else
-				w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
-//			printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]);
-		}
-
-	glp_delete_prob(lp);
-	for(s = 0; s < ns; s++)
-		for(w = 0; w < nw; w++)
-		{
-			final_w_in_s[s][w] = w_in_s[s][w];
-			final_flops_on_w[s][w] = flops_on_w[s][w];
-		}
-
-	return res;
+	return sc_hypervisor_lp_simulate_distrib_flops_on_sample(ns, nw, final_w_in_s, is_integer, tmax, speed, flops, final_flops_on_w);
 }
 
 static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double **flops_on_w, unsigned *sched_ctxs, int *workers)
 {
-//	double flops[ns];
-//	double speed[ns][nw];
 	double *flops = (double*)malloc(ns*sizeof(double));
 	double **speed = (double **)malloc(ns*sizeof(double*));
 	int i;
@@ -312,7 +99,7 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
         specific_data.workers = workers;
 
         unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _glp_resolve);
+								tmin, tmax, smallest_tmax, _compute_workers_distrib);
 
 	for(i = 0; i < ns; i++)
 		free(speed[i]);
@@ -416,7 +203,6 @@ static void ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
                         {
                                 _try_resizing(NULL, -1, NULL, -1);
-//                              sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);                                                                                                                
                         }
                 }
                 starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
@@ -433,7 +219,7 @@ static void ispeed_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *w
 	}
 }
 
-static void ispeed_lp_end_ctx(unsigned sched_ctx)
+static void ispeed_lp_end_ctx(__attribute__((unused))unsigned sched_ctx)
 {
 /* 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); */
 /* 	int worker; */

+ 3 - 7
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -85,7 +85,6 @@ static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int
 		nt++;
 
 	double w_in_s[ns][nw];
-//	double tasks[nw][nt];
 	double **tasks=(double**)malloc(nw*sizeof(double*));
 	int i;
 	for(i = 0; i < nw; i++)
@@ -175,7 +174,7 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 
 	int nt = 0; /* Number of different kinds of tasks */
 	
-//			starpu_pthread_mutex_lock(&mutex);
+//	starpu_pthread_mutex_lock(&mutex);
 	
 	/* we don't take the mutex bc a correct value of the number of tasks is
 	   not required but we do a copy in order to be sure
@@ -190,7 +189,6 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	
 	
 	double w_in_s[ns][nw];
-//			double tasks_per_worker[nw][nt];
 	double **tasks_per_worker=(double**)malloc(nw*sizeof(double*));
 	int i;
 	for(i = 0; i < nw; i++)
@@ -208,13 +206,13 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	   compute the nr of flops and not the tasks */
         /*lp computes it in s but it's converted to ms just before return */
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL);
-	double smallest_tmax = 0.0;//possible_tmax / 3;
+	double smallest_tmax = 0.0;
 	double tmax = possible_tmax * ns;
 	double tmin = smallest_tmax;
 
 	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
 								tmin, tmax, smallest_tmax, _compute_workers_distrib);
-//			starpu_pthread_mutex_unlock(&mutex);
+//	starpu_pthread_mutex_unlock(&mutex);
 	
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
@@ -282,7 +280,6 @@ static void teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 			return;
 		}
 
-
 		unsigned criteria = sc_hypervisor_get_resize_criteria();
 		if(criteria != SC_NOTHING && criteria == SC_IDLE)
 		{
@@ -290,7 +287,6 @@ static void teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
 			{
 				_try_resizing(NULL, -1, NULL, -1);
-//				sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);
 			}
 		}
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);

+ 3 - 2
sc_hypervisor/src/policies_utils/dichotomy.c

@@ -19,6 +19,8 @@
 #include <math.h>
 #include <sys/time.h>
 
+/* executes the function lp_estimated_distrib_func over the interval [tmin, tmax] until it finds the lowest value that
+   still has solutions */
 unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data,
 					    double tmin, double tmax, double smallest_tmax,
 					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw], 
@@ -80,9 +82,8 @@ unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
 	long diff_us = end_time.tv_usec  - start_time.tv_usec;
 
-	float timing = (float)(diff_s*1000000 + diff_us)/1000;
+	__attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000;
 
-//        fprintf(stdout, "nd = %d total time: %f ms \n", nd, timing);
 	return found_sol;
 }
 

+ 216 - 1
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -292,7 +292,8 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 
 	/*1/tmax should belong to the interval [0.0;1.0]*/
 	glp_set_col_name(lp, n, "vmax");
-	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
+//	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
+	glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
 	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
 	glp_set_obj_coef(lp, n, 1.0);
 
@@ -429,6 +430,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 
 	double vmax = glp_get_obj_val(lp);
 
+//	printf("vmax = %lf \n", vmax);
 	n = 1;
 	for(s = 0; s < ns; s++)
 	{
@@ -447,4 +449,217 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 	return vmax;
 }
 
+double sc_hypervisor_lp_simulate_distrib_flops_on_sample(int ns, int nw, double final_w_in_s[ns][nw], unsigned is_integer, double tmax, 
+							 double **speed, double flops[ns], double **final_flops_on_w)
+{
+	double w_in_s[ns][nw];
+	double flops_on_w[ns][nw];
+
+	int w, s;
+	glp_prob *lp;
+
+//	printf("try with tmax %lf\n", tmax);
+	lp = glp_create_prob();
+	glp_set_prob_name(lp, "StarPU theoretical bound");
+	glp_set_obj_dir(lp, GLP_MAX);
+	glp_set_obj_name(lp, "total execution time");
+
+	{
+		int ne = 5 * ns * nw /* worker execution time */
+			+ 1; /* glp dumbness */
+		int n = 1;
+		int ia[ne], ja[ne];
+		double ar[ne];
+
+
+		/* Variables: number of flops assigned to worker w in context s, and 
+		 the acknwoledgment that the worker w belongs to the context s */
+		glp_add_cols(lp, 2*nw*ns);
+#define colnum_sample(w, s) ((s)*nw+(w)+1)
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+				glp_set_obj_coef(lp, nw*ns+colnum_sample(w,s), 1.);
+		
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "flopsw%ds%dn", w, s);
+				glp_set_col_name(lp, colnum_sample(w,s), name);
+				glp_set_col_bnds(lp, colnum_sample(w,s), GLP_LO, 0., 0.);
+
+				snprintf(name, sizeof(name), "w%ds%dn", w, s);
+				glp_set_col_name(lp, nw*ns+colnum_sample(w,s), name);
+				if (is_integer)
+				{
+                                        glp_set_col_kind(lp, nw*ns+colnum_sample(w, s), GLP_IV);
+					glp_set_col_bnds(lp, nw*ns+colnum_sample(w,s), GLP_DB, 0, 1);
+				}
+				else
+					glp_set_col_bnds(lp, nw*ns+colnum_sample(w,s), GLP_DB, 0.0, 1.0);
+
+			}
+
+
+		int curr_row_idx = 0;
+		/* Total worker execution time */
+		glp_add_rows(lp, nw*ns);
+
+		/*nflops[s][w]/v[s][w] < x[s][w]*tmax */
+		for(s = 0; s < ns; s++)
+		{
+			for (w = 0; w < nw; w++)
+			{
+				char name[32], title[64];
+				starpu_worker_get_name(w, name, sizeof(name));
+				snprintf(title, sizeof(title), "worker %s", name);
+				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
+
+				/* nflosp[s][w] */
+				ia[n] = curr_row_idx+s*nw+w+1;
+				ja[n] = colnum_sample(w, s);
+				ar[n] = 1 / speed[s][w];
+
+				n++;
+				
+				/* x[s][w] = 1 | 0 */
+				ia[n] = curr_row_idx+s*nw+w+1;
+				ja[n] = nw*ns+colnum_sample(w,s);
+				ar[n] = (-1) * tmax;
+				n++;
+				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
+			}
+		}
+
+		curr_row_idx += nw*ns;
+
+		/* sum(flops[s][w]) = flops[s] */
+		glp_add_rows(lp, ns);
+		for (s = 0; s < ns; s++)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "flops %lf ctx%d", flops[s], s);
+			glp_set_row_name(lp, curr_row_idx+s+1, title);
+			for (w = 0; w < nw; w++)
+			{
+				ia[n] = curr_row_idx+s+1;
+				ja[n] = colnum_sample(w, s);
+				ar[n] = 1;
+				n++;
+			}
+			glp_set_row_bnds(lp, curr_row_idx+s+1, GLP_FX, flops[s], flops[s]);
+		}
+
+		curr_row_idx += ns;
+
+		/* sum(x[s][w]) = 1 */
+		glp_add_rows(lp, nw);
+		for (w = 0; w < nw; w++)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "w%x", w);
+			glp_set_row_name(lp, curr_row_idx+w+1, title);
+			for(s = 0; s < ns; s++)
+			{
+				ia[n] = curr_row_idx+w+1;
+				ja[n] = nw*ns+colnum_sample(w,s);
+				ar[n] = 1;
+				n++;
+			}
+			if(is_integer)				
+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
+			else
+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+		}
+
+		curr_row_idx += nw;
+
+		/* sum(nflops[s][w]) > 0*/
+		glp_add_rows(lp, nw);
+		for (w = 0; w < nw; w++)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "flopsw%x", w);
+			glp_set_row_name(lp, curr_row_idx+w+1, title);
+			for(s = 0; s < ns; s++)
+			{
+				ia[n] = curr_row_idx+w+1;
+				ja[n] = colnum_sample(w,s);
+				ar[n] = 1;
+				n++;
+			}
+
+			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_LO, 0.1, 0.);
+		}
+
+		if(n != ne)
+			printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne);
+		STARPU_ASSERT(n == ne);
+
+		glp_load_matrix(lp, ne-1, ia, ja, ar);
+	}
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+	if (ret)
+	{
+		glp_delete_prob(lp);
+		lp = NULL;
+		return 0.0;
+	}
+
+        if (is_integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+                glp_intopt(lp, &iocp);
+		int stat = glp_mip_status(lp);
+		/* if we don't have a solution return */
+		if(stat == GLP_NOFEAS)
+		{
+			glp_delete_prob(lp);
+			lp = NULL;
+			return 0.0;
+		}
+        }
+
+	int stat = glp_get_prim_stat(lp);
+	/* if we don't have a solution return */
+	if(stat == GLP_NOFEAS)
+	{
+		glp_delete_prob(lp);
+		lp = NULL;
+		return 0.0;
+	}
+
+	double res = glp_get_obj_val(lp);
+
+	for(s = 0; s < ns; s++)
+		for(w = 0; w < nw; w++)
+		{
+			flops_on_w[s][w] = glp_get_col_prim(lp, colnum_sample(w, s));
+			if (is_integer)
+				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*ns+colnum_sample(w, s));
+			else
+				w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum_sample(w,s));
+//			printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]);
+		}
+
+	glp_delete_prob(lp);
+	for(s = 0; s < ns; s++)
+		for(w = 0; w < nw; w++)
+		{
+			final_w_in_s[s][w] = w_in_s[s][w];
+			final_flops_on_w[s][w] = flops_on_w[s][w];
+		}
+
+	return res;
+
+}
 #endif // STARPU_HAVE_GLPK_H

+ 47 - 26
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -20,11 +20,6 @@
 #include "sc_hypervisor_intern.h"
 #include <starpu_config.h>
 
-#ifdef STARPU_HAVE_GLPK_H
-
-
-#endif //STARPU_HAVE_GLPK_H
-
 double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], 
 					     int total_nw[ntypes_of_workers], struct types_of_workers *tw)
 {
@@ -39,23 +34,11 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 	for(i = 0; i < nsched_ctxs; i++)
 	{
 		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
-/* #ifdef STARPU_USE_CUDA */
-/* 		int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); */
-/* 		if(ncuda != 0) */
-/* 		{ */
-/* 			v[i][0] = sc_hypervisor_get_speed(sc_w, STARPU_CUDA_WORKER); */
-/* 			v[i][1] = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER); */
-/* 		} */
-/* 		else */
-/* 			v[i][0] = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER); */
-/* #else */
-/* 		v[i][0] = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER); */
-/* #endif // STARPU_USE_CUDA */
 		int w;
 		for(w = 0; w < nw; w++)
 			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
 		
-		flops[i] = sc_w->remaining_flops < 0.0 ? 0.0 : sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
+		flops[i] = sc_w->remaining_flops < 0.0 ? 0.0 : sc_w->remaining_flops/1000000000; /* in gflops*/
 //		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
 	}
 
@@ -171,12 +154,19 @@ void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched
 			if(nworkers_ctx > res_rounded[sched_ctx_idx][w])
 			{
 				int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w];
-				if(target_res == 0.0 && nworkers_to_move > 0)
-					nworkers_to_move--;
 				int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &nworkers_to_move, arch);
 				int i;
-				for(i = 0; i < nworkers_to_move; i++)
-					tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
+				if(target_res == 0.0 && nworkers_to_move > 0)
+				{
+					tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[0];
+					for(i = 1; i < nworkers_to_move; i++)
+						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
+				}
+				else
+				{
+					for(i = 0; i < nworkers_to_move; i++)
+						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
+				}
 				free(workers_to_move);
 			}
 		}
@@ -240,7 +230,7 @@ void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ct
 		
 		int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
 		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;
-		
+
 		if( nw_needed > 0 && tmp_nw_move[w] > 0)
 		{
 			*nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
@@ -263,7 +253,7 @@ void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ct
 		int x = floor(needed);
 		double x_double = (double)x;
 		double diff = needed - x_double;
-		if(diff > 0.3 && tmp_nw_add[w] > 0)
+		if((diff > 0.3 || needed > 0.3) && tmp_nw_add[w] > 0)
 		{
 			*nw_add = tmp_nw_add[w];
 			int i = 0;
@@ -335,7 +325,6 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s, 
 					      tmp_nw_move, tmp_workers_move, 
 					      tmp_nw_add, tmp_workers_add, res_rounded, res, tw);
-
 		for(s2 = 0; s2 < ns; s2++)
 		{
 			if(sched_ctxs[s2] != sched_ctxs[s])
@@ -381,6 +370,34 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
 	}
 }
+int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove)
+{
+	int nw_remove = 0;
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	int worker;
+
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+		workers->init_iterator(workers, &it);
+
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		int i;
+		unsigned found = 0;
+		for(i = 0; i < nw_add; i++)
+		{
+			if(worker == workers_add[i])
+			{
+				found = 1;
+				break;
+			}
+		}
+		if(!found)
+			workers_remove[nw_remove++] = worker;
+	}
+	return nw_remove;
+}
 
 void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw)
 {
@@ -406,8 +423,9 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 				if(target_res == 0.0)
 				{
 					nworkers_to_add=1;
-					start[w]--;
+					int old_start = start[w];
 					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
+					start[w] = old_start;
 					int i;
 					for(i = 0; i < nworkers_to_add; i++)
 						workers_add[nw_add++] = workers_to_add[i];
@@ -455,6 +473,9 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 		if(nw_add > 0)
 		{
 			sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
+			int workers_remove[STARPU_NMAXWORKERS];
+			int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
+			sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], 0);
 			sc_hypervisor_start_resize(sched_ctxs[s]);
 		}
 

+ 5 - 3
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -352,7 +352,7 @@ double sc_hypervisor_get_fastest_ctx_exec_time(void)
 
 void sc_hypervisor_group_workers_by_type(struct types_of_workers *tw, int *total_nw)
 {
-	int w;
+	unsigned w;
 	for(w = 0; w < tw->nw; w++)
 		total_nw[w] = 0;
 
@@ -382,8 +382,9 @@ enum starpu_worker_archtype sc_hypervisor_get_arch_for_index(unsigned w, struct
 	else
 		if(tw->ncuda != 0)
 			return STARPU_CUDA_WORKER;
-}
 
+	return STARPU_CPU_WORKER;
+}
 
 unsigned sc_hypervisor_get_index_for_arch(enum starpu_worker_archtype arch, struct types_of_workers *tw)
 {
@@ -403,6 +404,7 @@ unsigned sc_hypervisor_get_index_for_arch(enum starpu_worker_archtype arch, stru
 				return 0;
 		}
 	}
+	return 0;
 }
 
 void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools)
@@ -521,7 +523,7 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void)
 				{
 					v[w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw));
 					
-					optimal_v[i] += nworkers_per_ctx[i][w];
+					optimal_v[i] += nworkers_per_ctx[i][w]*v[w];
 				}
 				_set_optimal_v(i, optimal_v[i]);
 			}

+ 9 - 5
sc_hypervisor/src/policies_utils/speed.c

@@ -112,7 +112,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 		double speed = 0.0;
 		unsigned nworkers = 0;
 		double all_workers_flops = 0.0;
-		double all_workers_idle_time = 0.0;
+		double max_workers_idle_time = 0.0;
 		while(workers->has_next(workers, &it))
 		{
 			worker = workers->get_next(workers, &it);
@@ -120,7 +120,8 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 			if(arch == req_arch)
 			{
 				all_workers_flops += sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */
-				all_workers_idle_time += sc_w->idle_time[worker]; /* in seconds */
+				if(max_workers_idle_time < sc_w->idle_time[worker])
+					max_workers_idle_time = sc_w->idle_time[worker]; /* in seconds */
 				nworkers++;
 			}
 		}			
@@ -131,7 +132,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 			
 			/* compute speed for the last frame */
 			double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
-			elapsed_time -= all_workers_idle_time;
+			elapsed_time -= max_workers_idle_time;
 			speed = (all_workers_flops / elapsed_time) / nworkers;
 		}
 		else
@@ -165,17 +166,20 @@ double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper*
 	return -1.0;
 }
 
+/* returns the speed necessary for the linear programs (either the monitored one or a default value) */
 double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch)
 {
-
+	/* monitored speed in the last frame */
 	double speed = sc_hypervisor_get_speed_per_worker_type(sc_w, arch);
 	if(speed == -1.0)
 	{
+		/* avg value of the monitored speed over the entire current execution */
 		speed = sc_hypervisor_get_ref_speed_per_worker_type(sc_w, arch);
 	}
 	if(speed == -1.0)
 	{
-		speed = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
+		/* a default value */
+		speed = arch == STARPU_CPU_WORKER ? SC_HYPERVISOR_DEFAULT_CPU_SPEED : SC_HYPERVISOR_DEFAULT_CUDA_SPEED;
 	}
        
 	return speed;

+ 25 - 6
sc_hypervisor/src/sc_hypervisor.c

@@ -247,7 +247,6 @@ static void _print_current_time()
 
 void sc_hypervisor_shutdown(void)
 {
-//	printf("shutdown\n");
 	int i;
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 	{
@@ -344,8 +343,6 @@ void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
 	hypervisor.sched_ctx_w[sched_ctx].sched_ctx = STARPU_NMAX_SCHED_CTXS;
 	_remove_config(sched_ctx);
 
-/* 	free(hypervisor.configurations[sched_ctx]); */
-/* 	free(hypervisor.resize_requests[sched_ctx]); */
 	starpu_pthread_mutex_destroy(&hypervisor.conf_mut[sched_ctx]);
 	starpu_pthread_mutex_destroy(&hypervisor.resize_mut[sched_ctx]);
 	if(hypervisor.nsched_ctxs == 1)
@@ -601,7 +598,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 {
 	if(worker != -1 && !starpu_sched_ctx_contains_worker(worker, sched_ctx))
 		return 0;
-
+	
 	struct sc_hypervisor_resize_ack *resize_ack = NULL;
 	unsigned sender_sched_ctx = STARPU_NMAX_SCHED_CTXS;
 
@@ -796,7 +793,14 @@ static void notify_poped_task(unsigned sched_ctx, int worker, struct starpu_task
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ;
 	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += task->flops;
-	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= task->flops; //sc_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]);
+	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
+	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= task->flops;
+/* 	if(hypervisor.sched_ctx_w[sched_ctx].remaining_flops < 0.0) */
+/* 		hypervisor.sched_ctx_w[sched_ctx].remaining_flops = 0.0; */
+//	double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]);
+/* 	printf("*****************STARPU_STARPU_STARPU: decrement %lf flops  remaining flops %lf total flops %lf elapseed flops %lf in ctx %d \n", */
+/* 	       task->flops, hypervisor.sched_ctx_w[sched_ctx].remaining_flops,  hypervisor.sched_ctx_w[sched_ctx].total_flops, ctx_elapsed_flops, sched_ctx); */
+	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 
 	if(hypervisor.resize[sched_ctx])
 	{	
@@ -887,7 +891,7 @@ static void notify_delete_context(unsigned sched_ctx)
 void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
-	int curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : nsched_ctxs;
+	unsigned curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : (unsigned)nsched_ctxs;
 	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs;
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	unsigned s;
@@ -986,8 +990,23 @@ struct types_of_workers* sc_hypervisor_get_types_of_workers(int *workers, unsign
 void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops)
 {
 //	double diff = total_flops - hypervisor.sched_ctx_w[sched_ctx].total_flops;
+//	printf("*****************STARPU_STARPU_STARPU: update diff flops %lf to ctx %d \n", diff_total_flops, sched_ctx);
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
 	hypervisor.sched_ctx_w[sched_ctx].total_flops += diff_total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops += diff_total_flops;	
+/* 	printf("*****************STARPU_STARPU_STARPU: total flops %lf remaining flops %lf in ctx %d \n", */
+/* 	       hypervisor.sched_ctx_w[sched_ctx].total_flops, hypervisor.sched_ctx_w[sched_ctx].remaining_flops, sched_ctx); */
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
+
+/* Adds diff_elapsed_flops to both the elapsed_flops and total_elapsed_flops
+ * counters that context sched_ctx keeps for the calling worker.
+ * Nothing is updated when starpu_worker_get_id() returns -1. */
+void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_elapsed_flops)
+{
+	int workerid = starpu_worker_get_id();
+	if(workerid == -1)
+		return;
+
+	/* both counters are updated under the hypervisor mutex */
+	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
+	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[workerid] += diff_elapsed_flops;
+	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[workerid] += diff_elapsed_flops;
+	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+}

+ 2 - 0
sc_hypervisor/src/sc_hypervisor_intern.h

@@ -18,6 +18,8 @@
 #include <common/uthash.h>
 
 #define SC_SPEED_MAX_GAP_DEFAULT 50
+#define SC_HYPERVISOR_DEFAULT_CPU_SPEED 5.0
+#define SC_HYPERVISOR_DEFAULT_CUDA_SPEED 100.0
 
 struct size_request
 {

+ 2 - 0
src/Makefile.am

@@ -66,6 +66,7 @@ noinst_HEADERS = 						\
 	core/progress_hook.h                                    \
 	core/sched_policy.h					\
 	core/sched_ctx.h					\
+	core/sched_ctx_list.h					\
 	core/perfmodel/perfmodel.h				\
 	core/perfmodel/regression.h				\
 	core/jobs.h						\
@@ -170,6 +171,7 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 						\
 	core/sched_policy.c					\
 	core/simgrid.c						\
 	core/sched_ctx.c					\
+	core/sched_ctx_list.c					\
 	core/parallel_task.c					\
 	core/detect_combined_workers.c				\
 	sched_policies/eager_central_policy.c			\

+ 26 - 79
src/core/sched_ctx.c

@@ -29,21 +29,16 @@ unsigned with_hypervisor = 0;
 double max_time_worker_on_ctx = -1.0;
 
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
-static unsigned _starpu_worker_get_first_free_sched_ctx(struct _starpu_worker *worker);
-
-static unsigned _starpu_worker_get_sched_ctx_id(struct _starpu_worker *worker, unsigned sched_ctx_id);
 
 static void _starpu_worker_gets_into_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker)
 {
-	unsigned worker_sched_ctx_id = _starpu_worker_get_sched_ctx_id(worker, sched_ctx_id);
+	unsigned ret_sched_ctx = _starpu_sched_ctx_list_get_sched_ctx(worker->sched_ctx_list, sched_ctx_id);
 	/* the worker was planning to go away in another ctx but finally he changed his mind & 
 	   he's staying */
-	if (worker_sched_ctx_id  == STARPU_NMAX_SCHED_CTXS)
+	if (ret_sched_ctx == STARPU_NMAX_SCHED_CTXS)
 	{
-		worker_sched_ctx_id = _starpu_worker_get_first_free_sched_ctx(worker);
-		struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 		/* add context to worker */
-		worker->sched_ctx[worker_sched_ctx_id] = sched_ctx;
+		_starpu_sched_ctx_list_add(&worker->sched_ctx_list, sched_ctx_id);
 		worker->nsched_ctxs++;
 		worker->active_ctx = sched_ctx_id;
 	}
@@ -53,12 +48,16 @@ static void _starpu_worker_gets_into_ctx(unsigned sched_ctx_id, struct _starpu_w
 
 void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker)
 {
-	unsigned worker_sched_ctx_id = _starpu_worker_get_sched_ctx_id(worker, sched_ctx_id);
+	unsigned ret_sched_ctx = _starpu_sched_ctx_list_get_sched_ctx(worker->sched_ctx_list, sched_ctx_id);
 	/* remove context from worker */
-	if(worker->sched_ctx[worker_sched_ctx_id]->sched_policy && worker->sched_ctx[worker_sched_ctx_id]->sched_policy->remove_workers)
-		worker->sched_ctx[worker_sched_ctx_id]->sched_policy->remove_workers(sched_ctx_id, &worker->workerid, 1);
-	worker->sched_ctx[worker_sched_ctx_id] = NULL;
-	worker->nsched_ctxs--;
+	if(ret_sched_ctx != STARPU_NMAX_SCHED_CTXS)
+	{
+		struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+		if(sched_ctx && sched_ctx->sched_policy && sched_ctx->sched_policy->remove_workers)
+			sched_ctx->sched_policy->remove_workers(sched_ctx_id, &worker->workerid, 1);
+		_starpu_sched_ctx_list_remove(&worker->sched_ctx_list, sched_ctx_id);
+		worker->nsched_ctxs--;
+	}
 	return;
 }
 
@@ -316,7 +315,9 @@ struct _starpu_sched_ctx*  _starpu_create_sched_ctx(struct starpu_sched_policy *
 		for(i = 0; i < nworkers; i++)
 		{
 			struct _starpu_worker *worker = _starpu_get_worker_struct(i);
-			worker->sched_ctx[_starpu_worker_get_first_free_sched_ctx(worker)] = sched_ctx;
+			worker->sched_ctx_list = (struct _starpu_sched_ctx_list*)malloc(sizeof(struct _starpu_sched_ctx_list));
+			_starpu_sched_ctx_list_init(worker->sched_ctx_list);
+			_starpu_sched_ctx_list_add(&worker->sched_ctx_list, sched_ctx->id);
 			worker->nsched_ctxs++;
 		}
 	}
@@ -527,6 +528,9 @@ static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx)
 	STARPU_PTHREAD_MUTEX_DESTROY(&sched_ctx->empty_ctx_mutex);
 	sem_destroy(&sched_ctx->parallel_code_sem);
 	sched_ctx->id = STARPU_NMAX_SCHED_CTXS;
+#ifdef STARPU_HAVE_HWLOC
+	hwloc_bitmap_free(sched_ctx->hwloc_workers_set);
+#endif //STARPU_HAVE_HWLOC
 
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 	STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag);
@@ -721,24 +725,6 @@ void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config)
 	return;
 }
 
-/* unused sched_ctx pointers of a worker are NULL */
-void _starpu_init_sched_ctx_for_worker(unsigned workerid)
-{
-	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
-	worker->sched_ctx = (struct _starpu_sched_ctx**)malloc(STARPU_NMAX_SCHED_CTXS * sizeof(struct _starpu_sched_ctx*));
-	unsigned i;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-		worker->sched_ctx[i] = NULL;
-
-	return;
-}
-
-void _starpu_delete_sched_ctx_for_worker(unsigned workerid)
-{
-	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
-	free(worker->sched_ctx);
-}
-
 /* sched_ctx aren't necessarly one next to another */
 /* for eg when we remove one its place is free */
 /* when we add  new one we reuse its place */
@@ -753,34 +739,6 @@ static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *
 	return STARPU_NMAX_SCHED_CTXS;
 }
 
-static unsigned _starpu_worker_get_first_free_sched_ctx(struct _starpu_worker *worker)
-{
-	unsigned i;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-		if(worker->sched_ctx[i] == NULL)
-			return i;
-	STARPU_ASSERT(0);
-	return STARPU_NMAX_SCHED_CTXS;
-}
-
-static unsigned _starpu_worker_get_sched_ctx_id(struct _starpu_worker *worker, unsigned sched_ctx_id)
-{
-	unsigned to_be_deleted = STARPU_NMAX_SCHED_CTXS;
-	unsigned i;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-	{
-		if(worker->sched_ctx[i] != NULL)
-		{
-			if(worker->sched_ctx[i]->id == sched_ctx_id)
-				return i;
-			else if(worker->sched_ctx[i]->id == STARPU_NMAX_SCHED_CTXS)
-				to_be_deleted = i;
-		}
-	}
-
-	return to_be_deleted;
-}
-
 int _starpu_wait_for_all_tasks_of_sched_ctx(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
@@ -1000,13 +958,6 @@ unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sc
 
 unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
 {
-/* 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); */
-/* 	unsigned i; */
-/* 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) */
-/* 	{ */
-/* 		if(worker->sched_ctx[i] && worker->sched_ctx[i]->id == sched_ctx_id) */
-/* 			return 1; */
-/* 	} */
         struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
         struct starpu_worker_collection *workers = sched_ctx->workers;
@@ -1083,10 +1034,10 @@ void starpu_sched_ctx_set_turn_to_other_ctx(int workerid, unsigned sched_ctx_id)
 
 	struct _starpu_sched_ctx *other_sched_ctx = NULL;
 	struct _starpu_sched_ctx *active_sched_ctx = NULL;
-	int i;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	struct _starpu_sched_ctx_list *l = NULL;
+        for (l = worker->sched_ctx_list; l; l = l->next)
 	{
-		other_sched_ctx = worker->sched_ctx[i];
+		other_sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
 		if(other_sched_ctx != NULL && other_sched_ctx->id != STARPU_NMAX_SCHED_CTXS &&
 		   other_sched_ctx->id != 0 && other_sched_ctx->id != sched_ctx_id)
 		{
@@ -1277,7 +1228,6 @@ void _starpu_sched_ctx_rebind_thread_to_its_cpu(unsigned cpuid)
 static void _starpu_sched_ctx_get_workers_to_sleep(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-
 	struct starpu_worker_collection *workers = sched_ctx->workers;
 	struct starpu_sched_ctx_iterator it;
 	struct _starpu_worker *worker = NULL;
@@ -1294,7 +1244,7 @@ static void _starpu_sched_ctx_get_workers_to_sleep(unsigned sched_ctx_id)
 
 	while(workers->has_next(workers, &it))
 	{
-		int w = workers->get_next(workers, &it);
+		workers->get_next(workers, &it);
 		sem_wait(&sched_ctx->parallel_code_sem);
 	}
 	return;
@@ -1304,15 +1254,12 @@ void _starpu_sched_ctx_signal_worker_blocked(int workerid)
 {
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 	struct _starpu_sched_ctx *sched_ctx = NULL;
-	unsigned i;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	struct _starpu_sched_ctx_list *l = NULL;
+	for (l = worker->sched_ctx_list; l; l = l->next)
 	{
-		if(worker->sched_ctx[i] != NULL && worker->sched_ctx[i]->id != STARPU_NMAX_SCHED_CTXS
-			&& worker->sched_ctx[i]->id != 0)
-		{
-			sched_ctx = worker->sched_ctx[i];
+		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
+		if(sched_ctx->id != 0)
 			sem_post(&sched_ctx->parallel_code_sem);
-		}
 	}	
 	return;
 }

+ 1 - 6
src/core/sched_ctx.h

@@ -24,6 +24,7 @@
 #include <common/barrier_counter.h>
 #include <profiling/profiling.h>
 #include <semaphore.h>
+#include "sched_ctx_list.h"
 
 #ifdef STARPU_HAVE_HWLOC
 #include <hwloc.h>
@@ -117,12 +118,6 @@ struct _starpu_machine_config;
 /* init sched_ctx_id of all contextes*/
 void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config);
 
-/* init the list of contexts of the worker */
-void _starpu_init_sched_ctx_for_worker(unsigned workerid);
-
-/* free the list of contexts of the worker */
-void _starpu_delete_sched_ctx_for_worker(unsigned workerid);
-
 /* allocate all structures belonging to a context */
 struct _starpu_sched_ctx*  _starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerid, int nworkerids, unsigned is_init_sched, const char *sched_name);
 

+ 86 - 0
src/core/sched_ctx_list.c

@@ -0,0 +1,86 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2013  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "sched_ctx_list.h"
+
+/* Puts the node pointed to by list in its initial state: no successor and
+ * STARPU_NMAX_SCHED_CTXS stored as the context id. */
+void _starpu_sched_ctx_list_init(struct _starpu_sched_ctx_list *list)
+{
+	list->sched_ctx = STARPU_NMAX_SCHED_CTXS;
+	list->next = NULL;
+}
+
+/* Records sched_ctx in the list whose head pointer is *list.
+ *
+ * If the head node still holds STARPU_NMAX_SCHED_CTXS (the value stored by
+ * _starpu_sched_ctx_list_init()), that node is reused. Otherwise a new node
+ * is allocated and becomes the new head, so *list may change.
+ * A NULL head (*list == NULL) is accepted and treated as an empty list.
+ * No check is made for sched_ctx being already present. */
+void _starpu_sched_ctx_list_add(struct _starpu_sched_ctx_list **list, unsigned sched_ctx)
+{
+	struct _starpu_sched_ctx_list *head = *list;
+
+	/* reuse the initialised-but-unused head node when there is one */
+	if(head != NULL && head->sched_ctx == STARPU_NMAX_SCHED_CTXS)
+	{
+		head->sched_ctx = sched_ctx;
+		return;
+	}
+
+	/* otherwise prepend a freshly allocated node */
+	struct _starpu_sched_ctx_list *l = malloc(sizeof(*l));
+	STARPU_ASSERT(l != NULL);
+	l->sched_ctx = sched_ctx;
+	l->next = head;
+	*list = l;
+}
+
+/* Removes the first node holding sched_ctx from the list whose head pointer
+ * is *list.
+ *
+ * - If sched_ctx is not in the list (or *list is NULL) nothing happens.
+ * - A node that has a predecessor or a successor is unlinked and freed;
+ *   *list is updated when the head itself is removed.
+ * - When the node is the only one in the list it is not freed: it is put
+ *   back in the state set by _starpu_sched_ctx_list_init(), so that *list
+ *   stays valid and _starpu_sched_ctx_list_add() can reuse it. */
+void _starpu_sched_ctx_list_remove(struct _starpu_sched_ctx_list **list, unsigned sched_ctx)
+{
+	struct _starpu_sched_ctx_list *prev = NULL;
+	struct _starpu_sched_ctx_list *l;
+	for (l = *list; l; l = l->next)
+	{
+		if(l->sched_ctx == sched_ctx)
+			break;
+		prev = l;
+	}
+
+	/* context not found: do not touch the list */
+	if(l == NULL)
+		return;
+
+	if(prev)
+	{
+		/* also covers the tail: the predecessor must not keep a pointer to freed memory */
+		prev->next = l->next;
+		free(l);
+	}
+	else if(l->next)
+	{
+		*list = l->next;
+		free(l);
+	}
+	else
+	{
+		/* sole node: keep it as the empty placeholder instead of leaving *list dangling */
+		l->sched_ctx = STARPU_NMAX_SCHED_CTXS;
+	}
+}
+
+/* Looks sched_ctx up in the list starting at list.
+ * Returns sched_ctx when a node holds it, STARPU_NMAX_SCHED_CTXS otherwise. */
+unsigned _starpu_sched_ctx_list_get_sched_ctx(struct _starpu_sched_ctx_list *list, unsigned sched_ctx)
+{
+	struct _starpu_sched_ctx_list *node = list;
+	/* walk until a matching node or the end of the list */
+	while(node != NULL && node->sched_ctx != sched_ctx)
+		node = node->next;
+	return node != NULL ? sched_ctx : STARPU_NMAX_SCHED_CTXS;
+}
+
+/* Frees every node of the list whose head pointer is *list and leaves
+ * *list set to NULL. The argument is the address of the head pointer,
+ * not the head pointer itself. */
+void _starpu_sched_ctx_list_delete(struct _starpu_sched_ctx_list **list)
+{
+	struct _starpu_sched_ctx_list *node = *list;
+	while(node != NULL)
+	{
+		/* read the successor before the node is released */
+		struct _starpu_sched_ctx_list *following = node->next;
+		free(node);
+		node = following;
+	}
+	*list = NULL;
+}

+ 32 - 0
src/core/sched_ctx_list.h

@@ -0,0 +1,32 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __SCHED_CONTEXT_LIST_H__
+#define __SCHED_CONTEXT_LIST_H__
+
+/* Node of a singly linked list of scheduling context ids. */
+struct _starpu_sched_ctx_list
+{
+	/* following node, NULL at the end of the list */
+	struct _starpu_sched_ctx_list *next;
+	/* context id held by this node; _starpu_sched_ctx_list_init() stores
+	 * STARPU_NMAX_SCHED_CTXS here */
+	unsigned sched_ctx;
+};
+
+void _starpu_sched_ctx_list_init(struct _starpu_sched_ctx_list *list);
+void _starpu_sched_ctx_list_add(struct _starpu_sched_ctx_list **list, unsigned sched_ctx);
+void _starpu_sched_ctx_list_remove(struct _starpu_sched_ctx_list **list, unsigned sched_ctx);
+unsigned _starpu_sched_ctx_list_get_sched_ctx(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
+void _starpu_sched_ctx_list_delete(struct _starpu_sched_ctx_list **list);
+
+#endif // __SCHED_CONTEXT_LIST_H__

+ 33 - 54
src/core/sched_policy.c

@@ -209,19 +209,19 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 		starpu_prefetch_task_input_on_node(task, memory_node);
 
 	/* if we push a task on a specific worker, notify all the sched_ctxs the worker belongs to */
-	unsigned i;
 	struct _starpu_sched_ctx *sched_ctx;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-	{
-		sched_ctx = worker->sched_ctx[i];
-		if (sched_ctx != NULL && sched_ctx->sched_policy != NULL && sched_ctx->sched_policy->push_task_notify)
+	struct _starpu_sched_ctx_list *l = NULL;
+        for (l = worker->sched_ctx_list; l; l = l->next)
+        {
+		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
+		if (sched_ctx->sched_policy != NULL && sched_ctx->sched_policy->push_task_notify)
 			sched_ctx->sched_policy->push_task_notify(task, workerid, sched_ctx->id);
 	}
 
 #ifdef STARPU_USE_SC_HYPERVISOR
 	starpu_sched_ctx_call_pushed_task_cb(workerid, task->sched_ctx);
 #endif //STARPU_USE_SC_HYPERVISOR
-
+	unsigned i;
 	if (is_basic_worker)
 	{
 		unsigned node = starpu_worker_get_memory_node(workerid);
@@ -536,40 +536,33 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 }
 
 struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker)
-{
-	while(1)
+{	
+	struct _starpu_sched_ctx *sched_ctx, *good_sched_ctx = NULL;
+	unsigned smallest_counter =  worker->nsched_ctxs;
+	struct _starpu_sched_ctx_list *l = NULL;
+	for (l = worker->sched_ctx_list; l; l = l->next)
 	{
-		struct _starpu_sched_ctx *sched_ctx, *good_sched_ctx = NULL;
-		unsigned smallest_counter =  worker->nsched_ctxs;
-		unsigned i;
-		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
+		if(worker->removed_from_ctx[sched_ctx->id])
+			return sched_ctx;
+		if(sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
+		   smallest_counter > sched_ctx->pop_counter[worker->workerid])
 		{
-			sched_ctx = worker->sched_ctx[i];
-			
-			if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS && worker->removed_from_ctx[sched_ctx->id])
-				return sched_ctx;
-			if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS &&
-			   sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
-			   smallest_counter > sched_ctx->pop_counter[worker->workerid])
-			{
-				good_sched_ctx = sched_ctx;
-				smallest_counter = sched_ctx->pop_counter[worker->workerid];
-			}
+			good_sched_ctx = sched_ctx;
+			smallest_counter = sched_ctx->pop_counter[worker->workerid];
 		}
-		
-		if(good_sched_ctx == NULL)
+	}
+	
+	if(good_sched_ctx == NULL)
+	{
+		for (l = worker->sched_ctx_list; l; l = l->next)
 		{
-			for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-			{
-				sched_ctx = worker->sched_ctx[i];
-				if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
-					sched_ctx->pop_counter[worker->workerid] = 0;
-			}
-			
-			continue;
+			sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
+			sched_ctx->pop_counter[worker->workerid] = 0;
 		}
-		return good_sched_ctx;
+		return _starpu_get_sched_ctx_struct(worker->sched_ctx_list->sched_ctx);
 	}
+	return good_sched_ctx;
 }
 
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
@@ -592,45 +585,31 @@ pick:
 
 	/* get tasks from the stacks of the strategy */
 	if(!task)
-	{
-		struct _starpu_sched_ctx *sched_ctx;
-
-		//unsigned lucky_ctx = STARPU_NMAX_SCHED_CTXS;
-
-		int been_here[STARPU_NMAX_SCHED_CTXS];
-		int i;
-		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-			been_here[i] = 0;
+	{		
+		struct _starpu_sched_ctx *sched_ctx ;
 
-		while(!task)
+		if(!task)
 		{
 			if(worker->nsched_ctxs == 1)
 				sched_ctx = _starpu_get_initial_sched_ctx();
 			else
 				sched_ctx = _get_next_sched_ctx_to_pop_into(worker);
-			if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
+
+			if(sched_ctx && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
 			{
 				if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task)
 				{
 					task = sched_ctx->sched_policy->pop_task(sched_ctx->id);
-					//lucky_ctx = sched_ctx->id;
 				}
 			}
 
-			if(!task && worker->removed_from_ctx[sched_ctx->id])
+			if(!task && sched_ctx && worker->removed_from_ctx[sched_ctx->id])
 			{
 				_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
 				worker->removed_from_ctx[sched_ctx->id] = 0;
 			}
 
-			if((!task && sched_ctx->pop_counter[worker->workerid] == 0 && been_here[sched_ctx->id]) || worker->nsched_ctxs == 1)
-				break;
-
-
-			been_here[sched_ctx->id] = 1;
-
 			sched_ctx->pop_counter[worker->workerid]++;
-
 		}
 	  }
 

+ 0 - 4
src/core/topology.c

@@ -641,7 +641,6 @@ _starpu_init_mic_config (struct _starpu_machine_config *config,
 		config->workers[worker_idx].devid = miccore_id;
 		config->workers[worker_idx].worker_mask = STARPU_MIC;
 		config->worker_mask |= STARPU_MIC;
-		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 	}
 
 	topology->nworkers += topology->nmiccores[mic_idx];
@@ -779,7 +778,6 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].worker_mask = STARPU_CUDA;
-		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 		config->worker_mask |= STARPU_CUDA;
 
 		struct handle_entry *entry;
@@ -854,7 +852,6 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].worker_mask = STARPU_OPENCL;
-		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 		config->worker_mask |= STARPU_OPENCL;
 	}
 
@@ -979,7 +976,6 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 		config->workers[worker_idx].devid = cpu;
 		config->workers[worker_idx].worker_mask = STARPU_CPU;
 		config->worker_mask |= STARPU_CPU;
-		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 	}
 
 	topology->nworkers += topology->ncpus;

+ 1 - 1
src/core/workers.c

@@ -1102,7 +1102,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
 
 out:
 		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
-		_starpu_delete_sched_ctx_for_worker(workerid);
+		_starpu_sched_ctx_list_delete(worker->sched_ctx_list);
 		_starpu_job_list_delete(worker->terminated_jobs);
 	}
 }

+ 2 - 2
src/core/workers.h

@@ -29,7 +29,7 @@
 #include <core/topology.h>
 #include <core/errorcheck.h>
 #include <core/sched_ctx.h>
-
+#include <core/sched_ctx_list.h>
 #ifdef STARPU_HAVE_HWLOC
 #include <hwloc.h>
 #endif
@@ -84,7 +84,7 @@ struct _starpu_worker
 	char short_name[10];
 	unsigned run_by_starpu; /* Is this run by StarPU or directly by the application ? */
 
-	struct _starpu_sched_ctx **sched_ctx;
+	struct _starpu_sched_ctx_list *sched_ctx_list;
 	unsigned nsched_ctxs; /* the no of contexts a worker belongs to*/
 	struct _starpu_barrier_counter tasks_barrier; /* wait for the tasks submitted */
        

+ 60 - 13
src/debug/traces/starpu_fxt.c

@@ -17,6 +17,7 @@
 #include <starpu.h>
 #include <common/config.h>
 #include <common/uthash.h>
+#include <string.h>
 
 #ifdef STARPU_HAVE_POTI
 #include <poti.h>
@@ -1890,25 +1891,52 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 	options->nworkers = nworkers;
 }
 
-static FILE *out_data_total_trace_file;
+#define DATA_STR_MAX_SIZE 15
 
 struct parse_task
 {
 	unsigned exec_time;
 	unsigned data_total;
+	char *codelet_name;
 };
 
 static struct parse_task tasks[STARPU_NMAXWORKERS];
 
+/* One entry per codelet name seen while writing the data trace; entries are
+ * stored in the uthash table `kernels` and looked up by name in write_task(). */
+struct starpu_data_trace_kernel
+{
+	/* uthash handle, referenced as `hh` by HASH_ITER */
+	UT_hash_handle hh;
+	/* hash key: strdup'ed copy of the codelet name */
+	char *name;
+	/* file named after the codelet, opened with "w+" in write_task() */
+	FILE *file;
+} *kernels;
+
 #define NANO_SEC_TO_MILI_SEC 0.000001
 
+static FILE *codelet_list;
+
 static void write_task(struct parse_task pt)
 {
+	struct starpu_data_trace_kernel *kernel;
+	char *codelet_name = pt.codelet_name;
+	HASH_FIND_STR(kernels, codelet_name, kernel);
+	//fprintf(stderr, "%p %p %s\n", kernel, kernels, codelet_name);
+	if(kernel == NULL)
+	{
+		kernel = malloc(sizeof(*kernel));
+		kernel->name = strdup(codelet_name);
+		//fprintf(stderr, "%s\n", kernel->name);
+		kernel->file = fopen(codelet_name, "w+");
+		if(!kernel->file)
+		{
+			perror("open failed :");
+			exit(-1);
+		}
+		HASH_ADD_STR(kernels, name, kernel); 
+		fprintf(codelet_list, "%s\n", codelet_name);
+	}
 	double time = pt.exec_time * NANO_SEC_TO_MILI_SEC;
-	fprintf(out_data_total_trace_file, "%lf %d\n", time, pt.data_total);
+	fprintf(kernel->file, "%lf %d\n", time, pt.data_total);
 }
 
-
 void starpu_fxt_write_data_trace(char *filename_in)
 {
 	int fd_in;
@@ -1927,17 +1955,21 @@ void starpu_fxt_write_data_trace(char *filename_in)
 	        exit(-1);
 	}
 
+	codelet_list = fopen("codelet_list", "w+");
+	if(!codelet_list)
+	{
+		perror("open failed :");
+		exit(-1);
+	}
+
 	fxt_blockev_t block;
 	block = fxt_blockev_enter(fut);
 
-	out_data_total_trace_file = fopen("data_total.txt", "w+");
-	if(!out_data_total_trace_file)
-        {
-                perror("open failed :");
-                exit(-1);
-        }
-
 	struct fxt_ev_64 ev;
+
+	unsigned workerid;
+	unsigned long has_name = 0;
+
 	while(1)
 	{
 		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
@@ -1946,8 +1978,6 @@ void starpu_fxt_write_data_trace(char *filename_in)
 			break;
 		}
 		
-		unsigned workerid;
-
 		switch (ev.code)
 		{
 		case _STARPU_FUT_WORKER_INIT_START:
@@ -1957,6 +1987,9 @@ void starpu_fxt_write_data_trace(char *filename_in)
 		case _STARPU_FUT_START_CODELET_BODY:
 			workerid = find_worker_id(ev.param[2]);
 			tasks[workerid].exec_time = ev.time;
+			has_name = ev.param[3];
+			tasks[workerid].codelet_name = strdup(has_name ? (char *) &ev.param[4] : "unknow");
+			//fprintf(stderr, "start codelet :[%d][%s]\n", workerid, tasks[workerid].codelet_name);
 			break;
 			
 		case _STARPU_FUT_END_CODELET_BODY:
@@ -1985,11 +2018,25 @@ void starpu_fxt_write_data_trace(char *filename_in)
 	        exit(-1);
 	}
 	
-	if(fclose(out_data_total_trace_file))
+	if(fclose(codelet_list))
 	{
 		perror("close failed :");
 		exit(-1);
 	}
+	
+	struct starpu_data_trace_kernel *kernel, *tmp;	
 
+	HASH_ITER(hh, kernels, kernel, tmp)
+	{
+		if(fclose(kernel->file))
+		{ 
+			perror("close failed :");
+			exit(-1);
+		}
+		HASH_DEL(kernels, kernel);
+		free(kernel->name);
+		free(kernel);
+	}
+		
 }
 #endif // STARPU_USE_FXT

+ 4 - 4
src/drivers/driver_common/driver_common.c

@@ -224,11 +224,11 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int wor
 #ifdef STARPU_USE_SC_HYPERVISOR
 		struct _starpu_sched_ctx *sched_ctx = NULL;
 		struct starpu_sched_ctx_performance_counters *perf_counters = NULL;
-		int j;
-		for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
+		struct _starpu_sched_ctx_list *l = NULL;
+		for (l = args->sched_ctx_list; l; l = l->next)
 		{
-			sched_ctx = args->sched_ctx[j];
-			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
+			sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
+			if(sched_ctx->id != 0)
 			{
 				perf_counters = sched_ctx->perf_counters;
 				if(perf_counters != NULL && perf_counters->notify_idle_cycle)

+ 5 - 3
src/drivers/mp_common/sink_common.c

@@ -204,7 +204,6 @@ static void _starpu_sink_common_recv_workers(struct _starpu_mp_node * node, void
 		workers[i].current_task = NULL;
 		workers[i].set = NULL;
 		workers[i].terminated_jobs = NULL;
-		workers[i].sched_ctx = NULL;
 	
 		//_starpu_barrier_counter_init(&workers[i].tasks_barrier, 1);
 		//_starpu_barrier_counter_destroy(&workers[i].tasks_barrier);
@@ -442,9 +441,12 @@ static int _starpu_sink_common_get_current_rank(int workerid, struct _starpu_com
 
 /* Execute the task 
  */
-static void _starpu_sink_common_execute_kernel(struct _starpu_mp_node *node, int coreid, struct mp_task *task, struct _starpu_worker * worker)
+static void _starpu_sink_common_execute_kernel(struct _starpu_mp_node *node, int coreid, struct _starpu_worker * worker)
 {
 	struct _starpu_combined_worker * combined_worker = NULL;
+	struct mp_task* task = node->run_table[coreid];
+
+
 	/* If it's a parallel task */
 	if(task->is_parallel_task)
 	{
@@ -540,7 +542,7 @@ void* _starpu_sink_thread(void * thread_arg)
 		/*Wait there is a task available */
 		sem_wait(&node->sem_run_table[coreid]);
 		if(node->run_table[coreid] != NULL)
-			_starpu_sink_common_execute_kernel(node,coreid,node->run_table[coreid],worker);
+			_starpu_sink_common_execute_kernel(node,coreid,worker);
 
 	}
 	pthread_exit(NULL);

+ 2 - 1
src/drivers/mp_common/source_common.c

@@ -131,7 +131,7 @@ static int _starpu_src_common_handle_async(const struct _starpu_mp_node *node ST
 	return 1;
 }
 
-
+/* Handle all messages which have been stored in the message_queue */
 static void _starpu_src_common_handle_stored_async(struct _starpu_mp_node *node)
 {
 	STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
@@ -209,6 +209,7 @@ static void _starpu_src_common_recv_async(struct _starpu_mp_node * node)
 	enum _starpu_mp_command answer;
 
 	int completed = 0;	
+	/* While the awaited execution-completed message has not been received */
 	while(!completed)
 	{
 		answer = _starpu_mp_common_recv_command (node, arg, arg_size);

+ 2 - 2
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -53,8 +53,6 @@ struct _starpu_dmda_data
 	long int ready_task_cnt;
 };
 
-static double idle_power = 0.0;
-
 /* The dmda scheduling policy uses
  *
  * alpha * T_computation + beta * T_communication + gamma * Consumption
@@ -70,6 +68,7 @@ static double idle_power = 0.0;
 static double alpha = _STARPU_SCHED_ALPHA_DEFAULT;
 static double beta = _STARPU_SCHED_BETA_DEFAULT;
 static double _gamma = _STARPU_SCHED_GAMMA_DEFAULT;
+static double idle_power = 0.0;
 static const float alpha_minimum=0;
 static const float alpha_maximum=10.0;
 static const float beta_minimum=0;
@@ -866,6 +865,7 @@ static void initialize_dmda_policy(unsigned sched_ctx_id)
 		dt->idle_power = atof(strval_idle_power);
 
 #ifdef STARPU_USE_TOP
+	/* FIXME: broken, needs to access context variable */
 	starpu_top_register_parameter_float("DMDA_ALPHA", &alpha,
 					    alpha_minimum, alpha_maximum, param_modified);
 	starpu_top_register_parameter_float("DMDA_BETA", &beta,

+ 1 - 1
src/sched_policies/parallel_heft.c

@@ -502,7 +502,7 @@ static int parallel_heft_push_task(struct starpu_task *task)
 	return ret_val;
 }
 
-static void parallel_heft_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
+static void parallel_heft_add_workers(__attribute__((unused)) unsigned sched_ctx_id, int *workerids, unsigned nworkers)
 {
 	int workerid;
 	unsigned i;

+ 2 - 0
tests/datawizard/acquire_cb_insert.c

@@ -77,6 +77,8 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	if(starpu_cpu_worker_get_count() == 0) return STARPU_TEST_SKIPPED;
+
 	/* Declare x */
 	starpu_variable_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
 

+ 6 - 2
tests/datawizard/commute.c

@@ -85,7 +85,9 @@ void end(void *descr[], void *_args STARPU_ATTRIBUTE_UNUSED)
 {
 	int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
 
-	if (codelet_end.modes[0] & STARPU_W)
+	enum starpu_data_access_mode end_mode = *(enum starpu_data_access_mode*) _args;
+
+	if (end_mode & STARPU_W)
 		(*x)++;
 }
 
@@ -105,7 +107,7 @@ static void test(enum starpu_data_access_mode begin_mode, enum starpu_data_acces
 	int ret;
 
 	codelet_begin.modes[0] = begin_mode;
-	codelet_end.modes[0] = end_mode;
+	codelet_end.modes[0] = end_mode;	
 
 	begin_t = starpu_task_create();
 	begin_t->cl = &codelet_begin;
@@ -130,6 +132,8 @@ static void test(enum starpu_data_access_mode begin_mode, enum starpu_data_acces
 	end_t->cl = &codelet_end;
 	end_t->handles[0] = x_handle;
 	end_t->detach = 0;
+	end_t->cl_arg = &end_mode;
+	end_t->cl_arg_size = sizeof(end_mode);
 
 	if (starpu_task_submit(begin_t) == -ENODEV)
 		exit(STARPU_TEST_SKIPPED);

+ 2 - 0
tests/datawizard/data_invalidation.c

@@ -141,6 +141,8 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	if(starpu_cpu_worker_get_count() == 0) return STARPU_TEST_SKIPPED;
+
 	/* The buffer should never be explicitely allocated */
 	starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char));
 

+ 0 - 1
tests/errorcheck/invalid_blocking_calls.c

@@ -47,7 +47,6 @@ static struct starpu_codelet wrong_codelet =
 	.cpu_funcs = {wrong_func, NULL},
 	.cuda_funcs = {wrong_func, NULL},
         .opencl_funcs = {wrong_func, NULL},
-	.cpu_funcs_name = {"wrong_func", NULL},
 	.model = NULL,
 	.nbuffers = 0
 };

+ 61 - 7
tools/starpu_fxt_data_trace.c

@@ -1,19 +1,28 @@
 #include <stdio.h>
 #include <config.h>
 #include <starpu.h>
+#include <string.h>
 
 #define PROGNAME "starpu_fxt_data_trace"
+#define MAX_LINE_SIZE 100
 
 static void usage(char *progname)
 {
-	fprintf(stderr, "Usage : %s <filename>\n", progname);
+	fprintf(stderr, "Usage : %s <filename> [codelet...]\n", progname);
 	exit(77);
 }
 
-static void write_plt(){
+static void write_plt(int argc, char **argv){
+	FILE *codelet_list = fopen("codelet_list", "r");
+	if(!codelet_list)
+	{
+		perror("Error while opening codelet list:");
+		exit(-1);
+	}
+	char codelet_name[MAX_LINE_SIZE];
 	FILE *plt = fopen("data_trace.gp", "w+");
 	if(!plt){
-		fprintf(stderr, "Error while creating data_trace.plt");
+		perror("Error while creating data_trace.plt:");
 		exit(-1);
 	}
 
@@ -25,8 +34,53 @@ static void write_plt(){
 	fprintf(plt, "set logscale y\n");
 	fprintf(plt, "set xlabel \"tasks size (ms)\"\n");
 	fprintf(plt, "set ylabel \"data size (B)\"\n");
-	fprintf(plt, "plot \"data_total.txt\" using 1:2 with dots lw 1\n");
-	if(fclose(plt)){
+	fprintf(plt, "plot ");
+	int c_iter;
+	char *v_iter;
+	int begin = 1;
+	while(fgets(codelet_name, MAX_LINE_SIZE, codelet_list) != NULL)
+	{
+		if(argc == 0)
+		{
+			if(begin)
+				begin = 0;
+			else
+			fprintf(plt, ", ");
+		}
+		int size = strlen(codelet_name);
+		if(size > 0)
+			codelet_name[size-1] = '\0';
+		if(argc != 0)
+		{
+			for(c_iter = 0, v_iter = argv[c_iter];
+			    c_iter < argc;
+			    c_iter++, v_iter = argv[c_iter])
+			{
+				if(!strcmp(v_iter, codelet_name))
+				{
+					if(begin)
+						begin = 0;
+					else
+						fprintf(plt, ", ");
+					fprintf(plt, "\"%s\" using 1:2 with dots lw 1 title \"%s\"", codelet_name, codelet_name);
+				}
+			}
+		}
+		else
+		{
+			fprintf(plt, "\"%s\" using 1:2 with dots lw 1 title \"%s\"", codelet_name, codelet_name);
+		}
+	}
+	fprintf(plt, "\n");
+
+	if(fclose(codelet_list))
+	{
+		perror("close failed :");
+		exit(-1);
+	}
+
+	if(fclose(plt))
+	{
 		perror("close failed :");
 		exit(-1);
 	}
@@ -34,11 +88,11 @@ static void write_plt(){
 
 int main(int argc, char **argv)
 {
-	if(argc != 2)
+	if(argc < 2)
 	{
 		usage(argv[0]);
 	}
 	starpu_fxt_write_data_trace(argv[1]);
-	write_plt();
+	write_plt(argc - 2, argv + 2);
 	return 0;
 }