Browse Source

Bug fixing (resize for magma)

Andra Hugo 13 years ago
parent
commit
fd6e0aad2a

+ 2 - 1
configure.ac

@@ -161,7 +161,7 @@ fi
 
 ###############################################################################
 #									      #
-#				SCED_CTX settings			      #
+#				SCHED_CTX settings			      #
 #									      #
 ###############################################################################
 AC_MSG_CHECKING(maximum number of sched_ctxs)
@@ -179,6 +179,7 @@ AC_ARG_ENABLE([sched_ctx_hypervisor],
 
 if test "x$enable_sched_ctx_hypervisor" = "xyes"; then
    AC_DEFINE(STARPU_USE_SCHED_CTX_HYPERVISOR, [1], [enable sched_ctx_hypervisor lib])
+   PKG_CHECK_MODULES([SCHED_CTX_HYPERVISOR], [libsched_ctx_hypervisor], [], build_sched_ctx_hypervisor="yes")
    build_sched_ctx_hypervisor="yes"
 else
    build_sched_ctx_hypervisor="no"

+ 1 - 0
include/starpu_config.h.in

@@ -46,6 +46,7 @@
 #undef STARPU_MAXCUDADEVS
 #undef STARPU_MAXOPENCLDEVS
 #undef STARPU_NMAXWORKERS
+#undef STARPU_NMAX_SCHED_CTXS
 #undef STARPU_MAXIMPLEMENTATIONS
 
 #undef STARPU_HAVE_LIBNUMA

+ 0 - 2
sched_ctx_hypervisor/Makefile.am

@@ -17,7 +17,5 @@ SUBDIRS = src examples
 
 libsched_ctx_hypervisor_la_includedir=$(includedir)
 
-noinst_HEADERS = src/sched_ctx_hypervisor_intern.h
-
 include_HEADERS = include/sched_ctx_hypervisor.h
 

+ 64 - 64
sched_ctx_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c

@@ -309,71 +309,71 @@ void construct_contexts(void (*bench)(float*, unsigned, unsigned))
 
 void set_hypervisor_conf(int event, int task_tag)
 {
-	unsigned *id = pthread_getspecific(key);
-	if(*id == 0)
-	{
-		if(event == END_BENCH)
-		{
-			if(it < 2)
-			{
-				sched_ctx_hypervisor_ioctl(p2.ctx,
-							   HYPERVISOR_MIN_WORKERS, 2,
-							   HYPERVISOR_MAX_WORKERS, 4,
-							   HYPERVISOR_TIME_TO_APPLY, task_tag,
-							   NULL);
-
-				printf("%d: set max %d for tag %d\n", p2.ctx, 4, task_tag);
-				sched_ctx_hypervisor_ioctl(p1.ctx,
-							   HYPERVISOR_MIN_WORKERS, 6,
-							   HYPERVISOR_MAX_WORKERS, 8,
-							   HYPERVISOR_TIME_TO_APPLY, task_tag,
-							   NULL);
-				printf("%d: set max %d for tag %d\n", p1.ctx, 8, task_tag);
-				sched_ctx_hypervisor_resize(p1.ctx, task_tag);
-			}
-			if(it == 2)
-			{
-				sched_ctx_hypervisor_ioctl(p2.ctx,
-							   HYPERVISOR_MIN_WORKERS, 12,
-							   HYPERVISOR_MAX_WORKERS, 12,
-							   HYPERVISOR_TIME_TO_APPLY, task_tag,
-							   NULL);
-				printf("%d: set max %d for tag %d\n", p2.ctx, 12, task_tag);
-				sched_ctx_hypervisor_ioctl(p1.ctx,
-							   HYPERVISOR_MIN_WORKERS, 0,
-							   HYPERVISOR_MAX_WORKERS, 0,
-							   HYPERVISOR_TIME_TO_APPLY, task_tag,
-							   NULL);
-				printf("%d: set max %d for tag %d\n", p1.ctx, 0, task_tag);
-				sched_ctx_hypervisor_resize(p1.ctx, task_tag);
-			}
-			it++;
+/* 	unsigned *id = pthread_getspecific(key); */
+/* 	if(*id == 0) */
+/* 	{ */
+/* 		if(event == END_BENCH) */
+/* 		{ */
+/* 			if(it < 2) */
+/* 			{ */
+/* 				sched_ctx_hypervisor_ioctl(p2.ctx, */
+/* 							   HYPERVISOR_MIN_WORKERS, 2, */
+/* 							   HYPERVISOR_MAX_WORKERS, 4, */
+/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 							   NULL); */
+
+/* 				printf("%d: set max %d for tag %d\n", p2.ctx, 4, task_tag); */
+/* 				sched_ctx_hypervisor_ioctl(p1.ctx, */
+/* 							   HYPERVISOR_MIN_WORKERS, 6, */
+/* 							   HYPERVISOR_MAX_WORKERS, 8, */
+/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 							   NULL); */
+/* 				printf("%d: set max %d for tag %d\n", p1.ctx, 8, task_tag); */
+/* 				sched_ctx_hypervisor_resize(p1.ctx, task_tag); */
+/* 			} */
+/* 			if(it == 2) */
+/* 			{ */
+/* 				sched_ctx_hypervisor_ioctl(p2.ctx, */
+/* 							   HYPERVISOR_MIN_WORKERS, 12, */
+/* 							   HYPERVISOR_MAX_WORKERS, 12, */
+/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 							   NULL); */
+/* 				printf("%d: set max %d for tag %d\n", p2.ctx, 12, task_tag); */
+/* 				sched_ctx_hypervisor_ioctl(p1.ctx, */
+/* 							   HYPERVISOR_MIN_WORKERS, 0, */
+/* 							   HYPERVISOR_MAX_WORKERS, 0, */
+/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 							   NULL); */
+/* 				printf("%d: set max %d for tag %d\n", p1.ctx, 0, task_tag); */
+/* 				sched_ctx_hypervisor_resize(p1.ctx, task_tag); */
+/* 			} */
+/* 			it++; */
 				
-		}
-	}
-	else
-	{
-		if(event == END_BENCH)
-		{
-			if(it2 < 3)
-			{
-				sched_ctx_hypervisor_ioctl(p1.ctx,
-							   HYPERVISOR_MIN_WORKERS, 6,
-							   HYPERVISOR_MAX_WORKERS, 12,
-							   HYPERVISOR_TIME_TO_APPLY, task_tag,
-							   NULL);
-				printf("%d: set max %d for tag %d\n", p1.ctx, 12, task_tag);
-				sched_ctx_hypervisor_ioctl(p2.ctx,
-							   HYPERVISOR_MIN_WORKERS, 0,
-							   HYPERVISOR_MAX_WORKERS, 0,
-							   HYPERVISOR_TIME_TO_APPLY, task_tag,
-							   NULL);
-				printf("%d: set max %d for tag %d\n", p2.ctx, 0, task_tag);
-				sched_ctx_hypervisor_resize(p2.ctx, task_tag);
-			}
-			it2++;
-		}
-	}
+/* 		} */
+/* 	} */
+/* 	else */
+/* 	{ */
+/* 		if(event == END_BENCH) */
+/* 		{ */
+/* 			if(it2 < 3) */
+/* 			{ */
+/* 				sched_ctx_hypervisor_ioctl(p1.ctx, */
+/* 							   HYPERVISOR_MIN_WORKERS, 6, */
+/* 							   HYPERVISOR_MAX_WORKERS, 12, */
+/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 							   NULL); */
+/* 				printf("%d: set max %d for tag %d\n", p1.ctx, 12, task_tag); */
+/* 				sched_ctx_hypervisor_ioctl(p2.ctx, */
+/* 							   HYPERVISOR_MIN_WORKERS, 0, */
+/* 							   HYPERVISOR_MAX_WORKERS, 0, */
+/* 							   HYPERVISOR_TIME_TO_APPLY, task_tag, */
+/* 							   NULL); */
+/* 				printf("%d: set max %d for tag %d\n", p2.ctx, 0, task_tag); */
+/* 				sched_ctx_hypervisor_resize(p2.ctx, task_tag); */
+/* 			} */
+/* 			it2++; */
+/* 		} */
+/* 	} */
 
 	/* if(*id == 1) */
 	/* { */

+ 0 - 2
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -1,6 +1,4 @@
 #include <starpu.h>
-#include <../common/config.h>
-#include <../common/htable32.h>
 #include <pthread.h>
 
 /* ioctl properties*/

+ 3 - 1
sched_ctx_hypervisor/src/Makefile.am

@@ -27,4 +27,6 @@ libsched_ctx_hypervisor_la_LIBADD = $(top_builddir)/src/libstarpu.la
 
 libsched_ctx_hypervisor_la_SOURCES = 	\
 	sched_ctx_hypervisor.c		\
-	hypervisor_policies/simple_policy.c
+	hypervisor_policies/simple_policy.c
+
+noinst_HEADERS = sched_ctx_hypervisor_intern.h

+ 4 - 3
sched_ctx_hypervisor/src/hypervisor_policies/simple_policy.c

@@ -245,7 +245,7 @@ static unsigned _get_nworkers_to_move(unsigned req_sched_ctx)
 			else
 				nworkers_to_move = potential_moving_workers - (config->min_nworkers - nfixed_workers);	
 		}
-		printf("nworkers = %d nworkers_to_move = %d max_nworkers=%d\n", nworkers, nworkers_to_move, config->max_nworkers);
+//		printf("nworkers = %d nworkers_to_move = %d max_nworkers=%d\n", nworkers, nworkers_to_move, config->max_nworkers);
 		if((nworkers - nworkers_to_move) > config->max_nworkers)
 			nworkers_to_move = nworkers - config->max_nworkers;
 	}
@@ -263,7 +263,7 @@ static int _find_fastest_sched_ctx()
 	for(i = 0; i < nsched_ctxs; i++)
 	{
 		curr_debit = sched_ctx_hypervisor_get_debit(sched_ctxs[i]);
-		if(fastest_debit <= curr_debit)
+		if(fastest_debit < curr_debit)
 		{
 			fastest_debit = curr_debit;
 			fastest_sched_ctx = sched_ctxs[i];
@@ -284,7 +284,7 @@ static int _find_slowest_sched_ctx()
 	for(i = 0; i < nsched_ctxs; i++)
 	{
 		curr_debit = sched_ctx_hypervisor_get_debit(sched_ctxs[i]);
-		if(slowest_debit >= curr_debit)
+		if(slowest_debit > curr_debit)
 		{
 			slowest_debit = curr_debit;
 			slowest_sched_ctx = sched_ctxs[i];
@@ -358,6 +358,7 @@ static void simple_manage_task_flux(unsigned curr_sched_ctx)
 	
 	int slow_sched_ctx = _find_slowest_sched_ctx();
 	int fast_sched_ctx = _find_fastest_sched_ctx();
+
 	if(slow_sched_ctx != fast_sched_ctx && slow_sched_ctx != -1 && fast_sched_ctx != -1)
 	{
 		if(curr_sched_ctx == slow_sched_ctx)

+ 40 - 26
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -83,11 +83,15 @@ void sched_ctx_hypervisor_shutdown(void)
 	int i;
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 	{
-		sched_ctx_hypervisor_stop_resize(hypervisor.sched_ctxs[i]);
                 if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && hypervisor.nsched_ctxs > 0)
-			sched_ctx_hypervisor_ignore_ctx(i);
+		{
+			sched_ctx_hypervisor_stop_resize(hypervisor.sched_ctxs[i]);
+			sched_ctx_hypervisor_ignore_ctx(hypervisor.sched_ctxs[i]);
+		}
 	}
 	free(criteria);
+	criteria = NULL;
+
 	pthread_mutex_destroy(&act_hypervisor_mutex);
 }
 
@@ -158,14 +162,14 @@ void sched_ctx_hypervisor_ignore_ctx(unsigned sched_ctx)
 
 void sched_ctx_hypervisor_set_config(unsigned sched_ctx, void *config)
 {
-	    printf("%d: ", sched_ctx );
+	printf("%d: ", sched_ctx );
 	if(hypervisor.sched_ctx_w[sched_ctx].config != NULL && config != NULL)
-	  {
+	{
 		hypervisor.policy.update_config(hypervisor.sched_ctx_w[sched_ctx].config, config);
-	  }
+	}
 	else
 		hypervisor.sched_ctx_w[sched_ctx].config = config;
-
+	
 	return;
 }
 
@@ -348,17 +352,23 @@ void sched_ctx_hypervisor_steal_workers(unsigned sched_ctx, int *workerids, int
 
 static void reset_idle_time_cb(unsigned sched_ctx, int worker)
 {
-	if(hypervisor.resize[sched_ctx])
-		hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
+	if(criteria != NULL)
+	{
+		if(hypervisor.resize[sched_ctx])
+			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
+	}
 }
 
 static void idle_time_cb(unsigned sched_ctx, int worker, double idle_time)
 {
-	if(hypervisor.resize[sched_ctx] && hypervisor.nsched_ctxs > 1 && hypervisor.policy.manage_idle_time)
+	if(criteria != NULL)
 	{
-		hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] += idle_time;
-		hypervisor.policy.manage_idle_time(sched_ctx, worker, hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker]);
-
+		if(hypervisor.resize[sched_ctx] && hypervisor.nsched_ctxs > 1 && hypervisor.policy.manage_idle_time)
+		{
+			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] += idle_time;
+			hypervisor.policy.manage_idle_time(sched_ctx, worker, hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker]);
+			
+		}
 	}
 	return;
 }
@@ -386,26 +396,20 @@ double sched_ctx_hypervisor_get_debit(unsigned sched_ctx)
 
 	int npushed_tasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
 	int npoped_tasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].poped_tasks);
-	if(hypervisor.sched_ctx_w[sched_ctx].temp_npushed_tasks != npushed_tasks || hypervisor.sched_ctx_w[sched_ctx].temp_npoped_tasks!= npoped_tasks)
+	STARPU_ASSERT(npoped_tasks <= npushed_tasks);
+	if(npushed_tasks > 0 && npoped_tasks > 0)
 	{
-		hypervisor.sched_ctx_w[sched_ctx].temp_npushed_tasks = npushed_tasks;
-		hypervisor.sched_ctx_w[sched_ctx].temp_npoped_tasks = npoped_tasks;
-		
-		STARPU_ASSERT(npoped_tasks <= npushed_tasks);
-		if(npushed_tasks > 0 && npoped_tasks > 0)
-		{
-			double debit = (((double)npoped_tasks)*1.0)/((double)npushed_tasks * 1.0);
-			return debit;
-		}
+		double debit = (((double)npoped_tasks)*1.0)/((double)npushed_tasks * 1.0);
+		return debit;
 	}
-
+	
 	return 0.0;
 }
 
 static void pushed_task_cb(unsigned sched_ctx, int worker)
 {	
 	hypervisor.sched_ctx_w[sched_ctx].pushed_tasks[worker]++;
-       
+	
 	int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
 	
 	if(!imposed_resize)
@@ -416,8 +420,18 @@ static void poped_task_cb(unsigned sched_ctx, int worker)
 {
 	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
 	
-	/* if(hypervisor.nsched_ctxs > 1) */
-	/* 	hypervisor.policy.manage_task_flux(sched_ctx); */
+	if(hypervisor.nsched_ctxs > 1)
+	{
+		int npushed_tasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
+		int npoped_tasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].poped_tasks);
+		if(hypervisor.sched_ctx_w[sched_ctx].temp_npushed_tasks != npushed_tasks &&
+		   hypervisor.sched_ctx_w[sched_ctx].temp_npoped_tasks != npoped_tasks)
+		{
+			hypervisor.policy.manage_task_flux(sched_ctx);
+			hypervisor.sched_ctx_w[sched_ctx].temp_npushed_tasks = npushed_tasks;
+			hypervisor.sched_ctx_w[sched_ctx].temp_npoped_tasks = npoped_tasks;
+		}
+	}
 }
 
 static void post_exec_hook_cb(unsigned sched_ctx, int task_tag)

+ 1 - 0
sched_ctx_hypervisor/src/sched_ctx_hypervisor_intern.h

@@ -1,4 +1,5 @@
 #include <sched_ctx_hypervisor.h>
+#include <../common/htable32.h>
 
 struct sched_ctx_wrapper {
 	unsigned sched_ctx;

+ 31 - 0
src/core/perfmodel/perfmodel_bus.c

@@ -921,6 +921,37 @@ static void write_bus_bandwidth_file_content(void)
 	fclose(f);
 }
 
+/* Print the contents of bandwidth_matrix to f as a tab-separated table.
+ *
+ * The first line is a header: "from", then one "to ..." label for RAM, for
+ * each of the ncuda CUDA devices and for each of the nopencl OpenCL devices.
+ * Every following line starts with the name of the source node and lists
+ * bandwidth_matrix[source][destination] for each destination node. */
+void starpu_print_bus_bandwidth(FILE *f)
+{
+	int row, col;
+	int last_node = ncuda;
+
+#ifdef STARPU_USE_OPENCL
+	last_node += nopencl;
+#endif
+
+	/* Header: one column label per destination node. */
+	fprintf(f, "from\t");
+	fprintf(f, "to RAM\t\t");
+	for (col = 0; col < ncuda; col++)
+		fprintf(f, "to CUDA %d\t", col);
+	for (col = 0; col < nopencl; col++)
+		fprintf(f, "to OpenCL %d\t", col);
+	fprintf(f, "\n");
+
+	/* Index 0 is labelled RAM, indices 1..ncuda are labelled as CUDA
+	 * devices and any index above ncuda as an OpenCL device. */
+	for (row = 0; row <= last_node; row++)
+	{
+		if (row == 0)
+			fprintf(f, "RAM\t");
+		else if (row > ncuda)
+			fprintf(f, "OpenCL%d\t", row - ncuda - 1);
+		else
+			fprintf(f, "CUDA %d\t", row - 1);
+
+		for (col = 0; col <= last_node; col++)
+			fprintf(f, "%f\t", bandwidth_matrix[row][col]);
+
+		fprintf(f, "\n");
+	}
+}
 static void generate_bus_bandwidth_file(void)
 {
 	if (!was_benchmarked)

+ 33 - 4
src/core/sched_policy.c

@@ -284,6 +284,25 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 	}
 }
 
+/* Count the workers of sched_ctx for which
+ * starpu_worker_may_execute_task(worker, task, 0) is true.
+ * NOTE(review): the trailing 0 is presumably the implementation index, so
+ * only that implementation is considered -- confirm against the prototype.
+ *
+ * The context's worker collection is walked through its cursor interface
+ * (has_next/get_next). init_cursor and deinit_cursor are treated as optional
+ * hooks and are only called when the collection provides them.
+ *
+ * Returns the number of matching workers; 0 means that no worker currently
+ * in the context accepts the task. */
+static int _starpu_nworkers_able_to_execute_task(struct starpu_task *task, struct starpu_sched_ctx *sched_ctx)
+{
+	struct worker_collection *workers = sched_ctx->workers;
+	int nworkers = 0;
+
+	if(workers->init_cursor)
+		workers->init_cursor(workers);
+
+	while(workers->has_next(workers))
+	{
+		int worker = workers->get_next(workers);
+		if (starpu_worker_may_execute_task(worker, task, 0))
+			nworkers++;
+	}
+
+	/* Tear the cursor down only if it was set up above, and never call
+	 * through a missing deinit hook: testing init_cursor alone before
+	 * calling deinit_cursor would dereference NULL for a collection that
+	 * defines only the init hook. */
+	if(workers->init_cursor && workers->deinit_cursor)
+		workers->deinit_cursor(workers);
+
+	return nworkers;
+}
+
 /* the generic interface that call the proper underlying implementation */
 int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 {
@@ -291,9 +310,14 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 	struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 	int workerid = starpu_worker_get_id();
 	unsigned no_workers = 0;
-	unsigned nworkers; 
-       
-	nworkers = sched_ctx->workers->nworkers;
+	unsigned nworkers = 0; 
+
+	/* if none of the workers in the ctx is able to execute the task,
+	   we consider the ctx empty */
+	if(!sched_ctx->is_initial_sched)
+	  nworkers = _starpu_nworkers_able_to_execute_task(task, sched_ctx);
+	else
+	  nworkers = sched_ctx->workers->nworkers;
 
 	if(nworkers == 0)
 	{
@@ -302,6 +326,8 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 			PTHREAD_MUTEX_LOCK(&sched_ctx->no_workers_mutex);
 			PTHREAD_COND_WAIT(&sched_ctx->no_workers_cond, &sched_ctx->no_workers_mutex);
 			PTHREAD_MUTEX_UNLOCK(&sched_ctx->no_workers_mutex);
+			nworkers = _starpu_nworkers_able_to_execute_task(task, sched_ctx);
+			if(nworkers == 0) return _starpu_push_task(j, job_is_already_locked);
 		}
 		else
 		{
@@ -420,11 +446,14 @@ struct starpu_task *_starpu_pop_task(struct starpu_worker_s *worker)
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 	{
 		sched_ctx = worker->sched_ctx[i];
-		if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->criteria != NULL)
+		if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->criteria != NULL 
+		   && sched_ctx->criteria->idle_time_cb && sched_ctx->criteria->reset_idle_time_cb)
+		{
 			if(!task)
 				sched_ctx->criteria->idle_time_cb(sched_ctx->id, worker->workerid, 1.0);
 			else
 				sched_ctx->criteria->reset_idle_time_cb(sched_ctx->id, worker->workerid);
+		}
 	}
 #endif //STARPU_USE_SCHED_CTX_HYPERVISOR