Pārlūkot izejas kodu

add comments for hypervisor examples + add a new example using lp strategy

Andra Hugo 12 gadi atpakaļ
vecāks
revīzija
8089b81d73

+ 3 - 2
sched_ctx_hypervisor/examples/Makefile.am

@@ -20,8 +20,9 @@ AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_GLPK_LDFLA
 
 if !NO_BLAS_LIB
 noinst_PROGRAMS =				\
-	cholesky/cholesky_implicit  \
-	app_driven_test/app_driven_test
+	cholesky/cholesky_implicit  		\
+	app_driven_test/app_driven_test		\
+	lp_test/lp_test
 
 noinst_HEADERS = 				\
 	cholesky/cholesky.h			\

+ 59 - 19
sched_ctx_hypervisor/examples/app_driven_test/app_driven_test.c

@@ -19,44 +19,56 @@
 #include <starpu.h>
 #include <sched_ctx_hypervisor.h>
 
+#define NTASKS 1000
+#define NINCR 10
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
-/* Every implementation of a codelet must have this prototype, the first                                                                                                                                             * argument (buffers) describes the buffers/streams that are managed by the
- * DSM; the second arguments references read-only data that is passed as an
- * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
- * are no data input/output managed by the DSM (cl.nbuffers = 0) */
 struct params
 {
 	unsigned sched_ctx;
-    int task_tag;
+	int task_tag;
 };
 
+unsigned val[2];
+pthread_mutex_t mut[2];
+
+/* Every implementation of a codelet must have this prototype: the first
+ * argument (buffers) describes the buffers/streams managed by the DSM; the
+ * second argument references read-only data passed as task->cl_arg. Here,
+ * "buffers" is unused as no data is managed by the DSM (cl.nbuffers = 0) */
+
 void cpu_func(void *buffers[], void *cl_arg)
 {
 	struct params *params = (struct params *) cl_arg;
 
 	int i;
-	for(i = 0; i < 1000; i++);
-	FPRINTF(stdout, "Hello world sched_ctx = %d task_tag = %d \n", params->sched_ctx, params->task_tag);
+	for(i = 0; i < NINCR; i++)
+	{
+		pthread_mutex_lock(&mut[params->sched_ctx - 1]);
+		val[params->sched_ctx - 1]++;
+		pthread_mutex_unlock(&mut[params->sched_ctx - 1]);
+	}
+	if(params->task_tag != 0)
+		FPRINTF(stdout, "Task with tag %d executed in ctx = %d %d counter_tests\n", params->task_tag, params->sched_ctx, val[params->sched_ctx - 1]);
 }
 
-struct starpu_codelet cl = {};
+struct starpu_codelet cl = {0};
 
+/* the management of the tags is done by the user */
+/* who will take care that the tags will be unique */
 int tag = 1;
-void* start_thread(void *arg)
+void* submit_tasks_thread(void *arg)
 {
 	unsigned sched_ctx = *((unsigned*)arg);
 	starpu_sched_ctx_set_context(&sched_ctx);
 
-	struct starpu_task *task[10];
-	struct params params[10];
+	struct starpu_task *task[NTASKS];
+	struct params params[NTASKS];
 	int i;
-	for(i = 0; i < 10; i++)
+	for(i = 0; i < NTASKS; i++)
 	{
-		int j;
-		for(j = 0; j < 1000; j++);
 		task[i] = starpu_task_create();
-
+//		usleep(5000);
 		cl.cpu_funcs[0] = cpu_func;
 		cl.nbuffers = 0;
 
@@ -64,13 +76,18 @@ void* start_thread(void *arg)
 
 		if(sched_ctx == 1 && i == 5)
 		{
+			/* tag the tasks whose execution will start the resizing process */
 			task[i]->hypervisor_tag = tag;
+			/* indicate particular settings the context should have when the 
+			   resizing will be done */
 			sched_ctx_hypervisor_ioctl(sched_ctx,
 						   HYPERVISOR_TIME_TO_APPLY, tag,
 						   HYPERVISOR_MIN_WORKERS, 2,
 						   HYPERVISOR_MAX_WORKERS, 12,
 						   HYPERVISOR_NULL);
 			printf("require resize for sched_ctx %d at tag %d\n", sched_ctx, tag);
+			/* specify that the contexts should be resized when the task having this
+			   particular tag will finish executing */
 			sched_ctx_hypervisor_resize(sched_ctx, tag);
 		}
 
@@ -84,6 +101,7 @@ void* start_thread(void *arg)
 	}
 
 	starpu_task_wait_for_all();
+	return;
 }
 
 int main()
@@ -104,27 +122,49 @@ int main()
 	for(i = 0; i < nres2; i++)
 		ressources2[i] = nres1+i;
 
-	unsigned sched_ctx1 = starpu_sched_ctx_create("heft", ressources1, nres1, "sched_ctx1");
-	unsigned sched_ctx2 = starpu_sched_ctx_create("heft", ressources2, nres2, "sched_ctx2");
+	/* create contexts */
+	unsigned sched_ctx1 = starpu_sched_ctx_create("dmda", ressources1, nres1, "sched_ctx1");
+	unsigned sched_ctx2 = starpu_sched_ctx_create("dmda", ressources2, nres2, "sched_ctx2");
 
+	/* initialize the hypervisor */
 	struct sched_ctx_hypervisor_policy policy;
 	policy.custom = 0;
+	/* indicate which strategy to use
+	   in this particular case we use app_driven which allows the user to resize 
+	   the ctxs dynamically at particular moments of the execution of the application */
 	policy.name = "app_driven";
 	void *perf_counters = sched_ctx_hypervisor_init(&policy);
 
+	/* let starpu know which performance counters it should use
+	   to inform the hypervisor how the application and the resources are executing */
 	starpu_sched_ctx_set_perf_counters(sched_ctx1, (struct starpu_sched_ctx_performance_counters*)perf_counters);
 	starpu_sched_ctx_set_perf_counters(sched_ctx2, (struct starpu_sched_ctx_performance_counters*)perf_counters);
+
+	/* register the contexts that should be managed by the hypervisor
+	   and indicate an approximate amount of workload if known;
+	   in this case we don't know it and we put 0 */
 	sched_ctx_hypervisor_register_ctx(sched_ctx1, 0.0);
 	sched_ctx_hypervisor_register_ctx(sched_ctx2, 0.0);
 
 	starpu_pthread_t tid[2];
 
-	starpu_pthread_create(&tid[0], NULL, start_thread, (void*)&sched_ctx1);
-	starpu_pthread_create(&tid[1], NULL, start_thread, (void*)&sched_ctx2);
+	val[0] = 0;
+	val[1] = 0;
+	pthread_mutex_init(&mut[0], NULL);
+	pthread_mutex_init(&mut[1], NULL);
+
+	/* we create two threads to simulate simultaneous submission of tasks */
+	starpu_pthread_create(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1);
+	starpu_pthread_create(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2);
 
 	starpu_pthread_join(tid[0], NULL);
 	starpu_pthread_join(tid[1], NULL);
 
+	/* free starpu and hypervisor data */
 	starpu_shutdown();
 	sched_ctx_hypervisor_shutdown();
+
+	FPRINTF(stdout, "ctx = %d executed %d counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR);
+	FPRINTF(stdout, "ctx = %d executed %d counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR);
+	return 0;
 }

+ 134 - 0
sched_ctx_hypervisor/examples/lp_test/lp_test.c

@@ -0,0 +1,134 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2012  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+#include <sched_ctx_hypervisor.h>
+
+#define NTASKS 1000
+#define NINCR 10
+#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+
+
+unsigned val[2];
+pthread_mutex_t mut[2];
+
+/* Every implementation of a codelet must have this prototype: the first
+ * argument (buffers) describes the buffers/streams managed by the DSM; the
+ * second argument references read-only data passed as task->cl_arg. Here,
+ * "buffers" is unused as no data is managed by the DSM (cl.nbuffers = 0) */
+
+void cpu_func(void *buffers[], void *cl_arg)
+{
+	unsigned sched_ctx = *((unsigned *) cl_arg);
+
+	int i;
+	for(i = 0; i < NINCR; i++)
+	{
+		pthread_mutex_lock(&mut[sched_ctx - 1]);
+		val[sched_ctx - 1]++;
+		pthread_mutex_unlock(&mut[sched_ctx - 1]);
+	}
+}
+
+struct starpu_codelet cl = {0};
+
+void* submit_tasks_thread(void *arg)
+{
+	unsigned sched_ctx = *((unsigned*)arg);
+	starpu_sched_ctx_set_context(&sched_ctx);
+
+	struct starpu_task *task[NTASKS];
+	int i;
+	for(i = 0; i < NTASKS; i++)
+	{
+		task[i] = starpu_task_create();
+		cl.cpu_funcs[0] = cpu_func;
+		cl.nbuffers = 0;
+
+		task[i]->cl = &cl;
+
+		task[i]->cl_arg = &sched_ctx;
+		task[i]->cl_arg_size = sizeof(unsigned);
+
+		task[i]->flops = NINCR*1000000000.0;
+		starpu_task_submit(task[i]);
+	}
+
+	starpu_task_wait_for_all();
+	return;
+}
+
+int main()
+{
+	int ret = starpu_init(NULL);
+
+	if (ret == -ENODEV)
+        return 77;
+
+
+	/* create contexts */
+	unsigned sched_ctx1 = starpu_sched_ctx_create("dmda", NULL, 0, "sched_ctx1");
+	unsigned sched_ctx2 = starpu_sched_ctx_create("dmda", NULL, 0, "sched_ctx2");
+
+	/* initialize the hypervisor */
+	struct sched_ctx_hypervisor_policy policy;
+	policy.custom = 0;
+	/* indicate which strategy to use:
+	   in this particular case we use the lp (linear programming) strategy,
+	   which redistributes workers over the ctxs based on their remaining workload */
+	policy.name = "lp";
+	void *perf_counters = sched_ctx_hypervisor_init(&policy);
+
+	/* let starpu know which performance counters it should use
+	   to inform the hypervisor how the application and the resources are executing */
+	starpu_sched_ctx_set_perf_counters(sched_ctx1, (struct starpu_sched_ctx_performance_counters*)perf_counters);
+	starpu_sched_ctx_set_perf_counters(sched_ctx2, (struct starpu_sched_ctx_performance_counters*)perf_counters);
+
+	double flops1 = NTASKS*NINCR*1000000000.0;
+	double flops2 = NTASKS*NINCR*1000000000.0;
+	/* register the contexts that should be managed by the hypervisor
+	   and indicate an approximate amount of workload if known;
+	   in this case we know the total number of flops to be executed */
+	sched_ctx_hypervisor_register_ctx(sched_ctx1, flops1);
+	sched_ctx_hypervisor_register_ctx(sched_ctx2, flops2);
+        /* lp strategy allows sizing the contexts because we know the total number of flops
+	   to be executed */
+	sched_ctx_hypervisor_size_ctxs(NULL, -1, NULL, -1);
+
+	starpu_pthread_t tid[2];
+
+	val[0] = 0;
+	val[1] = 0;
+	pthread_mutex_init(&mut[0], NULL);
+	pthread_mutex_init(&mut[1], NULL);
+
+	/* we create two threads to simulate simultaneous submission of tasks */
+	starpu_pthread_create(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1);
+	starpu_pthread_create(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2);
+
+	starpu_pthread_join(tid[0], NULL);
+	starpu_pthread_join(tid[1], NULL);
+
+	/* free starpu and hypervisor data */
+	starpu_shutdown();
+	sched_ctx_hypervisor_shutdown();
+
+	FPRINTF(stdout, "ctx = %d executed %d counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR);
+	FPRINTF(stdout, "ctx = %d executed %d counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR);
+	return 0;
+}

+ 36 - 18
sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c

@@ -30,15 +30,20 @@ static void lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_t
 		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 		if(ret != EBUSY)
 		{
-			int total_nw[2];
-			_get_total_nw(NULL, -1, 2, total_nw);
+			int nw = 1;
+#ifdef STARPU_USE_CUDA
+			int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
+			nw = ncuda != 0 ? 2 : 1;
+#endif
+			int total_nw[nw];
+			_get_total_nw(NULL, -1, nw, total_nw);
 
 
 			struct timeval start_time;
 			struct timeval end_time;
 			gettimeofday(&start_time, NULL);
 
-			double vmax = _lp_get_nworkers_per_ctx(nsched_ctxs, 2, nworkers, total_nw);
+			double vmax = _lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers, total_nw);
 			gettimeofday(&end_time, NULL);
 
 			long diff_s = end_time.tv_sec  - start_time.tv_sec;
@@ -48,9 +53,9 @@ static void lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_t
 
 			if(vmax != 0.0)
 			{
-				int nworkers_rounded[nsched_ctxs][2];
-				_lp_round_double_to_int(nsched_ctxs, 2, nworkers, nworkers_rounded);
-				_lp_redistribute_resources_in_ctxs(nsched_ctxs, 2, nworkers_rounded, nworkers);
+				int nworkers_rounded[nsched_ctxs][nw];
+				_lp_round_double_to_int(nsched_ctxs, nw, nworkers, nworkers_rounded);
+				_lp_redistribute_resources_in_ctxs(nsched_ctxs, nw, nworkers_rounded, nworkers);
 			}
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
@@ -59,27 +64,40 @@ static void lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_t
 static void lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworkers)
 {
 	int nsched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_nsched_ctxs() : ns;
-	double nworkers_per_type[nsched_ctxs][2];
-	int total_nw[2];
-	_get_total_nw(workers, nworkers, 2, total_nw);
+	int nw = 1;
+#ifdef STARPU_USE_CUDA
+	int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
+	nw = ncuda != 0 ? 2 : 1;
+#endif
+	double nworkers_per_type[nsched_ctxs][nw];
+	int total_nw[nw];
+	_get_total_nw(workers, nworkers, nw, total_nw);
 
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
-	double vmax = _lp_get_nworkers_per_ctx(nsched_ctxs, 2, nworkers_per_type, total_nw);
+	double vmax = _lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers_per_type, total_nw);
 	if(vmax != 0.0)
 	{
-/*  		printf("********size\n"); */
+// 		printf("********size\n");
 /* 		int i; */
 /* 		for( i = 0; i < nsched_ctxs; i++) */
 /* 		{ */
 /* 			printf("ctx %d/worker type %d: n = %lf \n", i, 0, nworkers_per_type[i][0]); */
-/* 			printf("ctx %d/worker type %d: n = %lf \n", i, 1, nworkers_per_type[i][1]); */
+/* #ifdef STARPU_USE_CUDA */
+/* 			int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); */
+/* 			if(ncuda != 0) */
+/* 				printf("ctx %d/worker type %d: n = %lf \n", i, 1, nworkers_per_type[i][1]); */
+/* #endif */
 /* 		} */
-		int nworkers_per_type_rounded[nsched_ctxs][2];
-		_lp_round_double_to_int(nsched_ctxs, 2, nworkers_per_type, nworkers_per_type_rounded);
-/*       		for( i = 0; i < nsched_ctxs; i++) */
+		int nworkers_per_type_rounded[nsched_ctxs][nw];
+		_lp_round_double_to_int(nsched_ctxs, nw, nworkers_per_type, nworkers_per_type_rounded);
+/*       	for( i = 0; i < nsched_ctxs; i++) */
 /* 		{ */
 /* 			printf("ctx %d/worker type %d: n = %d \n", i, 0, nworkers_per_type_rounded[i][0]); */
-/* 			printf("ctx %d/worker type %d: n = %d \n", i, 1, nworkers_per_type_rounded[i][1]); */
+/* #ifdef STARPU_USE_CUDA */
+/* 			int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); */
+/* 			if(ncuda != 0) */
+/* 				printf("ctx %d/worker type %d: n = %d \n", i, 1, nworkers_per_type_rounded[i][1]); */
+/* #endif */
 /* 		} */
 		int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : 
 			sched_ctxs;
@@ -97,9 +115,9 @@ static void lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworkers)
 			}
 		}
 		if(has_workers)
-			_lp_redistribute_resources_in_ctxs(nsched_ctxs, 2, nworkers_per_type_rounded, nworkers_per_type);
+			_lp_redistribute_resources_in_ctxs(nsched_ctxs, nw, nworkers_per_type_rounded, nworkers_per_type);
 		else
-			_lp_distribute_resources_in_ctxs(sched_ctxs, nsched_ctxs, 2, nworkers_per_type_rounded, nworkers_per_type, workers, nworkers);
+			_lp_distribute_resources_in_ctxs(sched_ctxs, nsched_ctxs, nw, nworkers_per_type_rounded, nworkers_per_type, workers, nworkers);
 	}
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }

+ 26 - 7
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c

@@ -232,17 +232,26 @@ double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double r
 	for(i = 0; i < nsched_ctxs; i++)
 	{
 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
-		v[i][0] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
-		v[i][1] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
-
+#ifdef STARPU_USE_CUDA
+		int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
+		if(ncuda != 0)
+		{
+			v[i][0] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
+			v[i][1] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
+		}
+		else
+			v[i][0] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
+#else
+		v[i][0] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
+#endif // STARPU_USE_CUDA
 		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
 //		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
 	}
 
 	return 1/_lp_compute_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
-#else
+#else//STARPU_HAVE_GLPK_H
 	return 0.0;
-#endif
+#endif//STARPU_HAVE_GLPK_H
 }
 
 double _lp_get_tmax(int nw, int *workers)
@@ -561,9 +570,19 @@ void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_r
 		for(w = 0; w < nw; w++)
 		{
 			enum starpu_archtype arch;
-			if(w == 0) arch = STARPU_CUDA_WORKER;
-			if(w == 1) arch = STARPU_CPU_WORKER;
 
+#ifdef STARPU_USE_CUDA
+			int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
+			if(ncuda != 0)
+			{
+				if(w == 0) arch = STARPU_CUDA_WORKER;
+				if(w == 1) arch = STARPU_CPU_WORKER;
+			}
+			else
+				if(w == 0) arch = STARPU_CPU_WORKER;
+#else
+			if(w == 0) arch = STARPU_CPU_WORKER;
+#endif //STARPU_USE_CUDA
 			if(w == 1)
 			{
 				int nworkers_to_add = res_rounded[s][w];

+ 7 - 2
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -578,8 +578,13 @@ void _get_total_nw(int *workers, int nworkers, int ntypes_of_workers, int total_
 	{
  		enum starpu_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
 			starpu_worker_get_type(workers[w]);
-		if(arch == STARPU_CPU_WORKER)
-			total_nw[1]++;
+		if(ntypes_of_workers == 2)
+		{
+			if(arch == STARPU_CPU_WORKER)
+				total_nw[1]++;
+			else
+				total_nw[0]++;
+		}
 		else
 			total_nw[0]++;
 	}