Pārlūkot izejas kodu

Generate the simgrid XML platform file at the same time as the bus perfmodel files. This avoids having to set STARPU_N*: one can simply use STARPU_HOSTNAME to select the simulated machine.

Samuel Thibault 12 gadi atpakaļ
vecāks
revīzija
5cf34a093a

+ 2 - 3
configure.ac

@@ -884,9 +884,7 @@ AC_MSG_RESULT($enable_debug)
 
 if test x$enable_debug = xyes; then
 	CFLAGS="$CFLAGS -O0"
-	if test x$enable_simgrid != xyes; then
-		AC_DEFINE(STARPU_SPINLOCK_CHECK, [1], [check spinlock use])
-	fi
+	AC_DEFINE(STARPU_SPINLOCK_CHECK, [1], [check spinlock use])
 else
 	CFLAGS="-O3 $CFLAGS"
 fi
@@ -1053,6 +1051,7 @@ AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
 		[how many buffers can be manipulated per task])
 
 if test x$enable_simgrid = xyes ; then
+	# We still need the room for the virtual CUDA/OpenCL devices
 	maxnodes=16
 else
 	# We have one memory node shared by all CPU workers, one node per GPU

+ 10 - 12
doc/chapters/perf-optimization.texi

@@ -520,11 +520,11 @@ times before the model is calibrated.
 @subsection Simulation
 
 Then, recompile StarPU, passing @code{--enable-simgrid} to @code{./configure}, and re-run the
-application, specifying the requested number of devices:
+application:
 
 @smallexample
 $ ./configure --enable-simgrid && make
-$ STARPU_SCHED=dmda STARPU_NCPU=12 STARPU_NCUDA=0 STARPU_NOPENCL=1 ./examples/matvecmult/matvecmult
+$ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult
 TEST FAILED !!!
 @end smallexample
 
@@ -535,9 +535,7 @@ If the performance model is not calibrated enough, the following error
 message will be displayed
 
 @smallexample
-$ STARPU_SCHED=dmda STARPU_NCPU=12 STARPU_NCUDA=0 STARPU_NOPENCL=1 ./examples/matvecmult/matvecmult
-[0.000000] [xbt_cfg/INFO] type in variable = 2
-[0.000000] [surf_workstation/INFO] surf_workstation_model_init_ptask_L07
+$ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult
 [starpu][_starpu_load_history_based_model] Warning: model matvecmult
     is not calibrated, forcing calibration for this run. Use the
     STARPU_CALIBRATE environment variable to control this.
@@ -545,16 +543,16 @@ $ STARPU_SCHED=dmda STARPU_NCPU=12 STARPU_NCUDA=0 STARPU_NOPENCL=1 ./examples/ma
     matvecmult does not have a perfmodel, or is not calibrated enough
 @end smallexample
 
-For now, only the number of cpus can be arbitrarily chosen. The number of CUDA
-and OpenCL devices have to be lower than the real number on the current machine.
+The number of devices can be chosen as usual with @code{STARPU_NCPU},
+@code{STARPU_NCUDA}, and @code{STARPU_NOPENCL}.  For now, only the number of
+cpus can be arbitrarily chosen. The numbers of CUDA and OpenCL devices have to be
+lower than the real numbers on the current machine.
 
-The Simgrid default stack size is small, to increase it use the
+The Simgrid default stack size is small; to increase it use the
 parameter @code{--cfg=contexts/stack_size}, for example:
 
 @smallexample
-$ STARPU_NCPU=12 STARPU_NCUDA=2 STARPU_NOPENCL=0 ./example --cfg=contexts/stack_size:8192
-[0.000000] [xbt_cfg/INFO] type in variable = 2
-[0.000000] [surf_workstation/INFO] surf_workstation_model_init_ptask_L07
+$ ./example --cfg=contexts/stack_size:8192
 TEST FAILED !!!
 @end smallexample
 
@@ -568,7 +566,7 @@ virtual timestamp in ms.
 The simgrid support even permits performing simulations on another machine, your
 desktop, typically. To achieve this, one still needs to perform the Calibration
 step on the actual machine to be simulated, then copy the resulting performance models to your desktop
-machine (the @code{$STARPU_HOME/.starpu} directory). One can then performa the
+machine (the @code{$STARPU_HOME/.starpu} directory). One can then perform the
 Simulation step on the desktop machine, by setting the @code{STARPU_HOSTNAME}
 environment variable to the name of the actual machine, to make StarPU use the
 performance models of the simulated machine even on the desktop machine.

+ 212 - 15
src/core/perfmodel/perfmodel_bus.c

@@ -32,6 +32,7 @@
 #include <common/config.h>
 #include <core/workers.h>
 #include <core/perfmodel/perfmodel.h>
+#include <core/simgrid.h>
 #include <common/utils.h>
 
 #ifdef STARPU_USE_OPENCL
@@ -743,7 +744,7 @@ static void load_bus_affinity_file_content(void)
 	FILE *f;
 
 	char path[256];
-	get_affinity_path(path, 256);
+	get_affinity_path(path, sizeof(path));
 
 	_STARPU_DEBUG("loading affinities from %s\n", path);
 
@@ -818,7 +819,7 @@ static void write_bus_affinity_file_content(void)
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
 	FILE *f;
 	char path[256];
-	get_affinity_path(path, 256);
+	get_affinity_path(path, sizeof(path));
 
 	_STARPU_DEBUG("writing affinities to %s\n", path);
 
@@ -884,7 +885,7 @@ static void load_bus_affinity_file(void)
 	int res;
 
 	char path[256];
-	get_affinity_path(path, 256);
+	get_affinity_path(path, sizeof(path));
 
 	res = access(path, F_OK);
 	if (res)
@@ -963,7 +964,7 @@ static int load_bus_latency_file_content(void)
 	double latency;
 
 	char path[256];
-	get_latency_path(path, 256);
+	get_latency_path(path, sizeof(path));
 
 	_STARPU_DEBUG("loading latencies from %s\n", path);
 
@@ -1060,7 +1061,7 @@ static void write_bus_latency_file_content(void)
 	STARPU_ASSERT(was_benchmarked);
 
 	char path[256];
-	get_latency_path(path, 256);
+	get_latency_path(path, sizeof(path));
 
 	_STARPU_DEBUG("writing latencies to %s\n", path);
 
@@ -1148,7 +1149,7 @@ static void load_bus_latency_file(void)
 	int res;
 
 	char path[256];
-	get_latency_path(path, 256);
+	get_latency_path(path, sizeof(path));
 
 	res = access(path, F_OK);
 	if (res || !load_bus_latency_file_content())
@@ -1176,7 +1177,7 @@ static int load_bus_bandwidth_file_content(void)
 	double bandwidth;
 
 	char path[256];
-	get_bandwidth_path(path, 256);
+	get_bandwidth_path(path, sizeof(path));
 
 	_STARPU_DEBUG("loading bandwidth from %s\n", path);
 
@@ -1273,7 +1274,7 @@ static void write_bus_bandwidth_file_content(void)
 	STARPU_ASSERT(was_benchmarked);
 
 	char path[256];
-	get_bandwidth_path(path, 256);
+	get_bandwidth_path(path, sizeof(path));
 
 	_STARPU_DEBUG("writing bandwidth to %s\n", path);
 
@@ -1451,7 +1452,7 @@ static void load_bus_bandwidth_file(void)
 	int res;
 
 	char path[256];
-	get_bandwidth_path(path, 256);
+	get_bandwidth_path(path, sizeof(path));
 
 	res = access(path, F_OK);
 	if (res || !load_bus_bandwidth_file_content())
@@ -1476,7 +1477,7 @@ static void check_bus_config_file(void)
         char path[256];
         struct _starpu_machine_config *config = _starpu_get_machine_config();
 
-        get_config_path(path, 256);
+        get_config_path(path, sizeof(path));
         res = access(path, F_OK);
 	if (res || config->conf->bus_calibrate > 0)
 	{
@@ -1538,14 +1539,13 @@ static void check_bus_config_file(void)
         }
 }
 
-#ifndef STARPU_SIMGRID
 static void write_bus_config_file_content(void)
 {
 	FILE *f;
 	char path[256];
 
 	STARPU_ASSERT(was_benchmarked);
-        get_config_path(path, 256);
+        get_config_path(path, sizeof(path));
 
 	_STARPU_DEBUG("writing config to %s\n", path);
 
@@ -1559,16 +1559,209 @@ static void write_bus_config_file_content(void)
 
         fclose(f);
 }
-#endif
 
 static void generate_bus_config_file(void)
 {
 	if (!was_benchmarked)
 		benchmark_all_gpu_devices();
 
-#ifndef STARPU_SIMGRID
 	write_bus_config_file_content();
+}
+#endif /* !SIMGRID */
+
+void _starpu_simgrid_get_platform_path(char *path, size_t maxlen)
+{
+	get_bus_path("platform.xml", path, maxlen);
+}
+
+#ifndef STARPU_SIMGRID
+static void write_bus_platform_file_content(void)
+{
+	FILE *f;
+	char path[256];
+	int i, j;
+
+	STARPU_ASSERT(was_benchmarked);
+
+	_starpu_simgrid_get_platform_path(path, sizeof(path));
+
+	_STARPU_DEBUG("writing platform to %s\n", path);
+
+	f = fopen(path, "w+");
+	if (!f)
+	{
+		perror("fopen write_bus_platform_file_content");
+		_STARPU_DISP("path '%s'\n", path);
+		fflush(stderr);
+		STARPU_ABORT();
+	}
+
+	fprintf(f,
+"<?xml version='1.0'?>\n"
+" <!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'>\n"
+" <platform version='3'>\n"
+" <config id='General'>\n"
+"   <prop id='network/TCP_gamma' value='-1'></prop>\n"
+"   <prop id='network/latency_factor' value='1'></prop>\n"
+"   <prop id='network/bandwidth_factor' value='1'></prop>\n"
+" </config>\n"
+" <AS  id='AS0'  routing='Full'>\n"
+"   <host id='MAIN' power='1'/>\n"
+		);
+
+	for (i = 0; i < ncpus; i++)
+		fprintf(f, "   <host id='CPU%d' power='2000000000'/>\n", i);
+
+	for (i = 0; i < ncuda; i++)
+		fprintf(f, "   <host id='CUDA%d' power='2000000000'/>\n", i);
+
+	for (i = 0; i < nopencl; i++)
+		fprintf(f, "   <host id='OpenCL%d' power='2000000000'/>\n", i);
+
+	fprintf(f, "\n   <host id='RAM' power='1'/>\n");
+
+	/* Compute maximum bandwidth, taken as machine bandwidth */
+	double max_bandwidth = 0;
+#ifdef STARPU_USE_CUDA
+	for (i = 0; i < ncuda; i++)
+	{
+		double down_bw = 1.0 / cudadev_timing_dtoh[1+i];
+		double up_bw = 1.0 / cudadev_timing_htod[1+i];
+		if (max_bandwidth < down_bw)
+			max_bandwidth = down_bw;
+		if (max_bandwidth < up_bw)
+			max_bandwidth = up_bw;
+	}
+#endif
+#ifdef STARPU_USE_OPENCL
+	for (i = 0; i < nopencl; i++)
+	{
+		double down_bw = 1.0 / opencldev_timing_dtoh[1+i];
+		double up_bw = 1.0 / opencldev_timing_htod[1+i];
+		if (max_bandwidth < down_bw)
+			max_bandwidth = down_bw;
+		if (max_bandwidth < up_bw)
+			max_bandwidth = up_bw;
+	}
+#endif
+	fprintf(f, "\n   <link id='Share' bandwidth='%f' latency='0.000000'/>\n\n", max_bandwidth*1000000);
+
+	/* Write bandwidths & latencies */
+#ifdef STARPU_USE_CUDA
+	for (i = 0; i < ncuda; i++)
+	{
+		char i_name[16];
+		snprintf(i_name, sizeof(i_name), "CUDA%d", i);
+		fprintf(f, "   <link id='RAM-%s' bandwidth='%f' latency='%f'/>\n",
+			i_name,
+			1000000. / cudadev_timing_htod[1+i],
+			cudadev_latency_htod[1+i]/1000000.);
+		fprintf(f, "   <link id='%s-RAM' bandwidth='%f' latency='%f'/>\n",
+			i_name,
+			1000000. / cudadev_timing_dtoh[1+i],
+			cudadev_latency_dtoh[1+i]/1000000.);
+	}
+#ifdef HAVE_CUDA_MEMCPY_PEER
+	for (i = 0; i < ncuda; i++)
+	{
+		char i_name[16];
+		snprintf(i_name, sizeof(i_name), "CUDA%d", i);
+		for (j = 0; j < ncuda; j++)
+		{
+			char j_name[16];
+			if (j == i)
+				continue;
+			snprintf(j_name, sizeof(j_name), "CUDA%d", j);
+			fprintf(f, "   <link id='%s-%s' bandwidth='%f' latency='%f'/>\n",
+				i_name, j_name,
+				1000000. / cudadev_timing_dtod[1+i][1+j],
+				cudadev_latency_dtod[1+i][1+j]/1000000.);
+		}
+	}
+#endif
+#endif
+
+#ifdef STARPU_USE_OPENCL
+	for (i = 0; i < nopencl; i++)
+	{
+		char i_name[16];
+		snprintf(i_name, sizeof(i_name), "OpenCL%d", i);
+		fprintf(f, "   <link id='RAM-%s' bandwidth='%f' latency='%f'/>\n",
+			i_name,
+			1000000 / opencldev_timing_htod[1+i],
+			opencldev_latency_htod[1+i]/1000000.);
+		fprintf(f, "   <link id='%s-RAM' bandwidth='%f' latency='%f'/>\n",
+			i_name,
+			1000000 / opencldev_timing_dtoh[1+i],
+			opencldev_latency_dtoh[1+i]/1000000.);
+	}
 #endif
+
+	/* Write routes */
+#ifdef STARPU_USE_CUDA
+	for (i = 0; i < ncuda; i++)
+	{
+		char i_name[16];
+		snprintf(i_name, sizeof(i_name), "CUDA%d", i);
+		fprintf(f, "   <route src='RAM' dst='%s' symmetrical='NO'><link_ctn id='RAM-%s'/><link_ctn id='Share'/></route>\n", i_name, i_name);
+		fprintf(f, "   <route src='%s' dst='RAM' symmetrical='NO'><link_ctn id='%s-RAM'/><link_ctn id='Share'/></route>\n", i_name, i_name);
+	}
+#ifdef HAVE_CUDA_MEMCPY_PEER
+	for (i = 0; i < ncuda; i++)
+	{
+		char i_name[16];
+		snprintf(i_name, sizeof(i_name), "CUDA%d", i);
+		for (j = 0; j < ncuda; j++)
+		{
+			char j_name[16];
+			if (j == i)
+				continue;
+			snprintf(j_name, sizeof(j_name), "CUDA%d", j);
+			fprintf(f, "   <route src='%s' dst='%s' symmetrical='NO'><link_ctn id='%s-%s'/><link_ctn id='Share'/></route>\n", i_name, j_name, i_name, j_name);
+		}
+	}
+#endif
+#endif
+
+#ifdef STARPU_USE_OPENCL
+	for (i = 0; i < nopencl; i++)
+	{
+		char i_name[16];
+		snprintf(i_name, sizeof(i_name), "OpenCL%d", i);
+		fprintf(f, "   <route src='RAM' dst='%s' symmetrical='NO'><link_ctn id='RAM-%s'/><link_ctn id='Share'/></route>\n", i_name, i_name);
+		fprintf(f, "   <route src='%s' dst='RAM' symmetrical='NO'><link_ctn id='%s-RAM'/><link_ctn id='Share'/></route>\n", i_name, i_name);
+	}
+#endif
+
+	fprintf(f,
+" </AS>\n"
+" </platform>\n"
+		);
+
+	fclose(f);
+}
+
+static void generate_bus_platform_file(void)
+{
+	if (!was_benchmarked)
+		benchmark_all_gpu_devices();
+
+	write_bus_platform_file_content();
+}
+
+static void check_bus_platform_file(void)
+{
+	int res;
+
+	char path[256];
+	_starpu_simgrid_get_platform_path(path, sizeof(path));
+
+	res = access(path, F_OK);
+	if (res)
+	{
+		/* File does not exist yet */
+		generate_bus_platform_file();
+	}
 }
 
 /*
@@ -1583,7 +1776,8 @@ static void starpu_force_bus_sampling(void)
 	generate_bus_affinity_file();
 	generate_bus_latency_file();
 	generate_bus_bandwidth_file();
-        generate_bus_config_file();
+	generate_bus_config_file();
+	generate_bus_platform_file();
 }
 #endif /* !SIMGRID */
 
@@ -1604,6 +1798,9 @@ void _starpu_load_bus_performance_files(void)
 #endif
 	load_bus_latency_file();
 	load_bus_bandwidth_file();
+#ifndef STARPU_SIMGRID
+	check_bus_platform_file();
+#endif
 }
 
 /* (in MB/s) */

+ 13 - 127
src/core/simgrid.c

@@ -19,99 +19,7 @@
 #include <unistd.h>
 #include <core/perfmodel/perfmodel.h>
 #include <core/workers.h>
-
-static struct starpu_conf conf;
-
-static void bus_name(char *s, size_t size, int num)
-{
-	if (!num)
-		snprintf(s, size, "RAM");
-	else if (num < conf.ncuda + 1)
-		snprintf(s, size, "CUDA%d", num - 1);
-	else
-		snprintf(s, size, "OpenCL%d", num - conf.ncuda - 1);
-}
-
-void starpu_simgrid_write_platform(struct starpu_conf *conf, FILE *file)
-{
-	int i, j;
-
-	fprintf(file,
-"<?xml version='1.0'?>\n"
-" <!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'>\n"
-" <platform version='3'>\n"
-" <config id='General'>\n"
-"   <prop id='network/TCP_gamma' value='-1'></prop>\n"
-"   <prop id='network/latency_factor' value='1'></prop>\n"
-"   <prop id='network/bandwidth_factor' value='1'></prop>\n"
-" </config>\n"
-" <AS  id='AS0'  routing='Full'>\n"
-"   <host id='MAIN' power='1'/>\n"
-		);
-
-	for (i = 0; i < conf->ncpus; i++)
-		fprintf(file, "   <host id='CPU%d' power='2000000000'/>\n", i);
-
-	for (i = 0; i < conf->ncuda; i++)
-		fprintf(file, "   <host id='CUDA%d' power='2000000000'/>\n", i);
-
-	for (i = 0; i < conf->nopencl; i++)
-		fprintf(file, "   <host id='OpenCL%d' power='2000000000'/>\n", i);
-
-	fprintf(file, "\n   <host id='RAM' power='1'/>\n");
-
-	/* Compute maximum bandwidth, taken as machine bandwidth */
-	double max_bandwidth = 0;
-	for (i = 1; i < conf->ncuda + conf->nopencl + 1; i++)
-	{
-		if (max_bandwidth < _starpu_transfer_bandwidth(0, i))
-			max_bandwidth = _starpu_transfer_bandwidth(0, i);
-		if (max_bandwidth < _starpu_transfer_bandwidth(i, 0))
-			max_bandwidth = _starpu_transfer_bandwidth(i, 0);
-	}
-	fprintf(file, "\n   <link id='Share' bandwidth='%f' latency='0.000000'/>\n\n", max_bandwidth*1000000);
-
-	for (i = 0; i < conf->ncuda + conf->nopencl + 1; i++)
-	{
-		char i_name[16];
-		bus_name(i_name, sizeof(i_name), i);
-
-		for (j = 0; j < conf->ncuda + conf->nopencl + 1; j++)
-		{
-			char j_name[16];
-			if (j == i)
-				continue;
-			bus_name(j_name, sizeof(j_name), j);
-			fprintf(file, "   <link id='%s-%s' bandwidth='%f' latency='%f'/>\n",
-				i_name, j_name,
-				_starpu_transfer_bandwidth(i, j) * 1000000,
-				_starpu_transfer_latency(i, j) / 1000000);
-		}
-	}
-
-	for (i = 0; i < conf->ncuda + conf->nopencl + 1; i++)
-	{
-		char i_name[16];
-		bus_name(i_name, sizeof(i_name), i);
-
-		for (j = 0; j < conf->ncuda + conf->nopencl + 1; j++)
-		{
-			char j_name[16];
-			if (j == i)
-				continue;
-			bus_name(j_name, sizeof(j_name), j);
-			fprintf(file,
-"   <route src='%s' dst='%s' symmetrical='NO'><link_ctn id='%s-%s'/><link_ctn id='Share'/></route>\n",
-				i_name, j_name, i_name, j_name);
-		}
-	}
-
-	fprintf(file, 
-" </AS>\n"
-" </platform>\n"
-		);
-	fclose(file);
-}
+#include <core/simgrid.h>
 
 #ifdef STARPU_SIMGRID
 #include <msg/msg.h>
@@ -159,9 +67,7 @@ int main(int argc, char **argv)
 {
 	xbt_dynar_t hosts;
 	int i;
-	char name[] = "/tmp/starpu-simgrid-platform.xml.XXXXXX";
-	int fd;
-	FILE *file;
+	char path[256];
 
 	if (!starpu_main)
 	{
@@ -175,29 +81,9 @@ int main(int argc, char **argv)
 	MSG_config("workstation/model", "ptask_L07");
 #endif
 
-	/* Create platform file */
-	starpu_conf_init(&conf);
-	if ((!getenv("STARPU_NCPUS") && !getenv("STARPU_NCPU"))
-	  || !getenv("STARPU_NCUDA")
-	  || !getenv("STARPU_NOPENCL"))
-	{
-		_STARPU_ERROR("Please specify the number of cpus and gpus by setting the environment variables STARPU_NCPU, STARPU_NCUDA, STARPU_NOPENCL\n");
-		exit(EXIT_FAILURE);
-	}
-	_starpu_conf_check_environment(&conf);
-
-	_starpu_load_bus_performance_files();
-
-	/* TODO: make the user to provide his own xml */
-	/* And remove the hack in _starpu_cpu_discover_devices */
-	fd = mkstemp(name);
-	file = fdopen(fd, "w");
-	starpu_simgrid_write_platform(&conf, file);
-	close(fd);
-
-	/* and load it */
-	MSG_create_environment(name);
-	unlink(name);
+	/* Load XML platform */
+	_starpu_simgrid_get_platform_path(path, sizeof(path));
+	MSG_create_environment(path);
 
 	hosts = MSG_hosts_as_dynar();
 	int nb = xbt_dynar_length(hosts);
@@ -266,15 +152,15 @@ static int transfers_are_sequential(struct transfer *new_transfer, struct transf
 	int new_is_opencl STARPU_ATTRIBUTE_UNUSED, old_is_opencl STARPU_ATTRIBUTE_UNUSED;
 	int new_is_gpu_gpu, old_is_gpu_gpu;
 
-	new_is_cuda  = new_transfer->src_node >= 1 && new_transfer->src_node <= conf.ncuda;
-	new_is_cuda |= new_transfer->dst_node >= 1 && new_transfer->dst_node <= conf.ncuda;
-	old_is_cuda  = old_transfer->src_node >= 1 && old_transfer->src_node <= conf.ncuda;
-	old_is_cuda |= old_transfer->dst_node >= 1 && old_transfer->dst_node <= conf.ncuda;
+	new_is_cuda  = starpu_node_get_kind(new_transfer->src_node) == STARPU_CUDA_RAM;
+	new_is_cuda |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_CUDA_RAM;
+	old_is_cuda  = starpu_node_get_kind(old_transfer->src_node) == STARPU_CUDA_RAM;
+	old_is_cuda |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_CUDA_RAM;
 
-	new_is_opencl  = new_transfer->src_node > conf.ncuda && new_transfer->src_node <= conf.ncuda + conf.nopencl;
-	new_is_opencl |= new_transfer->dst_node > conf.ncuda && new_transfer->dst_node <= conf.ncuda + conf.nopencl;
-	old_is_opencl  = old_transfer->src_node > conf.ncuda && old_transfer->src_node <= conf.ncuda + conf.nopencl;
-	old_is_opencl |= old_transfer->dst_node > conf.ncuda && old_transfer->dst_node <= conf.ncuda + conf.nopencl;
+	new_is_opencl  = starpu_node_get_kind(new_transfer->src_node) == STARPU_OPENCL_RAM;
+	new_is_opencl |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_OPENCL_RAM;
+	old_is_opencl  = starpu_node_get_kind(old_transfer->src_node) == STARPU_OPENCL_RAM;
+	old_is_opencl |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_OPENCL_RAM;
 
 	new_is_gpu_gpu = new_transfer->src_node && new_transfer->dst_node;
 	old_is_gpu_gpu = old_transfer->src_node && old_transfer->dst_node;

+ 1 - 0
src/core/simgrid.h

@@ -24,6 +24,7 @@ void _starpu_simgrid_execute_job(struct _starpu_job *job, enum starpu_perf_archt
 int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req);
 /* Return the number of hosts prefixed by PREFIX */
 int _starpu_simgrid_get_nbhosts(const char *prefix);
+void _starpu_simgrid_get_platform_path(char *path, size_t maxlen);
 #endif
 
 #endif // __SIMGRID_H__

+ 3 - 1
src/core/workers.c

@@ -590,11 +590,13 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 				_STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
 				cuda++;
 				break;
-#ifdef STARPU_USE_OPENCL
+#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
 			case STARPU_OPENCL_WORKER:
+#ifndef STARPU_SIMGRID
 				starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
 				if (!_starpu_may_launch_driver(config->conf, &driver))
 					break;
+#endif
 				_STARPU_DEBUG("waiting for worker %u initialization\n", worker);
 				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
 				while (!workerarg->worker_is_initialized)

+ 0 - 5
src/drivers/cpu/driver_cpu.c

@@ -48,12 +48,7 @@
 void
 _starpu_cpu_discover_devices(struct _starpu_machine_config *config)
 {
-#if 0
 	config->topology.nhwcpus = _starpu_simgrid_get_nbhosts("CPU");
-#else
-	/* For now, lie about the number of CPUs we actually have */
-	config->topology.nhwcpus = STARPU_NMAXWORKERS;
-#endif
 }
 #elif defined(STARPU_HAVE_HWLOC)
 void

+ 4 - 5
src/drivers/opencl/driver_opencl.c

@@ -367,7 +367,10 @@ void _starpu_opencl_init(void)
         if (!init_done)
 	{
 #ifdef STARPU_SIMGRID
-		nb_devices = _starpu_simgrid_get_nbhosts("OpenCL");
+		unsigned ncuda = _starpu_simgrid_get_nbhosts("CUDA");
+		unsigned nopencl = _starpu_simgrid_get_nbhosts("OpenCL");
+		nb_devices = nopencl - ncuda;
+		STARPU_ASSERT_MSG((nopencl == ncuda) || !ncuda, "Does not yet support selectively disabling OpenCL devices of NVIDIA cards.");
 #else /* STARPU_USE_OPENCL */
                 cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
                 cl_uint nb_platforms;
@@ -678,15 +681,11 @@ static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname)
 
 unsigned _starpu_opencl_get_device_count(void)
 {
-#ifdef STARPU_USE_OPENCL
         if (!init_done)
 	{
                 _starpu_opencl_init();
         }
 	return nb_devices;
-#else
-	return _starpu_simgrid_get_nbhosts("OpenCL");
-#endif
 }
 
 #ifdef STARPU_USE_OPENCL