瀏覽代碼

Add initial simgrid support for MPI, not working yet

Samuel Thibault 11 年之前
父節點
當前提交
7a82eb9b6b
共有 11 個文件被更改,包括 197 次插入31 次删除
  1. 1 0
      configure.ac
  2. 3 1
      include/starpu.h
  3. 1 1
      include/starpu_thread.h
  4. 6 2
      src/Makefile.am
  5. 5 4
      src/common/thread.c
  6. 5 2
      src/core/sched_ctx.c
  7. 114 14
      src/core/simgrid.c
  8. 2 0
      src/core/simgrid.h
  9. 7 6
      src/core/topology.c
  10. 6 1
      src/core/workers.c
  11. 47 0
      src/starpu_smpi.xslt

+ 1 - 0
configure.ac

@@ -994,6 +994,7 @@ if test x$enable_simgrid = xyes ; then
 			AC_MSG_ERROR(Simgrid support needs simgrid installed)
 		]
 	)
+   	AC_CHECK_FUNCS([MSG_process_join])
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
 		    		[[#include <msg/msg.h>]],
 				[[msg_host_t foo; ]]

+ 3 - 1
include/starpu.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2013  Université de Bordeaux 1
+ * Copyright (C) 2009-2014  Université de Bordeaux 1
  * Copyright (C) 2010-2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -73,8 +73,10 @@ extern "C"
 #endif
 
 #ifdef STARPU_SIMGRID
+#ifndef main
 #define main starpu_main
 #endif
+#endif
 
 struct starpu_conf
 {

+ 1 - 1
include/starpu_thread.h

@@ -39,7 +39,7 @@ extern "C"
 
 #ifdef STARPU_SIMGRID
 
-typedef int starpu_pthread_t;
+typedef msg_process_t starpu_pthread_t;
 typedef int starpu_pthread_attr_t;
 
 int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, int where);

+ 6 - 2
src/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2013  Université de Bordeaux 1
+# Copyright (C) 2009-2014  Université de Bordeaux 1
 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 # Copyright (C) 2011  INRIA
 #
@@ -49,7 +49,7 @@ endif STARPU_HAVE_WINDOWS
 
 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 
-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include/ $(STARPU_RCCE_CPPFLAGS) -DBUILDING_STARPU
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include/ $(STARPU_RCCE_CPPFLAGS) -DBUILDING_STARPU -DSTARPU_DATADIR='"$(datadir)"'
 
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(FXT_CFLAGS)
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = -lm $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS) $(STARPU_RCCE_LDFLAGS) $(FXT_LIBS) $(STARPU_GLPK_LDFLAGS) $(STARPU_LEVELDB_LDFLAGS)
@@ -310,5 +310,9 @@ endif
 
 #########################################
 
+if STARPU_SIMGRID
+dist_pkgdata_DATA = starpu_smpi.xslt
+endif
+
 showcheck:
 	-cat /dev/null

+ 5 - 4
src/common/thread.c

@@ -34,7 +34,7 @@ int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_
 	_args->f = start_routine;
 	_args->arg = arg;
 	_hosts = MSG_hosts_as_dynar();
-	MSG_process_create(name, _starpu_simgrid_thread_start, _args,
+	*thread = MSG_process_create(name, _starpu_simgrid_thread_start, _args,
 			   xbt_dynar_get_as(_hosts, (where), msg_host_t));
 	xbt_dynar_free(&_hosts);
 	return 0;
@@ -47,10 +47,11 @@ int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t
 
+/* Simgrid stand-in for pthread_join().  The MSG_process_join() variant is
+ * compiled out by the "#if 0", so this only calls MSG_process_sleep(1)
+ * instead of waiting for THREAD.  RETVAL is never written to.
+ * Always returns 0. */
 int starpu_pthread_join(starpu_pthread_t thread, void **retval)
 {
-#ifdef STARPU_DEVEL
-#warning TODO: use a simgrid_join when it becomes available
-#endif
+#if 0 //def HAVE_MSG_PROCESS_JOIN
+	MSG_process_join(thread, 100);
+#else
 	MSG_process_sleep(1);
+#endif
 	return 0;
 }
 

+ 5 - 2
src/core/sched_ctx.c

@@ -1511,13 +1511,15 @@ static void _starpu_sched_ctx_bind_thread_to_ctx_cpus(unsigned sched_ctx_id)
 	return;
 }
 
-void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid)
+void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid STARPU_ATTRIBUTE_UNUSED)
 {
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 
+	/* FIXME: why not factorize with _starpu_bind_thread_on_cpu? */
+
 #ifdef STARPU_SIMGRID
 	return;
-#endif
+#else
 	if (starpu_get_env_number("STARPU_WORKERS_NOBIND") > 0)
 		return;
 
@@ -1565,6 +1567,7 @@ void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid)
 #else
 #warning no CPU binding support
 #endif
+#endif
 
 }
 

+ 114 - 14
src/core/simgrid.c

@@ -23,9 +23,18 @@
 
 #ifdef STARPU_SIMGRID
 #include <msg/msg.h>
+#include <smpi/smpif.h>
+
+#define STARPU_MPI_AS_PREFIX "StarPU-MPI"
 
 #pragma weak starpu_main
 extern int starpu_main(int argc, char *argv[]);
+#pragma weak smpi_main
+extern int smpi_main(int (*realmain) (int argc, char *argv[]), int argc, char *argv[]);
+#pragma weak smpi_simulated_main_
+extern int smpi_simulated_main_(int argc, char *argv[]);
+
+#define _starpu_simgrid_running_smpi() (getenv("SMPI_GLOBAL_SIZE") != NULL)
 
 struct main_args
 {
@@ -39,13 +48,45 @@ int do_starpu_main(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBU
 	return starpu_main(args->argc, args->argv);
 }
 
+/* Depth-first search of the AS tree below ROOT for an AS called NAME.
+ * Each routing son is tested before its own descendants are explored.
+ * Returns the matching AS, or NULL when there is none under ROOT. */
+static msg_as_t __starpu_simgrid_get_as_by_name(msg_as_t root, const char *name)
+{
+	xbt_dict_t sons = MSG_environment_as_get_routing_sons(root);
+	xbt_dict_cursor_t it;
+	const char *son_key;
+	msg_as_t son;
+
+	xbt_dict_foreach(sons, it, son_key, son)
+	{
+		msg_as_t found;
+
+		if (strcmp(MSG_environment_as_get_name(son), name) == 0)
+			return son;
+
+		found = __starpu_simgrid_get_as_by_name(son, name);
+		if (found != NULL)
+			return found;
+	}
+	return NULL;
+}
+
+/* Search the whole platform, starting at the routing root, for the AS
+ * called NAME.  Returns NULL when no AS bears that name. */
+static msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
+{
+	msg_as_t platform_root = MSG_environment_get_routing_root();
+
+	return __starpu_simgrid_get_as_by_name(platform_root, name);
+}
+
 int _starpu_simgrid_get_nbhosts(const char *prefix)
 {
 	int ret;
-	xbt_dynar_t hosts = MSG_hosts_as_dynar();
-	unsigned i, nb = xbt_dynar_length(hosts);
+	xbt_dynar_t hosts;
+	unsigned i, nb;
 	unsigned len = strlen(prefix);
 
+	if (_starpu_simgrid_running_smpi())
+	{
+		char name[16];
+		snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%u", smpi_current_rank);
+		hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(name));
+	}
+	else
+		hosts = MSG_hosts_as_dynar();
+	nb = xbt_dynar_length(hosts);
+
 	ret = 0;
 	for (i = 0; i < nb; i++) {
 		const char *name;
@@ -65,7 +106,7 @@ unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devi
 
 	snprintf(name, sizeof(name), "%s%u", prefix, devid);
 
-	host = MSG_get_host_by_name(name);
+	host = _starpu_simgrid_get_host_by_name(name);
 	if (!host)
 		return 0;
 
@@ -79,22 +120,39 @@ unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devi
 	return atoll(memsize);
 }
 
+/* Return the simgrid host called NAME on the local node.
+ *
+ * Under SMPI every MPI node gets its own copy of the platform, in which
+ * starpu_smpi.xslt prepends "<AS name>-" to each host id, the AS name being
+ * STARPU_MPI_AS_PREFIX followed by the rank.  The lookup has to build the
+ * very same name, otherwise no host is ever found.
+ * Outside SMPI, NAME is looked up as is.
+ * Returns whatever MSG_get_host_by_name() returns for the resulting name. */
+msg_host_t _starpu_simgrid_get_host_by_name(const char *name)
+{
+	if (_starpu_simgrid_running_smpi())
+	{
+		/* Large enough for the prefix, the rank, the dash and names
+		 * such as "OpenCL12"; 16 bytes already truncated those. */
+		char mpiname[64];
+		snprintf(mpiname, sizeof(mpiname), STARPU_MPI_AS_PREFIX"%d-%s", smpi_current_rank, name);
+		return MSG_get_host_by_name(mpiname);
+	}
+	else
+		return MSG_get_host_by_name(name);
+}
+
 #ifdef STARPU_DEVEL
 #warning TODO: use another way to start main, when simgrid provides it, and then include the application-provided configuration for platform numbers
 #endif
 #undef main
 int main(int argc, char **argv)
 {
-	xbt_dynar_t hosts;
-	int i;
 	char path[256];
 
-	if (!starpu_main)
+	if (!starpu_main && !(smpi_main && smpi_simulated_main_))
 	{
 		_STARPU_ERROR("The main file of this application needs to be compiled with starpu.h included, to properly define starpu_main\n");
 		exit(EXIT_FAILURE);
 	}
 
+	if (_starpu_simgrid_running_smpi())
+	{
+		/* Oops, we are running SMPI, let it start Simgrid, and we'll
+		 * take back control in _starpu_simgrid_init from starpu_init() */
+		return smpi_main(smpi_simulated_main_, argc, argv);
+	}
+
 	MSG_init(&argc, argv);
 #if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 9)
 	/* Versions earlier than 3.9 didn't support our communication tasks */
@@ -108,17 +166,59 @@ int main(int argc, char **argv)
 	_starpu_simgrid_get_platform_path(path, sizeof(path));
 	MSG_create_environment(path);
 
-	hosts = MSG_hosts_as_dynar();
+	struct main_args args = { .argc = argc, .argv = argv };
+	MSG_process_create("main", &do_starpu_main, &args, MSG_get_host_by_name("MAIN"));
+
+	MSG_main();
+	return 0;
+}
+
+/* Set up the simgrid side of StarPU; called from starpu_initialize().
+ *
+ * Under SMPI, simgrid was started by smpi_main(), so the local platform of
+ * this MPI node is created here: the platform XML file is run through
+ * starpu_smpi.xslt with xsltproc (renaming the AS to
+ * STARPU_MPI_AS_PREFIX<rank>) and the result is loaded with
+ * MSG_create_environment().
+ * In both cases, a zeroed array of MAX_TSD pointers is then attached as
+ * data to each host (those of the local AS under SMPI, all hosts
+ * otherwise). */
+void _starpu_simgrid_init(void)
+{
+	xbt_dynar_t hosts;
+	int i;
+
+	if (_starpu_simgrid_running_smpi())
+	{
+		/* Take back control to create the local platform for this MPI
+		 * node */
+
+		char asname[16];
+		char path[256];
+		char cmdline[1024];
+		FILE *in;
+		int out;
+		char template[] = "/tmp/"STARPU_MPI_AS_PREFIX"-platform-XXXXXX.xml";
+		int ret;
+
+		snprintf(asname, sizeof(asname), STARPU_MPI_AS_PREFIX"%u", smpi_current_rank);
+
+		/* Get XML platform */
+		_starpu_simgrid_get_platform_path(path, sizeof(path));
+		in = fopen(path, "r");
+		STARPU_ASSERT_MSG(in, "Could not open platform file %s", path);
+		/* The file was only opened to check that it is readable,
+		 * xsltproc reads it by itself: do not leak the stream */
+		fclose(in);
+		out = mkstemps(template, strlen(".xml"));
+
+		/* Generate modified XML platform */
+		STARPU_ASSERT_MSG(out >= 0, "Could not create temporary file like %s", template);
+		/* Only the unique file name is needed, xsltproc writes the
+		 * content through its -o option */
+		close(out);
+		snprintf(cmdline, sizeof(cmdline), "xsltproc --novalid --stringparam ASname %s -o %s "STARPU_DATADIR"/starpu/starpu_smpi.xslt %s", asname, template, path);
+		ret = system(cmdline);
+		STARPU_ASSERT_MSG(ret == 0, "running xsltproc to generate SMPI platforms %s from %s failed", template, path);
+
+		/* And create it */
+		MSG_create_environment(template);
+		unlink(template);
+		hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(asname));
+	}
+	else
+		hosts = MSG_hosts_as_dynar();
+
 	int nb = xbt_dynar_length(hosts);
 	for (i = 0; i < nb; i++)
 		MSG_host_set_data(xbt_dynar_get_as(hosts, i, msg_host_t), calloc(MAX_TSD, sizeof(void*)));
 
-	struct main_args args = { .argc = argc, .argv = argv };
-	MSG_process_create("main", &do_starpu_main, &args, xbt_dynar_get_as(hosts, 0, msg_host_t));
 	xbt_dynar_free(&hosts);
-
-	MSG_main();
-	return 0;
 }
 
 /* Task execution submitted by StarPU */
@@ -247,7 +347,7 @@ static int transfer_execute(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARP
 		if (!wake->nwait)
 		{
 			_STARPU_DEBUG("triggering transfer %p\n", wake);
-			MSG_process_create("transfer task", transfer_execute, wake, MSG_get_host_by_name("MAIN"));
+			MSG_process_create("transfer task", transfer_execute, wake, _starpu_simgrid_get_host_by_name("MAIN"));
 		}
 	}
 
@@ -288,7 +388,7 @@ static void transfer_submit(struct transfer *transfer)
 	if (!transfer->nwait)
 	{
 		_STARPU_DEBUG("transfer %p waits for nobody, starting\n", transfer);
-		MSG_process_create("transfer task", transfer_execute, transfer, MSG_get_host_by_name("MAIN"));
+		MSG_process_create("transfer task", transfer_execute, transfer, _starpu_simgrid_get_host_by_name("MAIN"));
 	}
 }
 

+ 2 - 0
src/core/simgrid.h

@@ -30,11 +30,13 @@ struct _starpu_pthread_args
 
 #define MAX_TSD 16
 
+void _starpu_simgrid_init(void);
 void _starpu_simgrid_execute_job(struct _starpu_job *job, struct starpu_perfmodel_arch* perf_arch, double length);
 int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req);
 /* Return the number of hosts prefixed by PREFIX */
 int _starpu_simgrid_get_nbhosts(const char *prefix);
 unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid);
+msg_host_t _starpu_simgrid_get_host_by_name(const char *name);
 void _starpu_simgrid_get_platform_path(char *path, size_t maxlen);
 #endif
 

+ 7 - 6
src/core/topology.c

@@ -1093,11 +1093,11 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 void
 _starpu_bind_thread_on_cpu (
 	struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
-	int cpuid)
+	int cpuid STARPU_ATTRIBUTE_UNUSED)
 {
 #ifdef STARPU_SIMGRID
 	return;
-#endif
+#else
 	if (starpu_get_env_number("STARPU_WORKERS_NOBIND") > 0)
 		return;
 	if (cpuid < 0)
@@ -1157,6 +1157,7 @@ _starpu_bind_thread_on_cpu (
 #else
 #warning no CPU binding support
 #endif
+#endif
 }
 
 
@@ -1217,7 +1218,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 
 #ifdef STARPU_SIMGRID
 	char name[16];
-	msg_host_t host = MSG_get_host_by_name("RAM");
+	msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
 	STARPU_ASSERT(host);
 	_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
 #endif
@@ -1277,7 +1278,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 					memory_node = numa_memory_nodes[numaid] = _starpu_memory_node_register(STARPU_CPU_RAM, numaid);
 #ifdef STARPU_SIMGRID
 					snprintf(name, sizeof(name), "RAM%d", numaid);
-					host = MSG_get_host_by_name(name);
+					host = _starpu_simgrid_get_host_by_name(name);
 					STARPU_ASSERT(host);
 					_starpu_simgrid_memory_node_set_host(memory_node, host);
 #endif
@@ -1315,7 +1316,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
 #ifdef STARPU_SIMGRID
 					snprintf(name, sizeof(name), "CUDA%d", devid);
-					host = MSG_get_host_by_name(name);
+					host = _starpu_simgrid_get_host_by_name(name);
 					STARPU_ASSERT(host);
 					_starpu_simgrid_memory_node_set_host(memory_node, host);
 #endif /* SIMGRID */
@@ -1365,7 +1366,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
 #ifdef STARPU_SIMGRID
 					snprintf(name, sizeof(name), "OpenCL%d", devid);
-					host = MSG_get_host_by_name(name);
+					host = _starpu_simgrid_get_host_by_name(name);
 					STARPU_ASSERT(host);
 					_starpu_simgrid_memory_node_set_host(memory_node, host);
 #endif /* SIMGRID */

+ 6 - 1
src/core/workers.c

@@ -39,6 +39,7 @@
 
 #ifdef STARPU_SIMGRID
 #include <msg/msg.h>
+#include <core/simgrid.h>
 #endif
 
 #ifdef __MINGW32__
@@ -943,7 +944,9 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 
 	int ret;
 
-#ifndef STARPU_SIMGRID
+#ifdef STARPU_SIMGRID
+	_starpu_simgrid_init();
+#else
 #ifdef __GNUC__
 #ifndef __OPTIMIZE__
 	_STARPU_DISP("Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n");
@@ -1316,7 +1319,9 @@ void starpu_shutdown(void)
 	_starpu_delete_all_sched_ctxs();
 
 	_starpu_disk_unregister();
+#ifdef STARPU_HAVE_HWLOC
 	starpu_tree_free(config.topology.tree);
+#endif
 	_starpu_destroy_topology(&config);
 #ifdef STARPU_USE_FXT
 	_starpu_stop_fxt_profiling();

+ 47 - 0
src/starpu_smpi.xslt

@@ -0,0 +1,47 @@
+<!--
+StarPU   Runtime system for heterogeneous multicore architectures.
+
+Copyright (C) 2014  Université de Bordeaux 1
+
+StarPU is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at
+your option) any later version.
+
+StarPU is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+See the GNU Lesser General Public License in COPYING.LGPL for more details.
+-->
+
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+	<xsl:output doctype-system="http://simgrid.gforge.inria.fr/simgrid.dtd"/>
+
+    <!-- Name to give to the AS of the generated platform; provided by the
+         caller as a stylesheet parameter. It is also used as the prefix
+         of every renamed identifier below.  -->
+    <xsl:param name="ASname"/>
+
+     <!-- Add doctype 
+     <xsl:text>&lt;!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'&gt;</xsl:text>
+
+-->
+    <!-- Copy everything by default but keep applying templates.
+         All nodes are matched, so that routes, link_ctn, prop and any other
+         element of the platform are kept rather than silently dropped.  -->
+    <xsl:template match="node()|@*">
+        <xsl:copy>
+            <xsl:apply-templates select="node()|@*"/>
+        </xsl:copy>
+    </xsl:template>
+
+    <!-- Replace AS name.  -->
+    <xsl:template match="platform/AS/@id">
+        <xsl:attribute name="id">
+            <xsl:value-of select="$ASname"/>
+        </xsl:attribute>
+    </xsl:template>
+
+    <!-- Prepend AS name to host names.  -->
+    <xsl:template match="platform/AS/host/@id">
+	    <xsl:attribute name="id"><xsl:value-of select="$ASname"/>-<xsl:value-of select="."/></xsl:attribute>
+    </xsl:template>
+
+    <!-- Routes designate hosts by name, so their end points have to follow
+         the renaming of the hosts.  -->
+    <xsl:template match="platform/AS/route/@src|platform/AS/route/@dst">
+	    <xsl:attribute name="{name()}"><xsl:value-of select="$ASname"/>-<xsl:value-of select="."/></xsl:attribute>
+    </xsl:template>
+
+    <!-- Prepend AS name to link names, both where links are declared and
+         where routes refer to them, so that the copies of the platform made
+         for the different MPI nodes do not share link names.  -->
+    <xsl:template match="platform/AS/link/@id|platform/AS/route/link_ctn/@id">
+	    <xsl:attribute name="id"><xsl:value-of select="$ASname"/>-<xsl:value-of select="."/></xsl:attribute>
+    </xsl:template>
+
+</xsl:stylesheet>
+
+