
- start to move some initialization code related to topology discovery from
  "workers.c" to "topology.c"
- introduce the starpu_topo_obj_t structure, even though it is not used yet

Cédric Augonnet, 16 years ago
commit 033ebe0d39
5 changed files with 388 additions and 274 deletions
  1. src/Makefile.am (+2, -0)
  2. src/core/topology.c (+310, -0)
  3. src/core/topology.h (+68, -0)
  4. src/core/workers.c (+4, -274)
  5. src/core/workers.h (+4, -0)

+ 2 - 0
src/Makefile.am

@@ -46,6 +46,7 @@ noinst_HEADERS = 						\
 	core/perfmodel/regression.h				\
 	core/jobs.h						\
 	core/workers.h						\
+	core/topology.h						\
 	core/debug.h						\
 	datawizard/footprint.h					\
 	datawizard/datawizard.h					\
@@ -89,6 +90,7 @@ libstarpu_la_SOURCES = 						\
 	common/timing.c						\
 	core/jobs.c						\
 	core/workers.c						\
+	core/topology.c						\
 	core/debug.c						\
 	core/dependencies/tags.c				\
 	core/dependencies/htable.c				\

+ 310 - 0
src/core/topology.c

@@ -0,0 +1,310 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <common/config.h>
+#include <core/workers.h>
+#include <core/debug.h>
+#include <core/topology.h>
+
+/*
+ * Discover the topology of the machine
+ */
+
+
+#ifdef USE_CPUS
+static unsigned ncores;
+#endif
+#ifdef USE_CUDA
+static unsigned ncudagpus;
+#endif
+#ifdef USE_GORDON
+static unsigned ngordon_spus;
+#endif
+
+#ifdef USE_CUDA
+extern unsigned get_cuda_device_count(void);
+#endif
+
+static void init_machine_config(struct machine_config_s *config,
+				struct starpu_conf *user_conf)
+{
+	int explicitval __attribute__((unused));
+	unsigned use_accelerator = 0;
+
+	config->nworkers = 0;
+
+#ifdef USE_CUDA
+	if (user_conf && (user_conf->ncuda == 0))
+	{
+		/* the user explicitly disabled CUDA */
+		ncudagpus = 0;
+	}
+	else {
+		/* we need to initialize CUDA early to count the number of devices */
+		init_cuda();
+
+		if (user_conf && (user_conf->ncuda != -1))
+		{
+			explicitval = user_conf->ncuda;
+		}
+		else {
+			explicitval = starpu_get_env_number("NCUDA");
+		}
+
+		if (explicitval < 0) {
+			ncudagpus = STARPU_MIN(get_cuda_device_count(), MAXCUDADEVS);
+		} else {
+			/* use the specified value */
+			ncudagpus = (unsigned)explicitval;
+			STARPU_ASSERT(ncudagpus <= MAXCUDADEVS);
+		}
+		STARPU_ASSERT(ncudagpus + config->nworkers <= NMAXWORKERS);
+	}
+
+	if (ncudagpus > 0)
+		use_accelerator = 1;
+
+	unsigned cudagpu;
+	for (cudagpu = 0; cudagpu < ncudagpus; cudagpu++)
+	{
+		config->workers[config->nworkers + cudagpu].arch = CUDA_WORKER;
+		config->workers[config->nworkers + cudagpu].perf_arch = STARPU_CUDA_DEFAULT;
+		config->workers[config->nworkers + cudagpu].id = cudagpu;
+		config->worker_mask |= (CUDA|CUBLAS);
+	}
+
+	config->nworkers += ncudagpus;
+#endif
+	
+#ifdef USE_GORDON
+	if (user_conf && (user_conf->ncuda != -1)) {
+		explicitval = user_conf->ncuda;
+	}
+	else {
+		explicitval = starpu_get_env_number("NGORDON");
+	}
+
+	if (explicitval < 0) {
+		ngordon_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
+	} else {
+		/* use the specified value */
+		ngordon_spus = (unsigned)explicitval;
+		STARPU_ASSERT(ngordon_spus <= NMAXGORDONSPUS);
+	}
+	STARPU_ASSERT(ngordon_spus + config->nworkers <= NMAXWORKERS);
+
+	if (ngordon_spus > 0)
+		use_accelerator = 1;
+
+	unsigned spu;
+	for (spu = 0; spu < ngordon_spus; spu++)
+	{
+		config->workers[config->nworkers + spu].arch = GORDON_WORKER;
+		config->workers[config->nworkers + spu].perf_arch = STARPU_GORDON_DEFAULT;
+		config->workers[config->nworkers + spu].id = spu;
+		config->workers[config->nworkers + spu].worker_is_running = 0;
+		config->worker_mask |= GORDON;
+	}
+
+	config->nworkers += ngordon_spus;
+#endif
+
+/* we put the CPU section after the accelerators: if an accelerator was
+ * found, we dedicate one core to it */
+#ifdef USE_CPUS
+	if (user_conf && (user_conf->ncpus != -1)) {
+		explicitval = user_conf->ncpus;
+	}
+	else {
+		explicitval = starpu_get_env_number("NCPUS");
+	}
+
+	if (explicitval < 0) {
+		long avail_cores = sysconf(_SC_NPROCESSORS_ONLN) 
+						- (use_accelerator?1:0);
+		ncores = STARPU_MIN(avail_cores, NMAXCORES);
+	} else {
+		/* use the specified value */
+		ncores = (unsigned)explicitval;
+		STARPU_ASSERT(ncores <= NMAXCORES);
+	}
+	STARPU_ASSERT(ncores + config->nworkers <= NMAXWORKERS);
+
+	unsigned core;
+	for (core = 0; core < ncores; core++)
+	{
+		config->workers[config->nworkers + core].arch = CORE_WORKER;
+		config->workers[config->nworkers + core].perf_arch = STARPU_CORE_DEFAULT;
+		config->workers[config->nworkers + core].id = core;
+		config->worker_mask |= CORE;
+	}
+
+	config->nworkers += ncores;
+#endif
+
+
+	if (config->nworkers == 0)
+	{
+		fprintf(stderr, "No worker found, aborting ...\n");
+		exit(-1);
+	}
+}
+
+/*
+ * Bind workers on the different processors
+ */
+
+static int current_bindid = 0;
+static unsigned get_next_bindid_is_initialized = 0;
+static unsigned get_next_bindid_use_envvar = 0;
+static char *get_next_bindid_strval;
+
+static inline int get_next_bindid(void)
+{
+	int bindid;
+
+	/* do we use a round-robin policy to distribute the workers on the
+	 * cores, or do we use another distribution? */
+	if (!get_next_bindid_is_initialized)
+	{
+		char *strval;
+		strval = getenv("WORKERS_CPUID");
+		if (strval) {
+			get_next_bindid_strval = strval;
+			get_next_bindid_use_envvar = 1;
+		}
+
+		get_next_bindid_is_initialized = 1;
+	}
+	
+	if (get_next_bindid_use_envvar)
+	{
+		/* read the value from the WORKERS_CPUID env variable */
+		long int val;
+		char *endptr;
+		val = strtol(get_next_bindid_strval, &endptr, 10);
+		if (endptr != get_next_bindid_strval)
+		{
+			bindid = (int)(val % sysconf(_SC_NPROCESSORS_ONLN));
+
+			get_next_bindid_strval = endptr;
+		}
+		else {
+			/* there was no valid value so we fall back to round-robin */
+			bindid = (current_bindid++) % (sysconf(_SC_NPROCESSORS_ONLN));
+		}
+	}
+	else {
+		/* the user did not specify any worker distribution so we use a
+		 * round-robin distribution by default */
+		bindid = (current_bindid++) % (sysconf(_SC_NPROCESSORS_ONLN));
+	}
+
+	return bindid;
+}
+
+
+
+void bind_thread_on_cpu(unsigned coreid)
+{
+#ifndef DONTBIND
+	int ret;
+
+	/* fix the thread on the correct cpu */
+	cpu_set_t aff_mask;
+	CPU_ZERO(&aff_mask);
+	CPU_SET(coreid, &aff_mask);
+
+	pthread_t self = pthread_self();
+
+	ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
+	if (ret)
+	{
+		perror("pthread_setaffinity_np");
+		STARPU_ASSERT(0);
+	}
+#endif
+}
+
+static void init_workers_binding(struct machine_config_s *config)
+{
+	/* launch one thread per CPU */
+	unsigned ram_memory_node;
+
+	/* a single core is dedicated to the accelerators */
+	int accelerator_bindid = -1;
+
+	/* note that even if the CPU cores are not used, we always have a RAM node */
+	/* TODO : support NUMA  ;) */
+	ram_memory_node = register_memory_node(RAM);
+
+	unsigned worker;
+	for (worker = 0; worker < config->nworkers; worker++)
+	{
+		unsigned memory_node = -1;
+		unsigned is_an_accelerator = 0;
+		struct worker_s *workerarg = &config->workers[worker];
+		
+		/* select the memory node that contains worker's memory */
+		switch (workerarg->arch) {
+			case CORE_WORKER:
+			/* "dedicate" a cpu core to that worker */
+				is_an_accelerator = 0;
+				memory_node = ram_memory_node;
+				break;
+#ifdef USE_GORDON
+			case GORDON_WORKER:
+				is_an_accelerator = 1;
+				memory_node = ram_memory_node;
+				break;
+#endif
+#ifdef USE_CUDA
+			case CUDA_WORKER:
+				is_an_accelerator = 1;
+				memory_node = register_memory_node(CUDA_RAM);
+				break;
+#endif
+			default:
+				STARPU_ASSERT(0);
+		}
+
+		if (is_an_accelerator) {
+			if (accelerator_bindid == -1)
+				accelerator_bindid = get_next_bindid();
+			workerarg->bindid = accelerator_bindid;
+		}
+		else {
+			workerarg->bindid = get_next_bindid();
+		}
+
+		workerarg->memory_node = memory_node;
+	}
+}
+
+
+
+void starpu_build_topology(struct machine_config_s *config,
+			   struct starpu_conf *user_conf)
+{
+	init_machine_config(config, user_conf);
+
+	/* for the data management library */
+	init_memory_nodes();
+
+	init_workers_binding(config);
+}
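
Side note (not part of the commit): bind_thread_on_cpu() above relies on the GNU thread-affinity API. A minimal standalone sketch of the same pinning call, with the CPU number hard-coded to 0 for illustration:

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* build a mask containing only CPU 0 and pin the calling thread to it */
	cpu_set_t aff_mask;
	CPU_ZERO(&aff_mask);
	CPU_SET(0, &aff_mask);

	int ret = pthread_setaffinity_np(pthread_self(),
					 sizeof(aff_mask), &aff_mask);
	if (ret) {
		/* pthread calls return the error code instead of setting errno */
		fprintf(stderr, "pthread_setaffinity_np: %s\n", strerror(ret));
		return 1;
	}

	printf("thread now runs on CPU 0 only\n");
	return 0;
}

(Since pthread functions return the error rather than setting errno, strerror(ret) reports it more reliably than the perror() call in the patch.)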

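Likewise, for illustration only: the WORKERS_CPUID handling in get_next_bindid() consumes one integer per call from the environment string and falls back to round-robin once the string is exhausted or unset. A self-contained sketch of that behaviour, where next_bindid and main are hypothetical names, not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* consume one CPU id per call from WORKERS_CPUID; fall back to
 * round-robin when the string is exhausted or the variable is unset */
static int next_bindid(void)
{
	static int current;	/* round-robin counter */
	static char *cursor;	/* position in WORKERS_CPUID */
	static int initialized;
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);

	if (!initialized) {
		cursor = getenv("WORKERS_CPUID");
		initialized = 1;
	}

	if (cursor) {
		char *endptr;
		long val = strtol(cursor, &endptr, 10);
		if (endptr != cursor) {
			cursor = endptr;
			return (int)(val % ncpus);
		}
	}

	return current++ % ncpus;
}

int main(void)
{
	/* e.g. WORKERS_CPUID="0 2 4" binds the first three workers to
	 * CPUs 0, 2 and 4, then distributes the rest round-robin */
	int i;
	for (i = 0; i < 6; i++)
		printf("worker %d -> cpu %d\n", i, next_bindid());
	return 0;
}
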
+ 68 - 0
src/core/topology.h

@@ -0,0 +1,68 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __TOPOLOGY_H__
+#define __TOPOLOGY_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <common/config.h>
+#include <common/list.h>
+#include <common/fxt.h>
+
+#include <starpu.h>
+
+/* TODO actually move this struct into this header */
+struct machine_config_s;
+
+/* This structure is "inspired" by the libtopology project
+ * (see http://runtime.bordeaux.inria.fr/libtopology/) */
+
+struct starpu_topo_obj_t {
+	/* global position */
+	unsigned level;
+	unsigned number;
+
+	/* father */
+	struct starpu_topo_obj_t *father;
+	unsigned index;
+	
+	/* children */
+	unsigned arity;
+	struct starpu_topo_obj_t **children;
+	struct starpu_topo_obj_t *first_child;
+	struct starpu_topo_obj_t *last_child;
+
+	/* cousins */
+	struct starpu_topo_obj_t *next_cousin;
+	struct starpu_topo_obj_t *prev_cousin;
+
+	/* for the convenience of the scheduler */
+	void *sched_data;
+
+	/* flags */
+	unsigned is_a_worker;
+	struct worker_s *worker; /* (ignored if !is_a_worker) */
+};
+
+void starpu_build_topology(struct machine_config_s *config,
+			   struct starpu_conf *user_conf);
+
+#endif // __TOPOLOGY_H__
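
A note on the new structure (not exercised by this commit): the tree is meant to be walked through the arity/children fields. A hypothetical traversal, written against a trimmed-down local copy of the struct since the real one is not wired up yet:

#include <stddef.h>

/* trimmed-down local stand-in for starpu_topo_obj_t */
struct topo_obj {
	unsigned arity;			/* number of children */
	struct topo_obj **children;
	unsigned is_a_worker;		/* set on leaf worker objects */
};

/* count the workers in the subtree rooted at obj; assumes workers
 * are leaves, which the real structure does not enforce */
static unsigned count_workers(const struct topo_obj *obj)
{
	if (obj->is_a_worker)
		return 1;

	unsigned total = 0;
	unsigned i;
	for (i = 0; i < obj->arity; i++)
		total += count_workers(obj->children[i]);

	return total;
}

int main(void)
{
	struct topo_obj w0 = { .arity = 0, .children = NULL, .is_a_worker = 1 };
	struct topo_obj w1 = { .arity = 0, .children = NULL, .is_a_worker = 1 };
	struct topo_obj *kids[] = { &w0, &w1 };
	struct topo_obj machine = { .arity = 2, .children = kids, .is_a_worker = 0 };

	return count_workers(&machine) == 2 ? 0 : 1;	/* exit 0 on success */
}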

+ 4 - 274
src/core/workers.c

@@ -27,291 +27,26 @@ static struct machine_config_s config;
 
 /* in case a task is submitted, we may check whether there exists a worker
    that may execute the task or not */
-static uint32_t worker_mask = 0;
 
 inline uint32_t worker_exists(uint32_t task_mask)
 {
-	return (task_mask & worker_mask);
+	return (task_mask & config.worker_mask);
 } 
 
 inline uint32_t may_submit_cuda_task(void)
 {
-	return ((CUDA|CUBLAS) & worker_mask);
+	return ((CUDA|CUBLAS) & config.worker_mask);
 }
 
 inline uint32_t may_submit_core_task(void)
 {
-	return (CORE & worker_mask);
+	return (CORE & config.worker_mask);
 }
 
-#ifdef USE_CPUS
-static unsigned ncores;
-#endif
-#ifdef USE_CUDA
-static unsigned ncudagpus;
-#endif
-#ifdef USE_GORDON
-static unsigned ngordon_spus;
-#endif
-
 /*
  * Runtime initialization methods
  */
 
-#ifdef USE_CUDA
-extern unsigned get_cuda_device_count(void);
-#endif
-
-static void init_machine_config(struct machine_config_s *config,
-				struct starpu_conf *user_conf)
-{
-	int explicitval __attribute__((unused));
-	unsigned use_accelerator = 0;
-
-	config->nworkers = 0;
-
-#ifdef USE_CUDA
-	if (user_conf && (user_conf->ncuda == 0))
-	{
-		/* the user explicitely disabled CUDA */
-		ncudagpus = 0;
-	}
-	else {
-		/* we need to initialize CUDA early to count the number of devices */
-		init_cuda();
-
-		if (user_conf && (user_conf->ncuda != -1))
-		{
-			explicitval = user_conf->ncuda;
-		}
-		else {
-			explicitval = starpu_get_env_number("NCUDA");
-		}
-
-		if (explicitval < 0) {
-			ncudagpus = STARPU_MIN(get_cuda_device_count(), MAXCUDADEVS);
-		} else {
-			/* use the specified value */
-			ncudagpus = (unsigned)explicitval;
-			STARPU_ASSERT(ncudagpus <= MAXCUDADEVS);
-		}
-		STARPU_ASSERT(ncudagpus + config->nworkers <= NMAXWORKERS);
-	}
-
-	if (ncudagpus > 0)
-		use_accelerator = 1;
-
-	unsigned cudagpu;
-	for (cudagpu = 0; cudagpu < ncudagpus; cudagpu++)
-	{
-		config->workers[config->nworkers + cudagpu].arch = CUDA_WORKER;
-		config->workers[config->nworkers + cudagpu].perf_arch = STARPU_CUDA_DEFAULT;
-		config->workers[config->nworkers + cudagpu].id = cudagpu;
-		worker_mask |= (CUDA|CUBLAS);
-	}
-
-	config->nworkers += ncudagpus;
-#endif
-	
-#ifdef USE_GORDON
-	if (user_conf && (user_conf->ncuda != -1)) {
-		explicitval = user_conf->ncuda;
-	}
-	else {
-		explicitval = starpu_get_env_number("NGORDON");
-	}
-
-	if (explicitval < 0) {
-		ngordon_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
-	} else {
-		/* use the specified value */
-		ngordon_spus = (unsigned)explicitval;
-		STARPU_ASSERT(ngordon_spus <= NMAXGORDONSPUS);
-	}
-	STARPU_ASSERT(ngordon_spus + config->nworkers <= NMAXWORKERS);
-
-	if (ngordon_spus > 0)
-		use_accelerator = 1;
-
-	unsigned spu;
-	for (spu = 0; spu < ngordon_spus; spu++)
-	{
-		config->workers[config->nworkers + spu].arch = GORDON_WORKER;
-		config->workers[config->nworkers + spu].perf_arch = STARPU_GORDON_DEFAULT;
-		config->workers[config->nworkers + spu].id = spu;
-		config->workers[config->nworkers + spu].worker_is_running = 0;
-		worker_mask |= GORDON;
-	}
-
-	config->nworkers += ngordon_spus;
-#endif
-
-/* we put the CPU section after the accelerator : in case there was an
- * accelerator found, we devote one core */
-#ifdef USE_CPUS
-	if (user_conf && (user_conf->ncpus != -1)) {
-		explicitval = user_conf->ncpus;
-	}
-	else {
-		explicitval = starpu_get_env_number("NCPUS");
-	}
-
-	if (explicitval < 0) {
-		long avail_cores = sysconf(_SC_NPROCESSORS_ONLN) 
-						- (use_accelerator?1:0);
-		ncores = STARPU_MIN(avail_cores, NMAXCORES);
-	} else {
-		/* use the specified value */
-		ncores = (unsigned)explicitval;
-		STARPU_ASSERT(ncores <= NMAXCORES);
-	}
-	STARPU_ASSERT(ncores + config->nworkers <= NMAXWORKERS);
-
-	unsigned core;
-	for (core = 0; core < ncores; core++)
-	{
-		config->workers[config->nworkers + core].arch = CORE_WORKER;
-		config->workers[config->nworkers + core].perf_arch = STARPU_CORE_DEFAULT;
-		config->workers[config->nworkers + core].id = core;
-		worker_mask |= CORE;
-	}
-
-	config->nworkers += ncores;
-#endif
-
-
-	if (config->nworkers == 0)
-	{
-		fprintf(stderr, "No worker found, aborting ...\n");
-		exit(-1);
-	}
-}
-
-void bind_thread_on_cpu(unsigned coreid)
-{
-#ifndef DONTBIND
-	int ret;
-
-	/* fix the thread on the correct cpu */
-	cpu_set_t aff_mask;
-	CPU_ZERO(&aff_mask);
-	CPU_SET(coreid, &aff_mask);
-
-	pthread_t self = pthread_self();
-
-	ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
-	if (ret)
-	{
-		perror("pthread_setaffinity_np");
-		STARPU_ASSERT(0);
-	}
-#endif
-}
-
-static int current_bindid = 0;
-static unsigned get_next_bindid_is_initialized = 0;
-static unsigned get_next_bindid_use_envvar = 0;
-static char *get_next_bindid_strval;
-
-static inline int get_next_bindid(void)
-{
-	int bindid;
-
-	/* do we use a round robin policy to distribute the workers on the
- 	 * cores, or do we another distribution ? */
-	if (!get_next_bindid_is_initialized)
-	{
-		char *strval;
-		strval = getenv("WORKERS_CPUID");
-		if (strval) {
-			get_next_bindid_strval = strval;
-			get_next_bindid_use_envvar = 1;
-		}
-
-		get_next_bindid_is_initialized = 1;
-	}
-	
-	if (get_next_bindid_use_envvar)
-	{
-		/* read the value from the WORKERS_CPUID env variable */
-		long int val;
-		char *endptr;
-		val = strtol(get_next_bindid_strval, &endptr, 10);
-		if (endptr != get_next_bindid_strval)
-		{
-			bindid = (int)(val % sysconf(_SC_NPROCESSORS_ONLN));
-
-			get_next_bindid_strval = endptr;
-		}
-		else {
-			/* there was no valid value so we use a round robin */
-			bindid = (current_bindid++) % (sysconf(_SC_NPROCESSORS_ONLN));
-		}
-	}
-	else {
-		/* the user did not specify any worker distribution so we use a
- 		 * round robin distribution by default */
-		bindid = (current_bindid++) % (sysconf(_SC_NPROCESSORS_ONLN));
-	}
-
-	return bindid;
-}
-
-static void init_workers_binding(struct machine_config_s *config)
-{
-	/* launch one thread per CPU */
-	unsigned ram_memory_node;
-
-	/* a single core is dedicated for the accelerators */
-	int accelerator_bindid = -1;
-
-	/* note that even if the CPU core are not used, we always have a RAM node */
-	/* TODO : support NUMA  ;) */
-	ram_memory_node = register_memory_node(RAM);
-
-	unsigned worker;
-	for (worker = 0; worker < config->nworkers; worker++)
-	{
-		unsigned memory_node = -1;
-		unsigned is_an_accelerator = 0;
-		struct worker_s *workerarg = &config->workers[worker];
-		
-		/* select the memory node that contains worker's memory */
-		switch (workerarg->arch) {
-			case CORE_WORKER:
-			/* "dedicate" a cpu core to that worker */
-				is_an_accelerator = 0;
-				memory_node = ram_memory_node;
-				break;
-#ifdef USE_GORDON
-			case GORDON_WORKER:
-				is_an_accelerator = 1;
-				memory_node = ram_memory_node;
-				break;
-#endif
-#ifdef USE_CUDA
-			case CUDA_WORKER:
-				is_an_accelerator = 1;
-				memory_node = register_memory_node(CUDA_RAM);
-				break;
-#endif
-			default:
-				STARPU_ASSERT(0);
-		}
-
-		if (is_an_accelerator) {
-			if (accelerator_bindid == -1)
-				accelerator_bindid = get_next_bindid();
-			workerarg->bindid = accelerator_bindid;
-		}
-		else {
-			workerarg->bindid = get_next_bindid();
-		}
-
-		workerarg->memory_node = memory_node;
-	}
-}
-
 #ifdef USE_GORDON
 static unsigned gordon_inited = 0;	
 static struct worker_set_s gordon_worker_set;
@@ -414,12 +149,7 @@ void starpu_init(struct starpu_conf *user_conf)
 
 	timing_init();
 
-	init_machine_config(&config, user_conf);
-
-	/* for the data wizard */
-	init_memory_nodes();
-
-	init_workers_binding(&config);
+	starpu_build_topology(&config, user_conf);
 
 	initialize_tag_mutex();
 

+ 4 - 0
src/core/workers.h

@@ -29,6 +29,7 @@
 #include <core/jobs.h>
 #include <core/perfmodel/perfmodel.h>
 #include <core/policies/sched_policy.h>
+#include <core/topology.h>
 
 #include <starpu.h>
 
@@ -95,6 +96,9 @@ struct machine_config_s {
 	unsigned nworkers;
 
 	struct worker_s workers[NMAXWORKERS];
+	uint32_t worker_mask;
+
+	struct starpu_topo_obj_t *topology;
 
 	/* this flag is set until the runtime is stopped */
 	unsigned running;
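
For context (not part of the diff): moving worker_mask into machine_config_s changes where the mask lives, not how it is queried — worker_exists() remains a plain bitmask intersection. A standalone illustration, where the *_BIT constants are stand-ins for StarPU's real CORE/CUDA/CUBLAS flags:

#include <stdint.h>
#include <stdio.h>

/* stand-ins for the CORE/CUDA/CUBLAS bits defined in the StarPU headers */
#define CORE_BIT	(1u << 0)
#define CUDA_BIT	(1u << 1)
#define CUBLAS_BIT	(1u << 2)

int main(void)
{
	uint32_t worker_mask = 0;

	/* topology discovery found one CPU core and one CUDA device */
	worker_mask |= CORE_BIT;
	worker_mask |= (CUDA_BIT | CUBLAS_BIT);

	/* a task advertises where it may run; submission is legal only
	 * if at least one matching worker exists */
	uint32_t task_mask = CUDA_BIT;
	printf("can submit: %s\n", (task_mask & worker_mask) ? "yes" : "no");
	return 0;
}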