лет назад: 8 · 35ad7ce8b1
--- a/configure.ac
+++ b/configure.ac
@@ -167,7 +167,7 @@ AM_CONDITIONAL([STARPU_USE_MIC], [test "x$enable_mic" = "xyes"])
 
																 AC_MSG_CHECKING(maximum number of NUMA nodes)
															
 
																 AC_ARG_ENABLE(maxnumanodes, [AS_HELP_STRING([--enable-maxnumanodes=<number>],
															
 
																 			[maximum number of NUMA nodes])],
															
 
																-			nmaxnumanodes=$enableval, nmaxnumanodes=4)
															
 
																+			nmaxnumanodes=$enableval, nmaxnumanodes=2)
															
 
																 AC_MSG_RESULT($nmaxnumanodes)
															
 
																 AC_DEFINE_UNQUOTED(STARPU_MAXNUMANODES, [$nmaxnumanodes],
															
 
																 		[maximum number of NUMA nodes])
															
--- a/doc/doxygen/chapters/210_check_list_performance.doxy
+++ b/doc/doxygen/chapters/210_check_list_performance.doxy
@@ -177,9 +177,13 @@ structures of StarPU by describing the shape of your machine and/or your
 
																 application at the configure step.
															
 
																 To reduce the memory footprint of the data internal structures of StarPU, one
															
 
																-can set the \ref enable-maxcpus "--enable-maxcpus", \ref enable-maxcudadev
															
 
																-"--enable-maxcudadev", \ref enable-maxopencldev "--enable-maxopencldev" and
															
 
																-\ref enable-maxnodes "--enable-maxnodes" configure parameters to give StarPU
															
 
																+can set the
															
 
																+\ref enable-maxcpus "--enable-maxcpus",
															
 
																+\ref enable-maxnumanodes "--enable-maxnumanodes",
															
 
																+\ref enable-maxcudadev "--enable-maxcudadev",
															
 
																+\ref enable-maxopencldev "--enable-maxopencldev" and
															
 
																+\ref enable-maxnodes "--enable-maxnodes"
															
 
																+configure parameters to give StarPU
															
 
																 the architecture of the machine it will run on, thus tuning the size of the
															
 
																 structures to the machine.
															
--- a/doc/doxygen/chapters/501_environment_variables.doxy
+++ b/doc/doxygen/chapters/501_environment_variables.doxy
@@ -767,6 +767,14 @@ available to the application on each CPU device. Setting it enables allocation
 
																 cache in main memory
															
 
																 </dd>
															
 
																+<dt>STARPU_LIMIT_CPU_NUMA_devid_MEM</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_LIMIT_CPU_NUMA_devid_MEM
															
 
																+\addindex __env__STARPU_LIMIT_CPU_NUMA_devid_MEM
															
 
																+This variable specifies the maximum number of megabytes that should be
															
 
																+available to the application on the NUMA node with the OS identifier <c>devid</c>.
															
 
																+</dd>
															
 
																+
															
 
																 <dt>STARPU_MINIMUM_AVAILABLE_MEM</dt>
															
 
																 <dd>
															
 
																 \anchor STARPU_MINIMUM_AVAILABLE_MEM
															
--- a/doc/doxygen/chapters/510_configure_options.doxy
+++ b/doc/doxygen/chapters/510_configure_options.doxy
@@ -129,6 +129,14 @@ Use at most <c>count</c> CPU cores.  This information is then
 
																 available as the macro ::STARPU_MAXCPUS.
															
 
																 </dd>
															
 
																+<dt>--enable-maxnumanodes=<c>count</c></dt>
															
 
																+<dd>
															
 
																+\anchor enable-maxnumanodes
															
 
																+\addindex __configure__--enable-maxnumanodes
															
 
																+Use at most <c>count</c> NUMA nodes.  This information is then
															
 
																+available as the macro ::STARPU_MAXNUMANODES.
															
 
																+</dd>
															
 
																+
															
 
																 <dt>--disable-cpu</dt>
															
 
																 <dd>
															
 
																 \anchor disable-cpu
															
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -1610,9 +1610,15 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 
																 		{
															
 
																 			case STARPU_CPU_WORKER:
															
 
																 			{
															
 
																+				/* "dedicate" a cpu core to that worker */
															
 
																 				workerarg->bindid = _starpu_get_next_bindid(config, NULL, 0);
															
 
																 				int numaid = workerarg->numa_memory_node = _starpu_worker_numa_node(worker);
															
 
																-				/* "dedicate" a cpu core to that worker */
															
 
																+				if (!numa_init[numaid] && nb_numa_nodes == STARPU_MAXNUMANODES-1)
															
 
																+				{
															
 
																+					_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
															
 
																+					numaid = STARPU_MAIN_RAM;
															
 
																+				}
															
 
																+
															
 
																 				if (numa_init[numaid])
															
 
																 				{
															
 
																 					memory_node = numa_memory_nodes[numaid];
															
@@ -2034,14 +2040,15 @@ static int _starpu_worker_numa_node(unsigned workerid)
 
																 	struct _starpu_machine_topology *topology = &config->topology ;
															
 
																 	hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid) ;
															
 
																 	STARPU_ASSERT(obj) ;
															
 
																-	while (obj->depth != HWLOC_OBJ_NODE)
															
 
																+	while (obj->type != HWLOC_OBJ_NODE)
															
 
																 	{
															
 
																 		obj = obj->parent;
															
 
																 		/* If we don't find a "node" obj before the root, this means
															
 
																 		 * hwloc does not know whether there are numa nodes or not, so
															
 
																 		 * we should not use a per-node sampling in that case. */
															
 
																-		STARPU_ASSERT(obj);
															
 
																+		if (!obj)
															
 
																+			return STARPU_MAIN_RAM;
															
 
																 	}
															
 
																 	STARPU_ASSERT(obj->depth == HWLOC_OBJ_NODE);
															
 
																 	return obj->logical_index;
															
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009-2016  Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2017  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
															
 
																  * Copyright (C) 2011  INRIA
															
 
																  * Copyright (C) 2016  Uppsala University
															
@@ -76,7 +76,7 @@ LIST_TYPE(_starpu_worker,
 
																 	starpu_pthread_cond_t started_cond; /* indicate when the worker is ready */
															
 
																 	starpu_pthread_cond_t ready_cond; /* indicate when the worker is ready */
															
 
																 	unsigned memory_node; /* which memory node is the worker associated with ? */
															
 
																-	unsigned numa_memory_node; /* which numa memory node is the worker associated with ? */
															
 
																+	unsigned numa_memory_node; /* which numa memory node is the worker associated with? (logical index) */
															
 
																 	starpu_pthread_cond_t sched_cond; /* condition variable used when the worker waits for tasks. */
															
 
																         starpu_pthread_mutex_t sched_mutex; /* mutex protecting sched_cond */
															
 
																 	struct starpu_task_list local_tasks; /* this queue contains tasks that have been explicitly submitted to that queue */
															
--- a/src/datawizard/datawizard.c
+++ b/src/datawizard/datawizard.c
@@ -70,7 +70,8 @@ void _starpu_datawizard_progress(unsigned memory_node, unsigned may_alloc)
 
																 int __starpu_datawizard_progress_ram(unsigned may_alloc, unsigned push_requests)
															
 
																 {
															
 
																-	int res = 0, i;
															
 
																+	int res = 0;
															
 
																+	unsigned i;
															
 
																 	unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
															
 
																 	for (i=0; i<nb_numa_nodes; i++)
															
 
																 	{
															
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -216,12 +216,9 @@ uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle)
 
																 uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle_t handle)
															
 
																 {
															
 
																-	int node = STARPU_MAIN_RAM;
															
 
																-#ifdef STARPU_USE_NUMA
															
 
																-	node = handle->home_node;
															
 
																+	int node = handle->home_node;
															
 
																 	if (node < 0 || (starpu_node_get_kind(node) != STARPU_CPU_RAM))
															
 
																 		node = STARPU_MAIN_RAM;
															
 
																-#endif /* STARPU_USE_NUMA */
															
 
																 	/* XXX 0 */
															
 
																 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
															
@@ -232,12 +229,9 @@ uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle_t handle)
 
																 uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle_t handle)
															
 
																 {
															
 
																-	int node = STARPU_MAIN_RAM;
															
 
																-#ifdef STARPU_USE_NUMA
															
 
																-	node = handle->home_node;
															
 
																+	int node = handle->home_node;
															
 
																 	if (node < 0 || (starpu_node_get_kind(node) != STARPU_CPU_RAM))
															
 
																 		node = STARPU_MAIN_RAM;
															
 
																-#endif /* STARPU_USE_NUMA */
															
 
																 	/* XXX 0 */
															
 
																 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
															
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -515,12 +515,9 @@ struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_hand
 
																 void _starpu_data_unregister_ram_pointer(starpu_data_handle_t handle)
															
 
																 {
															
 
																-	int node = STARPU_MAIN_RAM;
															
 
																-#ifdef STARPU_USE_NUMA
															
 
																-	node = handle->home_node;
															
 
																+	int node = handle->home_node;
															
 
																 	if (node < 0 || (starpu_node_get_kind(node) != STARPU_CPU_RAM))
															
 
																 		node = STARPU_MAIN_RAM;
															
 
																-#endif /* STARPU_USE_NUMA */
															
 
																 	const void *ram_ptr = starpu_data_handle_to_pointer(handle, node);
															
 
																 #ifdef STARPU_OPENMP
															
@@ -758,12 +755,9 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
																 			_STARPU_DEBUG("Conversion needed\n");
															
 
																 			void *buffers[1];
															
 
																 			struct starpu_multiformat_interface *format_interface;
															
 
																-			home_node = STARPU_MAIN_RAM;
															
 
																-#ifdef STARPU_USE_NUMA
															
 
																 			home_node = handle->home_node;
															
 
																 			if (home_node < 0 || (starpu_node_get_kind(home_node) != STARPU_CPU_RAM))
															
 
																 				home_node = STARPU_MAIN_RAM;
															
 
																-#endif /* STARPU_USE_NUMA */
															
 
																 			format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, home_node);
															
 
																 			struct starpu_codelet *cl = NULL;
															
 
																 			enum starpu_node_kind node_kind = starpu_node_get_kind(handle->mf_node);
															
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -298,7 +298,7 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 
																 		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, _starpu_memnode_to_numaid(dst_node));
															
 
																 		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
															
 
																 		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
															
 
																-		fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, _starpu_memnode_to_numaid(dst_node), *A);
															
 
																+		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, _starpu_memnode_to_numaid(dst_node), *A);
															
 
																 		if (!*A)
															
 
																 			ret = -ENOMEM;
															
 
																 	}
															
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -1597,6 +1597,9 @@ get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
 
																 	return target;
															
 
																 }
															
 
																+#ifdef STARPU_DEVEL
															
 
																+#  warning TODO: better choose NUMA node
															
 
																+#endif
															
 
																 static unsigned
															
 
																 choose_target(starpu_data_handle_t handle, unsigned node)
															
@@ -1607,7 +1610,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
																 		/* try to push to RAM, if possible, before pushing to disk */
															
 
																 		if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && (starpu_node_get_kind(node) != STARPU_CPU_RAM))
															
 
																 		{
															
 
																- 	                int i;
															
 
																+ 	                unsigned i;
															
 
																 			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
															
 
																 			for (i=0; i<nb_numa_nodes; i++)
															
 
																 			{
															
@@ -1640,7 +1643,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
																 		} else {
															
 
																 		/* node != 0 */
															
 
																 		/* try to push data to RAM, if possible, before pushing to disk */
															
 
																-			int i;
															
 
																+			unsigned i;
															
 
																 			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
															
 
																 			for (i=0; i<nb_numa_nodes; i++)
															
 
																 			{
															
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -166,26 +166,23 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
																 static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED, struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
															
 
																 {
															
 
																 	size_t global_mem;
															
 
																-	starpu_ssize_t limit;
															
 
																+	starpu_ssize_t limit = -1;
															
 
																 	char name[32];
															
 
																-	/* FIXME: do we want logical or physical? */
															
 
																-	sprintf(name, "STARPU_LIMIT_CPU_NUMA%d_MEM", nodeid);
															
 
																-	limit = starpu_get_env_number(name);
															
 
																-	if (limit == -1)
															
 
																-		limit = starpu_get_env_number("STARPU_LIMIT_CPU_MEM");
															
 
																-
															
 
																 #if defined(STARPU_HAVE_HWLOC)
															
 
																 	struct _starpu_machine_topology *topology = &config->topology;
															
 
																-
															
 
																 #ifdef STARPU_USE_NUMA
															
 
																         int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
															
 
																 	if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
															
 
																 	     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
															
 
																-	else
															
 
																-	     global_mem = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid)->memory.local_memory;
															
 
																+	else {
															
 
																+	     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
															
 
																+	     global_mem = obj->memory.local_memory;
															
 
																+	     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
															
 
																+	     limit = starpu_get_env_number(name);
															
 
																+	}
															
 
																 #else /* STARPU_USE_NUMA */
															
 
																 	/* Do not limit ourselves to a single NUMA node */
															
 
																 	global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
															
@@ -193,11 +190,14 @@ static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED
 
																 #else /* STARPU_HAVE_HWLOC */
															
 
																 #ifdef STARPU_DEVEL
															
 
																-#  warning use sysinfo when available to get global size
															
 
																+#  warning TODO: use sysinfo when available to get global size
															
 
																 #endif
															
 
																 	global_mem = 0;
															
 
																 #endif
															
 
																+	if (limit == -1)
															
 
																+		limit = starpu_get_env_number("STARPU_LIMIT_CPU_MEM");
															
 
																+
															
 
																 	if (limit < 0)
															
 
																 		// No limit is defined, we return the global memory size
															
 
																 		return global_mem;
															
--- a/src/util/openmp_runtime_support.c
+++ b/src/util/openmp_runtime_support.c
@@ -2418,13 +2418,10 @@ void starpu_omp_atomic_fallback_inline_end(void)
 
																 void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base)
															
 
																 {
															
 
																-	/* FIXME Oli? */
															
 
																-	int node = STARPU_MAIN_RAM;
															
 
																-#ifdef STARPU_USE_NUMA
															
 
																-	node = handle->home_node;
															
 
																+	/* FIXME Oli: rather iterate over all nodes? */
															
 
																+	int node = handle->home_node;
															
 
																 	if (node < 0 || (starpu_node_get_kind(node) != STARPU_CPU_RAM))
															
 
																 		node = STARPU_MAIN_RAM;
															
 
																-#endif /* STARPU_USE_NUMA */
															
 
																 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
															
 
																 		starpu_data_get_interface_on_node(handle, node);
															
 
																 	assert(vector_interface->id == STARPU_VECTOR_INTERFACE_ID);