Browse Source

Use the STARPU_USE_NUMA environment variable when launching the application to enable NUMA mode, instead of the --enable-numa option at compile time
Also remove the --enable-numa option from configure.ac

Corentin Salingue 8 years ago
parent
commit
101a90d0f7

+ 0 - 11
configure.ac

@@ -576,17 +576,6 @@ AC_MSG_RESULT($nmaxnumanodes)
 AC_DEFINE_UNQUOTED(STARPU_MAXNUMANODES, [$nmaxnumanodes],
 		[maximum number of NUMA nodes])
 
-AC_ARG_ENABLE(numa, [AS_HELP_STRING([--enable-numa],
-	      [use NUMA node(s)])], [enable_numa=$enableval], [enable_numa=no])
-
-if test x$enable_numa = xyes ; then
-	AC_DEFINE(STARPU_USE_NUMA, [1], [NUMA memory nodes support is enabled])
-else
-	nmaxnumanodes=1
-fi
-
-AM_CONDITIONAL([STARPU_USE_NUMA], [test "x$enable_numa" = "xyes"])
-
 
 ###############################################################################
 

+ 4 - 4
doc/doxygen/chapters/api/data_management.doxy

@@ -2,7 +2,7 @@
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2011, 2012 INRIA
+ * Copyright (C) 2011, 2012, 2017  INRIA
  * See the file version.doxy for copying conditions.
  */
 
@@ -104,9 +104,9 @@ data to StarPU, the specified memory node indicates where the piece of
 data initially resides (we also call this memory node the home node of
 a piece of data).
 
-In the case of NUMA systems, functions starpu_numaphysid_get_memory_node()
-and starpu_memory_node_get_numaphysid() can be used to convert from NUMA node
-numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
+In the case of NUMA systems, functions starpu_numa_hwloclogid_to_id()
+and starpu_numa_id_to_hwloclogid() can be used to convert from NUMA node
+numbers as seen by the HWLOC library and NUMA node numbers as seen by StarPU.
 
 \fn void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops)
 \ingroup API_Data_Management

+ 5 - 5
doc/doxygen/chapters/api/workers.doxy

@@ -250,15 +250,15 @@ Return the type of \p node as defined by
 this function should be used in the allocation function to determine
 on which device the memory needs to be allocated.
 
-\fn unsigned starpu_numaphysid_get_memory_node(unsigned numaphysid)
+\fn int starpu_numa_hwloclogid_to_id(int logid)
 \ingroup API_Workers_Properties
 This function returns the identifier of the memory node associated to the NUMA
-node identified by \p numaphysid by the Operating System.
+node identified by \p logid by the HWLOC library.
 
-\fn unsigned starpu_memory_node_get_numaphysid(unsigned node)
+\fn int starpu_numa_id_to_hwloclogid(unsigned id);
 \ingroup API_Workers_Properties
-This function returns the Operating System identifier of the memory node
-whose StarPU identifier is \p node.
+This function returns the HWLOC logical identifier of the memory node
+whose StarPU identifier is \p id.
 
 \fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 \ingroup API_Workers_Properties

+ 8 - 2
examples/cpp/add_vectors_cpp11.cpp

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
- * Copyright (C) 2012 INRIA
+ * Copyright (C) 2012, 2017  INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,7 +29,7 @@
 #endif
 
 #include <starpu.h>
-#if !defined(STARPU_HAVE_CXX11) || defined(STARPU_USE_NUMA)
+#if !defined(STARPU_HAVE_CXX11)
 int main(int argc, char **argv)
 {
 	return 77;
@@ -78,6 +78,12 @@ int main(int argc, char **argv)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	if (starpu_get_nb_numa_nodes() > 1)
+	{
+		starpu_shutdown();
+		return 77;
+	}
+
 	// StarPU data registering
 	starpu_data_handle_t spu_vec_A;
 	starpu_data_handle_t spu_vec_B;

+ 1 - 2
include/starpu_config.h.in

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
- * Copyright (C) 2014  INRIA
+ * Copyright (C) 2014, 2017  INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -102,7 +102,6 @@
 #undef STARPU_HAVE_GLPK_H
 
 #undef STARPU_HAVE_LIBNUMA
-#undef STARPU_USE_NUMA
 
 #undef STARPU_HAVE_WINDOWS
 #undef STARPU_LINUX_SYS

+ 5 - 3
include/starpu_data.h

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
- * Copyright (C) 2016  Inria
+ * Copyright (C) 2016, 2017  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -132,8 +132,10 @@ enum starpu_node_kind
 
 unsigned starpu_worker_get_memory_node(unsigned workerid);
 unsigned starpu_memory_nodes_get_count(void);
-unsigned starpu_numaphysid_get_memory_node(unsigned numaphysid);
-unsigned starpu_memory_node_get_numaphysid(unsigned node);
+int starpu_get_nb_numa_nodes(void);
+int starpu_numa_hwloclogid_to_id(int logid);
+int starpu_numa_id_to_hwloclogid(unsigned id);
+
 enum starpu_node_kind starpu_node_get_kind(unsigned node);
 
 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);

+ 2 - 1
src/core/disk.c

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2013  Corentin Salingue
  * Copyright (C) 2015, 2016  CNRS
+ * Copyright (C) 2017  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -63,7 +64,7 @@ int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_s
 	unsigned disk_memnode = _starpu_memory_node_register(STARPU_DISK_RAM, 0);
 
         /* Connect the disk memory node to all numa memory nodes */
-        int nb_numa_nodes = _starpu_get_nb_numa_nodes();
+        int nb_numa_nodes = starpu_get_nb_numa_nodes();
         int numa_node;
         for (numa_node = 0; numa_node < nb_numa_nodes; numa_node++)
         {

+ 87 - 56
src/core/perfmodel/perfmodel_bus.c

@@ -175,14 +175,21 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
 	/* Allocate a buffer on the host */
 	unsigned char *h_buffer;
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
-#else
-	/* we use STARPU_MAIN_RAM */
-	_STARPU_MALLOC(h_buffer, size);
+	
+#if defined(STARPU_HAVE_HWLOC)
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
+	}
+	else
 #endif
-	cudaHostRegister((void *)h_buffer, size, 0);
+	{
+		/* we use STARPU_MAIN_RAM */
+		_STARPU_MALLOC(h_buffer, size);
+		cudaHostRegister((void *)h_buffer, size, 0);
+	}
 
 	STARPU_ASSERT(cures == cudaSuccess);
 
@@ -252,11 +259,18 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
 	/* Free buffers */
 	cudaHostUnregister(h_buffer);
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	hwloc_free(hwtopology, h_buffer, size);
-#else
-	free(h_buffer);
+#if defined(STARPU_HAVE_HWLOC) 
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		hwloc_free(hwtopology, h_buffer, size);
+	}
+	else
 #endif
+	{
+		free(h_buffer);
+	}
+
 	cudaFree(d_buffer);
 
 	cudaThreadExit();
@@ -421,13 +435,19 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
 	/* Allocate a buffer on the host */
 	unsigned char *h_buffer;
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
-#else
-	/* we use STARPU_MAIN_RAM */
-	_STARPU_MALLOC(h_buffer, size);
+#if defined(STARPU_HAVE_HWLOC)
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
+	}
+	else
 #endif
+	{
+		/* we use STARPU_MAIN_RAM */
+		_STARPU_MALLOC(h_buffer, size);
+	}
 
 	/* hack to avoid third party libs to rebind threads */
 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
@@ -501,11 +521,17 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 	err = clReleaseMemObject(d_buffer);
 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
 		STARPU_OPENCL_REPORT_ERROR(err);
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	hwloc_free(hwtopology, h_buffer, size);
-#else
-	free(h_buffer);
+#if defined(STARPU_HAVE_HWLOC)
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		hwloc_free(hwtopology, h_buffer, size);
+	}
+	else
 #endif
+	{
+		free(h_buffer);
+	}
 
 	/* Uninitiliaze OpenCL context on the device */
 	if (not_initialized == 1)
@@ -632,48 +658,53 @@ static void measure_bandwidth_between_host_and_dev(int dev, struct dev_timing *d
 
 static void measure_bandwidth_latency_between_numa(int numa_src, int numa_dst)
 {
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	double start, end, timing;
-	unsigned iter;
+#if defined(STARPU_HAVE_HWLOC)
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		double start, end, timing;
+		unsigned iter;
 
-	unsigned char *h_buffer;	
-	hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
+		unsigned char *h_buffer;	
+		hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
 
-	unsigned char *d_buffer;	
-	hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
-	d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
+		unsigned char *d_buffer;	
+		hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
+		d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
 
-	memset(h_buffer, 0, SIZE);
+		memset(h_buffer, 0, SIZE);
 
-	start = starpu_timing_now();
-	for (iter = 0; iter < NITER; iter++)
-	{
-		memcpy(d_buffer, h_buffer, SIZE);
-	}
-	end = starpu_timing_now();
-	timing = end - start;
-	
-	numa_timing[numa_src][numa_dst] = timing/NITER/SIZE;
+		start = starpu_timing_now();
+		for (iter = 0; iter < NITER; iter++)
+		{
+			memcpy(d_buffer, h_buffer, SIZE);
+		}
+		end = starpu_timing_now();
+		timing = end - start;
 
-	start = starpu_timing_now();
-	for (iter = 0; iter < NITER; iter++)
-	{
-		memcpy(d_buffer, h_buffer, 1);
-	}
-	end = starpu_timing_now();
-	timing = end - start;
-	
-	numa_latency[numa_src][numa_dst] = timing/NITER;
+		numa_timing[numa_src][numa_dst] = timing/NITER/SIZE;
 
-	hwloc_free(hwtopology, h_buffer, SIZE);
-	hwloc_free(hwtopology, d_buffer, SIZE);
-#else
-	/* Cannot make a real calibration */
-	numa_timing[numa_src][numa_dst] = 0.01;
-	numa_latency[numa_src][numa_dst] = 0;
-#endif
+		start = starpu_timing_now();
+		for (iter = 0; iter < NITER; iter++)
+		{
+			memcpy(d_buffer, h_buffer, 1);
+		}
+		end = starpu_timing_now();
+		timing = end - start;
 
+		numa_latency[numa_src][numa_dst] = timing/NITER;
+
+		hwloc_free(hwtopology, h_buffer, SIZE);
+		hwloc_free(hwtopology, d_buffer, SIZE);
+	}
+	else
+#endif
+	{
+		/* Cannot make a real calibration */
+		numa_timing[numa_src][numa_dst] = 0.01;
+		numa_latency[numa_src][numa_dst] = 0;
+	}
 }
 
 static void benchmark_all_gpu_devices(void)

+ 3 - 3
src/core/simgrid.c

@@ -1,8 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012-2017  Université de Bordeaux
- * Copyright (C) 2016  	    Inria
- * Copyright (C) 2016, 2017  	    CNRS
+ * Copyright (C) 2016, 2017  Inria
+ * Copyright (C) 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -1043,7 +1043,7 @@ void _starpu_simgrid_count_ngpus(void)
 			for (src2 = 1; src2 < STARPU_MAXNODES; src2++)
 			{
 				int numa;
-				int nnumas = _starpu_get_nb_numa_nodes();
+				int nnumas = starpu_get_nb_numa_nodes();
 				int found = 0;
 				for (numa = 0; numa < nnumas; numa++)
 					if (starpu_bus_get_id(src2, numa) != -1)

+ 184 - 156
src/core/topology.c

@@ -65,9 +65,9 @@ static int nobind;
 /* For checking whether two workers share the same PU, indexed by PU number */
 static int cpu_worker[STARPU_MAXCPUS];
 static unsigned nb_numa_nodes = 0;
-static unsigned numa_memory_nodes[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
+static int numa_memory_nodes[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
 static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES];
-static int _starpu_worker_numa_node(unsigned workerid);
+static int _starpu_get_numa_node_worker(unsigned workerid);
 
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 
@@ -96,12 +96,12 @@ static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
 struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS];
 #endif
 
-int _starpu_get_nb_numa_nodes(void)
+int starpu_get_nb_numa_nodes(void)
 {
 	return nb_numa_nodes;
 }
 
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
+#if defined(STARPU_HAVE_HWLOC)
 static int numa_get_logical_id(hwloc_obj_t obj)
 {
 	STARPU_ASSERT(obj);
@@ -119,42 +119,52 @@ static int numa_get_logical_id(hwloc_obj_t obj)
 }
 #endif
 
-static int _starpu_worker_numa_node(unsigned workerid)
+static int _starpu_get_numa_node_worker(unsigned workerid)
 {
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
-	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
-	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
-	struct _starpu_machine_topology *topology = &config->topology ;
-
-	hwloc_obj_t obj;
-	switch(worker->arch) 	
+#if defined(STARPU_HAVE_HWLOC)
+	char * state;
+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
 	{
-		case STARPU_CPU_WORKER:
-			obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
-			break;
-		default:
-			STARPU_ABORT();
-	}
+		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+		struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
+		struct _starpu_machine_topology *topology = &config->topology ;
 
-	return numa_get_logical_id(obj);
-		
-#else 
-	(void) workerid; /* unused */
-	return -1;
+		hwloc_obj_t obj;
+		switch(worker->arch) 	
+		{
+			case STARPU_CPU_WORKER:
+				obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
+				break;
+			default:
+				STARPU_ABORT();
+		}
+
+		return numa_get_logical_id(obj);
+	}
+	else		
 #endif 
+	{
+		(void) workerid; /* unused */
+		return -1;
+	}
 }
 
 static int _starpu_numa_get_logical_id_from_pu(int pu)
 {
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
-	struct _starpu_machine_config *config = _starpu_get_machine_config();
-	struct _starpu_machine_topology *topology = &config->topology;
+#if defined(STARPU_HAVE_HWLOC)
+	if (nb_numa_nodes > 1)
+	{
+		struct _starpu_machine_config *config = _starpu_get_machine_config();
+		struct _starpu_machine_topology *topology = &config->topology;
 
-	hwloc_obj_t obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, pu);
-	return numa_get_logical_id(obj);
-#else
-	return -1;
+		hwloc_obj_t obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, pu);
+		return numa_get_logical_id(obj);
+	}
+	else
 #endif
+	{
+		return -1;
+	}
 }
 
 
@@ -913,18 +923,27 @@ unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config S
 #endif
         _starpu_init_topology(config);
 
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
-	struct _starpu_machine_topology *topology = &config->topology ;
-        int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
-	int res = nnumanodes > 0 ? nnumanodes : 1 ;
-#else /* STARPU_USE_NUMA */
-	int res = 1 ;
-#endif /* STARPU_USE_NUMA */
+	int res;
+#if defined(STARPU_HAVE_HWLOC)
+	char * state;
+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
+	{
+		struct _starpu_machine_topology *topology = &config->topology ;
+		int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
+		res = nnumanodes > 0 ? nnumanodes : 1 ;
+	}
+	else
+#endif 
+	{	
+		res = 1;
+	}
+
 	STARPU_ASSERT_MSG(res <= STARPU_MAXNUMANODES, "Number of NUMA nodes discovered is higher than maximum accepted ! Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n");
 	return res;
 }
 
-int _starpu_numa_logid_to_id(unsigned logid)
+//TODO change this in an array
+int starpu_numa_hwloclogid_to_id(int logid)
 {
 	unsigned n;
 	for (n = 0; n < nb_numa_nodes; n++)
@@ -933,7 +952,7 @@ int _starpu_numa_logid_to_id(unsigned logid)
 	return -1;
 }
 
-unsigned _starpu_numa_id_to_logid(unsigned id)
+int starpu_numa_id_to_hwloclogid(unsigned id)
 {
 	STARPU_ASSERT(id < STARPU_MAXNUMANODES);
 	return numa_memory_nodes[id];
@@ -1888,10 +1907,10 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 		struct _starpu_worker *workerarg = &config->workers[worker];
 		if (workerarg->arch == STARPU_CPU_WORKER)
 		{
-			int numa_logical_id = _starpu_worker_numa_node(worker);
+			int numa_logical_id = _starpu_get_numa_node_worker(worker);
 
 			/* Convert logical id to StarPU id to check if this NUMA node is already saved or not */
-			int numa_starpu_id = _starpu_numa_logid_to_id(numa_logical_id);
+			int numa_starpu_id = starpu_numa_hwloclogid_to_id(numa_logical_id);
 
 			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
 			{
@@ -1925,120 +1944,124 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 	_STARPU_DISP("No NUMA nodes found when checking CPU workers. Take NUMA nodes attached to CUDA and OpenCL devices... \n");
 #endif
 
-
-#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
-	for (i = 0; i < config->topology.ncudagpus; i++)
+	char * state;
+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
 	{
-		hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
-		
-		/* Hwloc cannot recognize some devices */
-		if (!obj)
-			continue;
-			
-		while (obj->type != HWLOC_OBJ_NODE)
+		/* NUMA mode activated */
+#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
+		for (i = 0; i < config->topology.ncudagpus; i++)
 		{
-			obj = obj->parent;
+			hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
 
-			/* If we don't find a "node" obj before the root, this means
-			 * hwloc does not know whether there are numa nodes or not, so
-			 * we should not use a per-node sampling in that case. */
+			/* Hwloc cannot recognize some devices */
 			if (!obj)
 				continue;
-		}
-		int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
 
-		if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
-		{
-			_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
-			/* Don't create a new NUMA node */
-			numa_starpu_id = STARPU_MAIN_RAM;
-		}
+			while (obj->type != HWLOC_OBJ_NODE)
+			{
+				obj = obj->parent;
 
-		if (numa_starpu_id == -1)
-		{
-			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
-			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
-			numa_memory_nodes[memnode] = obj->logical_index;
-			nb_numa_nodes++;
+				/* If we don't find a "node" obj before the root, this means
+				 * hwloc does not know whether there are numa nodes or not, so
+				 * we should not use a per-node sampling in that case. */
+				if (!obj)
+					continue;
+			}
+			int numa_starpu_id = starpu_numa_hwloclogid_to_id(obj->logical_index);
+
+			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
+			{
+				_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
+				/* Don't create a new NUMA node */
+				numa_starpu_id = STARPU_MAIN_RAM;
+			}
+
+			if (numa_starpu_id == -1)
+			{
+				int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
+				STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
+				numa_memory_nodes[memnode] = obj->logical_index;
+				nb_numa_nodes++;
 #ifdef STARPU_SIMGRID
-			snprintf(name, sizeof(name), "RAM%d", memnode);
-			host = _starpu_simgrid_get_host_by_name(name);
-			STARPU_ASSERT(host);
-			_starpu_simgrid_memory_node_set_host(memnode, host);
+				snprintf(name, sizeof(name), "RAM%d", memnode);
+				host = _starpu_simgrid_get_host_by_name(name);
+				STARPU_ASSERT(host);
+				_starpu_simgrid_memory_node_set_host(memnode, host);
 #endif
-		}
-	}	
+			}
+		}	
 #endif
 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
-	if (config->topology.nopenclgpus > 0)
-	{
-		cl_int err;
-		cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
-		cl_uint nb_platforms;
-		unsigned platform;
-		unsigned nb_opencl_devices = 0, num = 0;
-
-		err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
-		if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
-			nb_platforms=0;
-
-		cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
-		if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
-			device_type |= CL_DEVICE_TYPE_CPU;
-		if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
-			device_type = CL_DEVICE_TYPE_CPU;
-
-		for (platform = 0; platform < nb_platforms ; platform++)
+		if (config->topology.nopenclgpus > 0)
 		{
-			err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
-			if (err != CL_SUCCESS)
-				num = 0;
-			nb_opencl_devices += num;
-
-			for (i = 0; i < num; i++)
+			cl_int err;
+			cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
+			cl_uint nb_platforms;
+			unsigned platform;
+			unsigned nb_opencl_devices = 0, num = 0;
+
+			err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
+			if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
+				nb_platforms=0;
+
+			cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
+			if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
+				device_type |= CL_DEVICE_TYPE_CPU;
+			if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
+				device_type = CL_DEVICE_TYPE_CPU;
+
+			for (platform = 0; platform < nb_platforms ; platform++)
 			{
-				hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
+				err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
+				if (err != CL_SUCCESS)
+					num = 0;
+				nb_opencl_devices += num;
 
-				/* Hwloc cannot recognize some devices */
-				if (!obj)
-					continue;
-
-				while (obj->type != HWLOC_OBJ_NODE)
+				for (i = 0; i < num; i++)
 				{
-					obj = obj->parent;
+					hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
 
-					/* If we don't find a "node" obj before the root, this means
-					 * hwloc does not know whether there are numa nodes or not, so
-					 * we should not use a per-node sampling in that case. */
+					/* Hwloc cannot recognize some devices */
 					if (!obj)
 						continue;
-				}
-				int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
 
-				if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
-				{
-					_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
-					/* Don't create a new NUMA node */
-					numa_starpu_id = STARPU_MAIN_RAM;
-				}
+					while (obj->type != HWLOC_OBJ_NODE)
+					{
+						obj = obj->parent;
 
-				if (numa_starpu_id == -1)
-				{
-					int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
-					STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
-					numa_memory_nodes[memnode] = obj->logical_index;
-					nb_numa_nodes++;
+						/* If we don't find a "node" obj before the root, this means
+						 * hwloc does not know whether there are numa nodes or not, so
+						 * we should not use a per-node sampling in that case. */
+						if (!obj)
+							continue;
+					}
+					int numa_starpu_id = starpu_numa_hwloclogid_to_id(obj->logical_index);
+
+					if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
+					{
+						_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
+						/* Don't create a new NUMA node */
+						numa_starpu_id = STARPU_MAIN_RAM;
+					}
+
+					if (numa_starpu_id == -1)
+					{
+						int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
+						STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
+						numa_memory_nodes[memnode] = obj->logical_index;
+						nb_numa_nodes++;
 #ifdef STARPU_SIMGRID
-					snprintf(name, sizeof(name), "RAM%d", memnode);
-					host = _starpu_simgrid_get_host_by_name(name);
-					STARPU_ASSERT(host);
-					_starpu_simgrid_memory_node_set_host(memnode, host);
+						snprintf(name, sizeof(name), "RAM%d", memnode);
+						host = _starpu_simgrid_get_host_by_name(name);
+						STARPU_ASSERT(host);
+						_starpu_simgrid_memory_node_set_host(memnode, host);
 #endif
-				}
-			}	
+					}
+				}	
+			}
 		}
-	}
 #endif
+	}
 	
 #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
 	//Found NUMA nodes from CUDA nodes
@@ -2059,38 +2082,43 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 	unsigned numa;
 	for (numa = 0; numa < nnuma; numa++)
 	{
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
-		hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
-		unsigned numa_logical_id = obj->logical_index;
+#if defined(STARPU_HAVE_HWLOC)
+		if (nnuma > 1)
+		{
+			hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
+			unsigned numa_logical_id = obj->logical_index;
 
-		int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
-		STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
+			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
 
-		numa_memory_nodes[memnode] = numa_logical_id;
-		nb_numa_nodes++;								
+			numa_memory_nodes[memnode] = numa_logical_id;
+			nb_numa_nodes++;								
 
 #ifdef STARPU_SIMGRID
-		snprintf(name, sizeof(name), "RAM%d", memnode);
-		host = _starpu_simgrid_get_host_by_name(name);
-		STARPU_ASSERT(host);
-		_starpu_simgrid_memory_node_set_host(memnode, host);
+			snprintf(name, sizeof(name), "RAM%d", memnode);
+			host = _starpu_simgrid_get_host_by_name(name);
+			STARPU_ASSERT(host);
+			_starpu_simgrid_memory_node_set_host(memnode, host);
 #endif
-#else /* defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC) */
+		}
+		else
+#endif /* defined(STARPU_HAVE_HWLOC) */
+		{
 
-		/* In this case, nnuma has only one node */
-		int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
-		STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
+			/* In this case, nnuma has only one node */
+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
+			STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
 
-		numa_memory_nodes[memnode] = -1;
-		nb_numa_nodes++;								
+			numa_memory_nodes[memnode] = -1;
+			nb_numa_nodes++;								
 #ifdef STARPU_SIMGRID
-		char name[16];
-		msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
-		STARPU_ASSERT(host);
-		_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
+			char name[16];
+			msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
+			STARPU_ASSERT(host);
+			_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
 #endif
+		}
 
-#endif /* defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC) */
 	}	
 	
 	STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n");	
@@ -2168,8 +2196,8 @@ _starpu_init_workers_binding_and_memory (struct _starpu_machine_config *config,
 		{
 			case STARPU_CPU_WORKER:
 			{
-				int numa_logical_id = _starpu_worker_numa_node(worker);
-				int numa_starpu_id =  _starpu_numa_logid_to_id(numa_logical_id);
+				int numa_logical_id = _starpu_get_numa_node_worker(worker);
+				int numa_starpu_id =  starpu_numa_hwloclogid_to_id(numa_logical_id);
 				if (numa_starpu_id >= STARPU_MAXNUMANODES)
 					numa_starpu_id = STARPU_MAIN_RAM;
 
@@ -2607,7 +2635,7 @@ starpu_topology_print (FILE *output)
 		fprintf(output, "------\tNUMA %u\t------\n", numa);
 		for (pu = 0; pu < topology->nhwpus; pu++)
 		{
-			if (_starpu_numa_id_to_logid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
+			if (starpu_numa_id_to_hwloclogid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
 			{
 				if ((pu % nthreads_per_core) == 0)
 					fprintf(output, "core %u", pu / nthreads_per_core);

+ 3 - 2
src/core/topology.h

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2009-2010, 2012, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2015, 2017  CNRS
+ * Copyright (C) 2017  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -71,7 +72,7 @@ void _starpu_bind_thread_on_cpus(struct _starpu_machine_config *config STARPU_AT
 
 struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d);
 
-int _starpu_get_nb_numa_nodes(void);
-unsigned _starpu_numa_id_to_logid(unsigned id);
+int starpu_get_nb_numa_nodes(void);
+int starpu_numa_id_to_hwloclogid(unsigned id);
 	
 #endif // __TOPOLOGY_H__

+ 1 - 1
src/core/workers.c

@@ -1598,7 +1598,7 @@ void starpu_shutdown(void)
 	_starpu_kill_all_workers(&_starpu_config);
 	
 	unsigned i;
-	unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
+	unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
 	for (i=0; i<nb_numa_nodes; i++)
 	{
 		_starpu_free_all_automatically_allocated_buffers(i);

+ 2 - 2
src/datawizard/coherency.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures. *
  * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2014  INRIA
+ * Copyright (C) 2014, 2017  INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -326,7 +326,7 @@ static unsigned chose_best_numa_between_src_and_dest(int src, int dst)
 	double timing_best;
 	int best_numa = -1;
 	unsigned numa;
-	const unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
+	const unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
 	for(numa = 0; numa < nb_numa_nodes; numa++)
 	{
 		double actual = 1.0/starpu_transfer_bandwidth(src, numa) + 1.0/starpu_transfer_bandwidth(numa, dst);

+ 4 - 3
src/datawizard/malloc.c

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2009-2010, 2012-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
+ * Copyright (C) 2017  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -305,10 +306,10 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 #endif
 	}
 #ifdef STARPU_HAVE_HWLOC
-	if (_starpu_get_nb_numa_nodes() > 1) {
+	if (starpu_get_nb_numa_nodes() > 1) {
 		struct _starpu_machine_config *config = _starpu_get_machine_config();
 		hwloc_topology_t hwtopology = config->topology.hwtopology;
-		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, _starpu_numa_id_to_logid(dst_node));
+		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, starpu_numa_id_to_hwloclogid(dst_node));
 		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
 		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
 		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A);
@@ -494,7 +495,7 @@ int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags
 #endif
 	}
 #ifdef STARPU_HAVE_HWLOC
-	else if (_starpu_get_nb_numa_nodes() > 1) {
+	else if (starpu_get_nb_numa_nodes() > 1) {
 		struct _starpu_machine_config *config = _starpu_get_machine_config();
 		hwloc_topology_t hwtopology = config->topology.hwtopology;
 		hwloc_free(hwtopology, A, dim);

+ 4 - 4
src/datawizard/memalloc.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2016  Inria
+ * Copyright (C) 2016, 2017  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -1620,7 +1620,7 @@ get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
 			if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
 			{
 				unsigned numa;
-				unsigned nnumas = _starpu_get_nb_numa_nodes();
+				unsigned nnumas = starpu_get_nb_numa_nodes();
 				for (numa = 0; numa < nnumas; numa++)
 				{
 					/* TODO : check if starpu_transfer_predict(node, i,...) is the same */
@@ -1651,7 +1651,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 		if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && (starpu_node_get_kind(node) != STARPU_CPU_RAM))
 		{
  	                unsigned i;
-			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
+			unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
 			for (i=0; i<nb_numa_nodes; i++)
 			{
 				if (handle->per_node[i].allocated || 
@@ -1683,7 +1683,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 		/* node != 0 */
 		/* try to push data to RAM if we can before to push on disk*/
 			unsigned i;
-			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
+			unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
 			for (i=0; i<nb_numa_nodes; i++)
 			{
 				if (handle->per_node[i].allocated || 

+ 19 - 14
src/drivers/cpu/driver_cpu.c

@@ -166,21 +166,26 @@ static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED
 
 #if defined(STARPU_HAVE_HWLOC)
 	struct _starpu_machine_topology *topology = &config->topology;
-#ifdef STARPU_USE_NUMA
-        int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
-
-	if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
-	     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
-	else {
-	     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
-	     global_mem = obj->memory.local_memory;
-	     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
-	     limit = starpu_get_env_number(name);
+
+	int nnumas = starpu_get_nb_numa_nodes();
+	if (nnumas > 1)
+	{
+		int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
+
+		if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
+		     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
+		else {
+		     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
+		     global_mem = obj->memory.local_memory;
+		     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
+		     limit = starpu_get_env_number(name);
+		}
+	}
+	else
+	{
+		/* Do not limit ourself to a single NUMA node */
+		global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
 	}
-#else /* STARPU_USE_NUMA */
-	/* Do not limit ourself to a single NUMA node */
-	global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
-#endif /* STARPU_USE_NUMA */
 
 #else /* STARPU_HAVE_HWLOC */
 #ifdef STARPU_DEVEL

+ 8 - 8
tests/datawizard/nowhere.c

@@ -1,6 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2015-2016  Université de Bordeaux
+ * Copyright (C) 2017  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,13 +26,6 @@
  * Try the NOWHERE flag
  */
 
-#ifdef STARPU_USE_NUMA
-int main(int argc, char **argv)
-{
-	/* FIXME: assumes only one RAM node */
-	return STARPU_TEST_SKIPPED;
-}
-#else
 static int x, y;
 
 static void prod(void *descr[], void *_args STARPU_ATTRIBUTE_UNUSED)
@@ -91,6 +85,13 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+	if (starpu_get_nb_numa_nodes() > 1)
+	{
+		/* FIXME: assumes only one RAM node */
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
 	starpu_variable_data_register(&handle_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
 	starpu_variable_data_register(&handle_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
 
@@ -138,4 +139,3 @@ enodev:
 	starpu_shutdown();
 	return STARPU_TEST_SKIPPED;
 }
-#endif