8 years ago · 101a90d0f7
--- a/configure.ac
+++ b/configure.ac
@@ -576,17 +576,6 @@ AC_MSG_RESULT($nmaxnumanodes)
 
				 AC_DEFINE_UNQUOTED(STARPU_MAXNUMANODES, [$nmaxnumanodes],
			
 
				 		[maximum number of NUMA nodes])
			
 
				 
			
 
				-AC_ARG_ENABLE(numa, [AS_HELP_STRING([--enable-numa],
			
 
				-	      [use NUMA node(s)])], [enable_numa=$enableval], [enable_numa=no])
			
 
				-
			
 
				-if test x$enable_numa = xyes ; then
			
 
				-	AC_DEFINE(STARPU_USE_NUMA, [1], [NUMA memory nodes support is enabled])
			
 
				-else
			
 
				-	nmaxnumanodes=1
			
 
				-fi
			
 
				-
			
 
				-AM_CONDITIONAL([STARPU_USE_NUMA], [test "x$enable_numa" = "xyes"])
			
 
				-
			
 
				 
			
 
				 ###############################################################################
			
 
				 
			
--- a/doc/doxygen/chapters/api/data_management.doxy
+++ b/doc/doxygen/chapters/api/data_management.doxy
@@ -2,7 +2,7 @@
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				- * Copyright (C) 2011, 2012 INRIA
			
 
				+ * Copyright (C) 2011, 2012, 2017  INRIA
			
 
				  * See the file version.doxy for copying conditions.
			
 
				  */
			
 
				 
			
@@ -104,9 +104,9 @@ data to StarPU, the specified memory node indicates where the piece of
 
				 data initially resides (we also call this memory node the home node of
			
 
				 a piece of data).
			
 
				 
			
 
				-In the case of NUMA systems, functions starpu_numaphysid_get_memory_node()
			
 
				-and starpu_memory_node_get_numaphysid() can be used to convert from NUMA node
			
 
				-numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
			
 
				+In the case of NUMA systems, functions starpu_numa_hwloclogid_to_id()
			
 
				+and starpu_numa_id_to_hwloclogid() can be used to convert between NUMA node
			
 
				+numbers as seen by the HWLOC library and NUMA node numbers as seen by StarPU.
			
 
				 
			
 
				 \fn void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops)
			
 
				 \ingroup API_Data_Management
			
--- a/doc/doxygen/chapters/api/workers.doxy
+++ b/doc/doxygen/chapters/api/workers.doxy
@@ -250,15 +250,15 @@ Return the type of \p node as defined by
 
				 this function should be used in the allocation function to determine
			
 
				 on which device the memory needs to be allocated.
			
 
				 
			
 
				-\fn unsigned starpu_numaphysid_get_memory_node(unsigned numaphysid)
			
 
				+\fn int starpu_numa_hwloclogid_to_id(int logid)
			
 
				 \ingroup API_Workers_Properties
			
 
				 This function returns the identifier of the memory node associated to the NUMA
			
 
				-node identified by \p numaphysid by the Operating System.
			
 
				+node identified by \p logid by the HWLOC library.
			
 
				 
			
 
				-\fn unsigned starpu_memory_node_get_numaphysid(unsigned node)
			
 
				+\fn int starpu_numa_id_to_hwloclogid(unsigned id)
			
 
				 \ingroup API_Workers_Properties
			
 
				-This function returns the Operating System identifier of the memory node
			
 
				-whose StarPU identifier is \p node.
			
 
				+This function returns the HWLOC logical identifier of the memory node
			
 
				+whose StarPU identifier is \p id.
			
 
				 
			
 
				 \fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
			
 
				 \ingroup API_Workers_Properties
			
--- a/examples/cpp/add_vectors_cpp11.cpp
+++ b/examples/cpp/add_vectors_cpp11.cpp
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
			
 
				- * Copyright (C) 2012 INRIA
			
 
				+ * Copyright (C) 2012, 2017  INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -29,7 +29,7 @@
 
				 #endif
			
 
				 
			
 
				 #include <starpu.h>
			
 
				-#if !defined(STARPU_HAVE_CXX11) || defined(STARPU_USE_NUMA)
			
 
				+#if !defined(STARPU_HAVE_CXX11)
			
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				 	return 77;
			
@@ -78,6 +78,12 @@ int main(int argc, char **argv)
 
				 		return 77;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				+	if (starpu_get_nb_numa_nodes() > 1)
			
 
				+	{
			
 
				+		starpu_shutdown();
			
 
				+		return 77;
			
 
				+	}
			
 
				+
			
 
				 	// StarPU data registering
			
 
				 	starpu_data_handle_t spu_vec_A;
			
 
				 	starpu_data_handle_t spu_vec_B;
			
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2016  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
			
 
				- * Copyright (C) 2014  INRIA
			
 
				+ * Copyright (C) 2014, 2017  INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -102,7 +102,6 @@
 
				 #undef STARPU_HAVE_GLPK_H
			
 
				 
			
 
				 #undef STARPU_HAVE_LIBNUMA
			
 
				-#undef STARPU_USE_NUMA
			
 
				 
			
 
				 #undef STARPU_HAVE_WINDOWS
			
 
				 #undef STARPU_LINUX_SYS
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2010-2017  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
			
 
				- * Copyright (C) 2016  Inria
			
 
				+ * Copyright (C) 2016, 2017  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -132,8 +132,10 @@ enum starpu_node_kind
 
				 
			
 
				 unsigned starpu_worker_get_memory_node(unsigned workerid);
			
 
				 unsigned starpu_memory_nodes_get_count(void);
			
 
				-unsigned starpu_numaphysid_get_memory_node(unsigned numaphysid);
			
 
				-unsigned starpu_memory_node_get_numaphysid(unsigned node);
			
 
				+int starpu_get_nb_numa_nodes(void);
			
 
				+int starpu_numa_hwloclogid_to_id(int logid);
			
 
				+int starpu_numa_id_to_hwloclogid(unsigned id);
			
 
				+
			
 
				 enum starpu_node_kind starpu_node_get_kind(unsigned node);
			
 
				 
			
 
				 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
			
--- a/src/core/disk.c
+++ b/src/core/disk.c
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2013  Corentin Salingue
			
 
				  * Copyright (C) 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2017  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -63,7 +64,7 @@ int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_s
 
				 	unsigned disk_memnode = _starpu_memory_node_register(STARPU_DISK_RAM, 0);
			
 
				 
			
 
				         /* Connect the disk memory node to all numa memory nodes */
			
 
				-        int nb_numa_nodes = _starpu_get_nb_numa_nodes();
			
 
				+        int nb_numa_nodes = starpu_get_nb_numa_nodes();
			
 
				         int numa_node;
			
 
				         for (numa_node = 0; numa_node < nb_numa_nodes; numa_node++)
			
 
				         {
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -175,14 +175,21 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
				 
			
 
				 	/* Allocate a buffer on the host */
			
 
				 	unsigned char *h_buffer;
			
 
				-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
			
 
				-	hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
			
 
				-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				-#else
			
 
				-	/* we use STARPU_MAIN_RAM */
			
 
				-	_STARPU_MALLOC(h_buffer, size);
			
 
				+	
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+	if (nnumas > 1)
			
 
				+	{
			
 
				+		/* NUMA mode activated */
			
 
				+		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
			
 
				+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				+	}
			
 
				+	else
			
 
				 #endif
			
 
				-	cudaHostRegister((void *)h_buffer, size, 0);
			
 
				+	{
			
 
				+		/* we use STARPU_MAIN_RAM */
			
 
				+		_STARPU_MALLOC(h_buffer, size);
			
 
				+		cudaHostRegister((void *)h_buffer, size, 0);
			
 
				+	}
			
 
				 
			
 
				 	STARPU_ASSERT(cures == cudaSuccess);
			
 
				 
			
@@ -252,11 +259,18 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
				 
			
 
				 	/* Free buffers */
			
 
				 	cudaHostUnregister(h_buffer);
			
 
				-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
			
 
				-	hwloc_free(hwtopology, h_buffer, size);
			
 
				-#else
			
 
				-	free(h_buffer);
			
 
				+#if defined(STARPU_HAVE_HWLOC) 
			
 
				+	if (nnumas > 1)
			
 
				+	{
			
 
				+		/* NUMA mode activated */
			
 
				+		hwloc_free(hwtopology, h_buffer, size);
			
 
				+	}
			
 
				+	else
			
 
				 #endif
			
 
				+	{
			
 
				+		free(h_buffer);
			
 
				+	}
			
 
				+
			
 
				 	cudaFree(d_buffer);
			
 
				 
			
 
				 	cudaThreadExit();
			
@@ -421,13 +435,19 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 
				 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 	/* Allocate a buffer on the host */
			
 
				 	unsigned char *h_buffer;
			
 
				-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
			
 
				-	hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
			
 
				-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				-#else
			
 
				-	/* we use STARPU_MAIN_RAM */
			
 
				-	_STARPU_MALLOC(h_buffer, size);
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+	if (nnumas > 1)
			
 
				+	{
			
 
				+		/* NUMA mode activated */
			
 
				+		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
			
 
				+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				+	}
			
 
				+	else
			
 
				 #endif
			
 
				+	{
			
 
				+		/* we use STARPU_MAIN_RAM */
			
 
				+		_STARPU_MALLOC(h_buffer, size);
			
 
				+	}
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
@@ -501,11 +521,17 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 
				 	err = clReleaseMemObject(d_buffer);
			
 
				 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
			
 
				 		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
			
 
				-	hwloc_free(hwtopology, h_buffer, size);
			
 
				-#else
			
 
				-	free(h_buffer);
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+	if (nnumas > 1)
			
 
				+	{
			
 
				+		/* NUMA mode activated */
			
 
				+		hwloc_free(hwtopology, h_buffer, size);
			
 
				+	}
			
 
				+	else
			
 
				 #endif
			
 
				+	{
			
 
				+		free(h_buffer);
			
 
				+	}
			
 
				 
			
 
				 	/* Uninitiliaze OpenCL context on the device */
			
 
				 	if (not_initialized == 1)
			
@@ -632,48 +658,53 @@ static void measure_bandwidth_between_host_and_dev(int dev, struct dev_timing *d
 
				 
			
 
				 static void measure_bandwidth_latency_between_numa(int numa_src, int numa_dst)
			
 
				 {
			
 
				-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
			
 
				-	double start, end, timing;
			
 
				-	unsigned iter;
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+	if (nnumas > 1)
			
 
				+	{
			
 
				+		/* NUMA mode activated */
			
 
				+		double start, end, timing;
			
 
				+		unsigned iter;
			
 
				 
			
 
				-	unsigned char *h_buffer;	
			
 
				-	hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
			
 
				-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				+		unsigned char *h_buffer;	
			
 
				+		hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
			
 
				+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				 
			
 
				-	unsigned char *d_buffer;	
			
 
				-	hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
			
 
				-	d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				+		unsigned char *d_buffer;	
			
 
				+		hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
			
 
				+		d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				 
			
 
				-	memset(h_buffer, 0, SIZE);
			
 
				+		memset(h_buffer, 0, SIZE);
			
 
				 
			
 
				-	start = starpu_timing_now();
			
 
				-	for (iter = 0; iter < NITER; iter++)
			
 
				-	{
			
 
				-		memcpy(d_buffer, h_buffer, SIZE);
			
 
				-	}
			
 
				-	end = starpu_timing_now();
			
 
				-	timing = end - start;
			
 
				-	
			
 
				-	numa_timing[numa_src][numa_dst] = timing/NITER/SIZE;
			
 
				+		start = starpu_timing_now();
			
 
				+		for (iter = 0; iter < NITER; iter++)
			
 
				+		{
			
 
				+			memcpy(d_buffer, h_buffer, SIZE);
			
 
				+		}
			
 
				+		end = starpu_timing_now();
			
 
				+		timing = end - start;
			
 
				 
			
 
				-	start = starpu_timing_now();
			
 
				-	for (iter = 0; iter < NITER; iter++)
			
 
				-	{
			
 
				-		memcpy(d_buffer, h_buffer, 1);
			
 
				-	}
			
 
				-	end = starpu_timing_now();
			
 
				-	timing = end - start;
			
 
				-	
			
 
				-	numa_latency[numa_src][numa_dst] = timing/NITER;
			
 
				+		numa_timing[numa_src][numa_dst] = timing/NITER/SIZE;
			
 
				 
			
 
				-	hwloc_free(hwtopology, h_buffer, SIZE);
			
 
				-	hwloc_free(hwtopology, d_buffer, SIZE);
			
 
				-#else
			
 
				-	/* Cannot make a real calibration */
			
 
				-	numa_timing[numa_src][numa_dst] = 0.01;
			
 
				-	numa_latency[numa_src][numa_dst] = 0;
			
 
				-#endif
			
 
				+		start = starpu_timing_now();
			
 
				+		for (iter = 0; iter < NITER; iter++)
			
 
				+		{
			
 
				+			memcpy(d_buffer, h_buffer, 1);
			
 
				+		}
			
 
				+		end = starpu_timing_now();
			
 
				+		timing = end - start;
			
 
				 
			
 
				+		numa_latency[numa_src][numa_dst] = timing/NITER;
			
 
				+
			
 
				+		hwloc_free(hwtopology, h_buffer, SIZE);
			
 
				+		hwloc_free(hwtopology, d_buffer, SIZE);
			
 
				+	}
			
 
				+	else
			
 
				+#endif
			
 
				+	{
			
 
				+		/* Cannot make a real calibration */
			
 
				+		numa_timing[numa_src][numa_dst] = 0.01;
			
 
				+		numa_latency[numa_src][numa_dst] = 0;
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void benchmark_all_gpu_devices(void)
			
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -1,8 +1,8 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2012-2017  Université de Bordeaux
			
 
				- * Copyright (C) 2016  	    Inria
			
 
				- * Copyright (C) 2016, 2017  	    CNRS
			
 
				+ * Copyright (C) 2016, 2017  Inria
			
 
				+ * Copyright (C) 2016, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -1043,7 +1043,7 @@ void _starpu_simgrid_count_ngpus(void)
 
				 			for (src2 = 1; src2 < STARPU_MAXNODES; src2++)
			
 
				 			{
			
 
				 				int numa;
			
 
				-				int nnumas = _starpu_get_nb_numa_nodes();
			
 
				+				int nnumas = starpu_get_nb_numa_nodes();
			
 
				 				int found = 0;
			
 
				 				for (numa = 0; numa < nnumas; numa++)
			
 
				 					if (starpu_bus_get_id(src2, numa) != -1)
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -65,9 +65,9 @@ static int nobind;
 
				 /* For checking whether two workers share the same PU, indexed by PU number */
			
 
				 static int cpu_worker[STARPU_MAXCPUS];
			
 
				 static unsigned nb_numa_nodes = 0;
			
 
				-static unsigned numa_memory_nodes[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
			
 
				+static int numa_memory_nodes[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
			
 
				 static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES];
			
 
				-static int _starpu_worker_numa_node(unsigned workerid);
			
 
				+static int _starpu_get_numa_node_worker(unsigned workerid);
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
			
 
				 
			
@@ -96,12 +96,12 @@ static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
 
				 struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS];
			
 
				 #endif
			
 
				 
			
 
				-int _starpu_get_nb_numa_nodes(void)
			
 
				+int starpu_get_nb_numa_nodes(void)
			
 
				 {
			
 
				 	return nb_numa_nodes;
			
 
				 }
			
 
				 
			
 
				-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				 static int numa_get_logical_id(hwloc_obj_t obj)
			
 
				 {
			
 
				 	STARPU_ASSERT(obj);
			
@@ -119,42 +119,52 @@ static int numa_get_logical_id(hwloc_obj_t obj)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static int _starpu_worker_numa_node(unsigned workerid)
			
 
				+static int _starpu_get_numa_node_worker(unsigned workerid)
			
 
				 {
			
 
				-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
			
 
				-	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
			
 
				-	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
			
 
				-	struct _starpu_machine_topology *topology = &config->topology ;
			
 
				-
			
 
				-	hwloc_obj_t obj;
			
 
				-	switch(worker->arch) 	
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+	char * state;
			
 
				+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
			
 
				 	{
			
 
				-		case STARPU_CPU_WORKER:
			
 
				-			obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
			
 
				-			break;
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				-	}
			
 
				+		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
			
 
				+		struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
			
 
				+		struct _starpu_machine_topology *topology = &config->topology ;
			
 
				 
			
 
				-	return numa_get_logical_id(obj);
			
 
				-		
			
 
				-#else 
			
 
				-	(void) workerid; /* unused */
			
 
				-	return -1;
			
 
				+		hwloc_obj_t obj;
			
 
				+		switch(worker->arch) 	
			
 
				+		{
			
 
				+			case STARPU_CPU_WORKER:
			
 
				+				obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
			
 
				+				break;
			
 
				+			default:
			
 
				+				STARPU_ABORT();
			
 
				+		}
			
 
				+
			
 
				+		return numa_get_logical_id(obj);
			
 
				+	}
			
 
				+	else		
			
 
				 #endif 
			
 
				+	{
			
 
				+		(void) workerid; /* unused */
			
 
				+		return -1;
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static int _starpu_numa_get_logical_id_from_pu(int pu)
			
 
				 {
			
 
				-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
			
 
				-	struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				-	struct _starpu_machine_topology *topology = &config->topology;
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+	if (nb_numa_nodes > 1)
			
 
				+	{
			
 
				+		struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				+		struct _starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-	hwloc_obj_t obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, pu);
			
 
				-	return numa_get_logical_id(obj);
			
 
				-#else
			
 
				-	return -1;
			
 
				+		hwloc_obj_t obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, pu);
			
 
				+		return numa_get_logical_id(obj);
			
 
				+	}
			
 
				+	else
			
 
				 #endif
			
 
				+	{
			
 
				+		return -1;
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 
			
@@ -913,18 +923,27 @@ unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config S
 
				 #endif
			
 
				         _starpu_init_topology(config);
			
 
				 
			
 
				-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
			
 
				-	struct _starpu_machine_topology *topology = &config->topology ;
			
 
				-        int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
			
 
				-	int res = nnumanodes > 0 ? nnumanodes : 1 ;
			
 
				-#else /* STARPU_USE_NUMA */
			
 
				-	int res = 1 ;
			
 
				-#endif /* STARPU_USE_NUMA */
			
 
				+	int res;
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+	char * state;
			
 
				+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
			
 
				+	{
			
 
				+		struct _starpu_machine_topology *topology = &config->topology ;
			
 
				+		int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
			
 
				+		res = nnumanodes > 0 ? nnumanodes : 1 ;
			
 
				+	}
			
 
				+	else
			
 
				+#endif 
			
 
				+	{	
			
 
				+		res = 1;
			
 
				+	}
			
 
				+
			
 
				 	STARPU_ASSERT_MSG(res <= STARPU_MAXNUMANODES, "Number of NUMA nodes discovered is higher than maximum accepted ! Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n");
			
 
				 	return res;
			
 
				 }
			
 
				 
			
 
				-int _starpu_numa_logid_to_id(unsigned logid)
			
 
				+//TODO change this into an array
			
 
				+int starpu_numa_hwloclogid_to_id(int logid)
			
 
				 {
			
 
				 	unsigned n;
			
 
				 	for (n = 0; n < nb_numa_nodes; n++)
			
@@ -933,7 +952,7 @@ int _starpu_numa_logid_to_id(unsigned logid)
 
				 	return -1;
			
 
				 }
			
 
				 
			
 
				-unsigned _starpu_numa_id_to_logid(unsigned id)
			
 
				+int starpu_numa_id_to_hwloclogid(unsigned id)
			
 
				 {
			
 
				 	STARPU_ASSERT(id < STARPU_MAXNUMANODES);
			
 
				 	return numa_memory_nodes[id];
			
@@ -1888,10 +1907,10 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 		struct _starpu_worker *workerarg = &config->workers[worker];
			
 
				 		if (workerarg->arch == STARPU_CPU_WORKER)
			
 
				 		{
			
 
				-			int numa_logical_id = _starpu_worker_numa_node(worker);
			
 
				+			int numa_logical_id = _starpu_get_numa_node_worker(worker);
			
 
				 
			
 
				 			/* Convert logical id to StarPU id to check if this NUMA node is already saved or not */
			
 
				-			int numa_starpu_id = _starpu_numa_logid_to_id(numa_logical_id);
			
 
				+			int numa_starpu_id = starpu_numa_hwloclogid_to_id(numa_logical_id);
			
 
				 
			
 
				 			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
			
 
				 			{
			
@@ -1925,120 +1944,124 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 	_STARPU_DISP("No NUMA nodes found when checking CPU workers. Take NUMA nodes attached to CUDA and OpenCL devices... \n");
			
 
				 #endif
			
 
				 
			
 
				-
			
 
				-#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
			
 
				-	for (i = 0; i < config->topology.ncudagpus; i++)
			
 
				+	char * state;
			
 
				+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
			
 
				 	{
			
 
				-		hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
			
 
				-		
			
 
				-		/* Hwloc cannot recognize some devices */
			
 
				-		if (!obj)
			
 
				-			continue;
			
 
				-			
			
 
				-		while (obj->type != HWLOC_OBJ_NODE)
			
 
				+		/* NUMA mode activated */
			
 
				+#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
			
 
				+		for (i = 0; i < config->topology.ncudagpus; i++)
			
 
				 		{
			
 
				-			obj = obj->parent;
			
 
				+			hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
			
 
				 
			
 
				-			/* If we don't find a "node" obj before the root, this means
			
 
				-			 * hwloc does not know whether there are numa nodes or not, so
			
 
				-			 * we should not use a per-node sampling in that case. */
			
 
				+			/* Hwloc cannot recognize some devices */
			
 
				 			if (!obj)
			
 
				 				continue;
			
 
				-		}
			
 
				-		int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
			
 
				 
			
 
				-		if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
			
 
				-		{
			
 
				-			_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
			
 
				-			/* Don't create a new NUMA node */
			
 
				-			numa_starpu_id = STARPU_MAIN_RAM;
			
 
				-		}
			
 
				+			while (obj->type != HWLOC_OBJ_NODE)
			
 
				+			{
			
 
				+				obj = obj->parent;
			
 
				 
			
 
				-		if (numa_starpu_id == -1)
			
 
				-		{
			
 
				-			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
			
 
				-			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
			
 
				-			numa_memory_nodes[memnode] = obj->logical_index;
			
 
				-			nb_numa_nodes++;
			
 
				+				/* If we don't find a "node" obj before the root, this means
			
 
				+				 * hwloc does not know whether there are numa nodes or not, so
			
 
				+				 * we should not use a per-node sampling in that case. */
			
 
				+				if (!obj)
			
 
				+					continue;
			
 
				+			}
			
 
				+			int numa_starpu_id = starpu_numa_hwloclogid_to_id(obj->logical_index);
			
 
				+
			
 
				+			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
			
 
				+			{
			
 
				+				_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
			
 
				+				/* Don't create a new NUMA node */
			
 
				+				numa_starpu_id = STARPU_MAIN_RAM;
			
 
				+			}
			
 
				+
			
 
				+			if (numa_starpu_id == -1)
			
 
				+			{
			
 
				+				int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
			
 
				+				STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
			
 
				+				numa_memory_nodes[memnode] = obj->logical_index;
			
 
				+				nb_numa_nodes++;
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-			snprintf(name, sizeof(name), "RAM%d", memnode);
			
 
				-			host = _starpu_simgrid_get_host_by_name(name);
			
 
				-			STARPU_ASSERT(host);
			
 
				-			_starpu_simgrid_memory_node_set_host(memnode, host);
			
 
				+				snprintf(name, sizeof(name), "RAM%d", memnode);
			
 
				+				host = _starpu_simgrid_get_host_by_name(name);
			
 
				+				STARPU_ASSERT(host);
			
 
				+				_starpu_simgrid_memory_node_set_host(memnode, host);
			
 
				 #endif
			
 
				-		}
			
 
				-	}	
			
 
				+			}
			
 
				+		}	
			
 
				 #endif
			
 
				 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
			
 
				-	if (config->topology.nopenclgpus > 0)
			
 
				-	{
			
 
				-		cl_int err;
			
 
				-		cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
			
 
				-		cl_uint nb_platforms;
			
 
				-		unsigned platform;
			
 
				-		unsigned nb_opencl_devices = 0, num = 0;
			
 
				-
			
 
				-		err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
			
 
				-		if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
			
 
				-			nb_platforms=0;
			
 
				-
			
 
				-		cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
			
 
				-		if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
			
 
				-			device_type |= CL_DEVICE_TYPE_CPU;
			
 
				-		if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
			
 
				-			device_type = CL_DEVICE_TYPE_CPU;
			
 
				-
			
 
				-		for (platform = 0; platform < nb_platforms ; platform++)
			
 
				+		if (config->topology.nopenclgpus > 0)
			
 
				 		{
			
 
				-			err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
			
 
				-			if (err != CL_SUCCESS)
			
 
				-				num = 0;
			
 
				-			nb_opencl_devices += num;
			
 
				-
			
 
				-			for (i = 0; i < num; i++)
			
 
				+			cl_int err;
			
 
				+			cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
			
 
				+			cl_uint nb_platforms;
			
 
				+			unsigned platform;
			
 
				+			unsigned nb_opencl_devices = 0, num = 0;
			
 
				+
			
 
				+			err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
			
 
				+			if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
			
 
				+				nb_platforms=0;
			
 
				+
			
 
				+			cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
			
 
				+			if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
			
 
				+				device_type |= CL_DEVICE_TYPE_CPU;
			
 
				+			if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
			
 
				+				device_type = CL_DEVICE_TYPE_CPU;
			
 
				+
			
 
				+			for (platform = 0; platform < nb_platforms ; platform++)
			
 
				 			{
			
 
				-				hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
			
 
				+				err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
			
 
				+				if (err != CL_SUCCESS)
			
 
				+					num = 0;
			
 
				+				nb_opencl_devices += num;
			
 
				 
			
 
				-				/* Hwloc cannot recognize some devices */
			
 
				-				if (!obj)
			
 
				-					continue;
			
 
				-
			
 
				-				while (obj->type != HWLOC_OBJ_NODE)
			
 
				+				for (i = 0; i < num; i++)
			
 
				 				{
			
 
				-					obj = obj->parent;
			
 
				+					hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
			
 
				 
			
 
				-					/* If we don't find a "node" obj before the root, this means
			
 
				-					 * hwloc does not know whether there are numa nodes or not, so
			
 
				-					 * we should not use a per-node sampling in that case. */
			
 
				+					/* Hwloc cannot recognize some devices */
			
 
				 					if (!obj)
			
 
				 						continue;
			
 
				-				}
			
 
				-				int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
			
 
				 
			
 
				-				if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
			
 
				-				{
			
 
				-					_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
			
 
				-					/* Don't create a new NUMA node */
			
 
				-					numa_starpu_id = STARPU_MAIN_RAM;
			
 
				-				}
			
 
				+					while (obj->type != HWLOC_OBJ_NODE)
			
 
				+					{
			
 
				+						obj = obj->parent;
			
 
				 
			
 
				-				if (numa_starpu_id == -1)
			
 
				-				{
			
 
				-					int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
			
 
				-					STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
			
 
				-					numa_memory_nodes[memnode] = obj->logical_index;
			
 
				-					nb_numa_nodes++;
			
 
				+						/* If we don't find a "node" obj before the root, this means
			
 
				+						 * hwloc does not know whether there are numa nodes or not, so
			
 
				+						 * we should not use a per-node sampling in that case. */
			
 
				+						if (!obj)
			
 
				+							continue;
			
 
				+					}
			
 
				+					int numa_starpu_id = starpu_numa_hwloclogid_to_id(obj->logical_index);
			
 
				+
			
 
				+					if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
			
 
				+					{
			
 
				+						_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
			
 
				+						/* Don't create a new NUMA node */
			
 
				+						numa_starpu_id = STARPU_MAIN_RAM;
			
 
				+					}
			
 
				+
			
 
				+					if (numa_starpu_id == -1)
			
 
				+					{
			
 
				+						int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
			
 
				+						STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
			
 
				+						numa_memory_nodes[memnode] = obj->logical_index;
			
 
				+						nb_numa_nodes++;
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-					snprintf(name, sizeof(name), "RAM%d", memnode);
			
 
				-					host = _starpu_simgrid_get_host_by_name(name);
			
 
				-					STARPU_ASSERT(host);
			
 
				-					_starpu_simgrid_memory_node_set_host(memnode, host);
			
 
				+						snprintf(name, sizeof(name), "RAM%d", memnode);
			
 
				+						host = _starpu_simgrid_get_host_by_name(name);
			
 
				+						STARPU_ASSERT(host);
			
 
				+						_starpu_simgrid_memory_node_set_host(memnode, host);
			
 
				 #endif
			
 
				-				}
			
 
				-			}	
			
 
				+					}
			
 
				+				}	
			
 
				+			}
			
 
				 		}
			
 
				-	}
			
 
				 #endif
			
 
				+	}
			
 
				 	
			
 
				 #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
			
 
				 	//Found NUMA nodes from CUDA nodes
			
@@ -2059,38 +2082,43 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 	unsigned numa;
			
 
				 	for (numa = 0; numa < nnuma; numa++)
			
 
				 	{
			
 
				-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
			
 
				-		hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
			
 
				-		unsigned numa_logical_id = obj->logical_index;
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+		if (nnuma > 1)
			
 
				+		{
			
 
				+			hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
			
 
				+			unsigned numa_logical_id = obj->logical_index;
			
 
				 
			
 
				-		int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
			
 
				-		STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
			
 
				+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
			
 
				+			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
			
 
				 
			
 
				-		numa_memory_nodes[memnode] = numa_logical_id;
			
 
				-		nb_numa_nodes++;								
			
 
				+			numa_memory_nodes[memnode] = numa_logical_id;
			
 
				+			nb_numa_nodes++;								
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-		snprintf(name, sizeof(name), "RAM%d", memnode);
			
 
				-		host = _starpu_simgrid_get_host_by_name(name);
			
 
				-		STARPU_ASSERT(host);
			
 
				-		_starpu_simgrid_memory_node_set_host(memnode, host);
			
 
				+			snprintf(name, sizeof(name), "RAM%d", memnode);
			
 
				+			host = _starpu_simgrid_get_host_by_name(name);
			
 
				+			STARPU_ASSERT(host);
			
 
				+			_starpu_simgrid_memory_node_set_host(memnode, host);
			
 
				 #endif
			
 
				-#else /* defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC) */
			
 
				+		}
			
 
				+		else
			
 
				+#endif /* defined(STARPU_HAVE_HWLOC) */
			
 
				+		{
			
 
				 
			
 
				-		/* In this case, nnuma has only one node */
			
 
				-		int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
			
 
				-		STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
			
 
				+			/* In this case, nnuma has only one node */
			
 
				+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
			
 
				+			STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
			
 
				 
			
 
				-		numa_memory_nodes[memnode] = -1;
			
 
				-		nb_numa_nodes++;								
			
 
				+			numa_memory_nodes[memnode] = -1;
			
 
				+			nb_numa_nodes++;								
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-		char name[16];
			
 
				-		msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
			
 
				-		STARPU_ASSERT(host);
			
 
				-		_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
			
 
				+			char name[16];
			
 
				+			msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
			
 
				+			STARPU_ASSERT(host);
			
 
				+			_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
			
 
				 #endif
			
 
				+		}
			
 
				 
			
 
				-#endif /* defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC) */
			
 
				 	}	
			
 
				 	
			
 
				 	STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n");	
			
@@ -2168,8 +2196,8 @@ _starpu_init_workers_binding_and_memory (struct _starpu_machine_config *config,
 
				 		{
			
 
				 			case STARPU_CPU_WORKER:
			
 
				 			{
			
 
				-				int numa_logical_id = _starpu_worker_numa_node(worker);
			
 
				-				int numa_starpu_id =  _starpu_numa_logid_to_id(numa_logical_id);
			
 
				+				int numa_logical_id = _starpu_get_numa_node_worker(worker);
			
 
				+				int numa_starpu_id =  starpu_numa_hwloclogid_to_id(numa_logical_id);
			
 
				 				if (numa_starpu_id >= STARPU_MAXNUMANODES)
			
 
				 					numa_starpu_id = STARPU_MAIN_RAM;
			
 
				 
			
@@ -2607,7 +2635,7 @@ starpu_topology_print (FILE *output)
 
				 		fprintf(output, "------\tNUMA %u\t------\n", numa);
			
 
				 		for (pu = 0; pu < topology->nhwpus; pu++)
			
 
				 		{
			
 
				-			if (_starpu_numa_id_to_logid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
			
 
				+			if (starpu_numa_id_to_hwloclogid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
			
 
				 			{
			
 
				 				if ((pu % nthreads_per_core) == 0)
			
 
				 					fprintf(output, "core %u", pu / nthreads_per_core);
			
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2010, 2012, 2014-2017  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2015, 2017  CNRS
			
 
				+ * Copyright (C) 2017  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -71,7 +72,7 @@ void _starpu_bind_thread_on_cpus(struct _starpu_machine_config *config STARPU_AT
 
				 
			
 
				 struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d);
			
 
				 
			
 
				-int _starpu_get_nb_numa_nodes(void);
			
 
				-unsigned _starpu_numa_id_to_logid(unsigned id);
			
 
				+int starpu_get_nb_numa_nodes(void);
			
 
				+int starpu_numa_id_to_hwloclogid(unsigned id);
			
 
				 	
			
 
				 #endif // __TOPOLOGY_H__
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -1598,7 +1598,7 @@ void starpu_shutdown(void)
 
				 	_starpu_kill_all_workers(&_starpu_config);
			
 
				 	
			
 
				 	unsigned i;
			
 
				-	unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
			
 
				+	unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
			
 
				 	for (i=0; i<nb_numa_nodes; i++)
			
 
				 	{
			
 
				 		_starpu_free_all_automatically_allocated_buffers(i);
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures. *
			
 
				  * Copyright (C) 2009-2017  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				- * Copyright (C) 2014  INRIA
			
 
				+ * Copyright (C) 2014, 2017  INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -326,7 +326,7 @@ static unsigned chose_best_numa_between_src_and_dest(int src, int dst)
 
				 	double timing_best;
			
 
				 	int best_numa = -1;
			
 
				 	unsigned numa;
			
 
				-	const unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
			
 
				+	const unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
			
 
				 	for(numa = 0; numa < nb_numa_nodes; numa++)
			
 
				 	{
			
 
				 		double actual = 1.0/starpu_transfer_bandwidth(src, numa) + 1.0/starpu_transfer_bandwidth(numa, dst);
			
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2010, 2012-2017  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				+ * Copyright (C) 2017  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -305,10 +306,10 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 
				 #endif
			
 
				 	}
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				-	if (_starpu_get_nb_numa_nodes() > 1) {
			
 
				+	if (starpu_get_nb_numa_nodes() > 1) {
			
 
				 		struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				 		hwloc_topology_t hwtopology = config->topology.hwtopology;
			
 
				-		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, _starpu_numa_id_to_logid(dst_node));
			
 
				+		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, starpu_numa_id_to_hwloclogid(dst_node));
			
 
				 		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
			
 
				 		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
			
 
				 		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A);
			
@@ -494,7 +495,7 @@ int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags
 
				 #endif
			
 
				 	}
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				-	else if (_starpu_get_nb_numa_nodes() > 1) {
			
 
				+	else if (starpu_get_nb_numa_nodes() > 1) {
			
 
				 		struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				 		hwloc_topology_t hwtopology = config->topology.hwtopology;
			
 
				 		hwloc_free(hwtopology, A, dim);
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2017  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				- * Copyright (C) 2016  Inria
			
 
				+ * Copyright (C) 2016, 2017  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -1620,7 +1620,7 @@ get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
 
				 			if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
			
 
				 			{
			
 
				 				unsigned numa;
			
 
				-				unsigned nnumas = _starpu_get_nb_numa_nodes();
			
 
				+				unsigned nnumas = starpu_get_nb_numa_nodes();
			
 
				 				for (numa = 0; numa < nnumas; numa++)
			
 
				 				{
			
 
				 					/* TODO : check if starpu_transfer_predict(node, i,...) is the same */
			
@@ -1651,7 +1651,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
				 		if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && (starpu_node_get_kind(node) != STARPU_CPU_RAM))
			
 
				 		{
			
 
				  	                unsigned i;
			
 
				-			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
			
 
				+			unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
			
 
				 			for (i=0; i<nb_numa_nodes; i++)
			
 
				 			{
			
 
				 				if (handle->per_node[i].allocated || 
			
@@ -1683,7 +1683,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
				 		/* node != 0 */
			
 
				 		/* try to push data to RAM if we can before to push on disk*/
			
 
				 			unsigned i;
			
 
				-			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
			
 
				+			unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
			
 
				 			for (i=0; i<nb_numa_nodes; i++)
			
 
				 			{
			
 
				 				if (handle->per_node[i].allocated || 
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -166,21 +166,26 @@ static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED
 
				 
			
 
				 #if defined(STARPU_HAVE_HWLOC)
			
 
				 	struct _starpu_machine_topology *topology = &config->topology;
			
 
				-#ifdef STARPU_USE_NUMA
			
 
				-        int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
			
 
				-
			
 
				-	if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
			
 
				-	     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
			
 
				-	else {
			
 
				-	     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
			
 
				-	     global_mem = obj->memory.local_memory;
			
 
				-	     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
			
 
				-	     limit = starpu_get_env_number(name);
			
 
				+
			
 
				+	int nnumas = starpu_get_nb_numa_nodes();
			
 
				+	if (nnumas > 1)
			
 
				+	{
			
 
				+		int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
			
 
				+
			
 
				+		if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
			
 
				+		     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
			
 
				+		else {
			
 
				+		     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
			
 
				+		     global_mem = obj->memory.local_memory;
			
 
				+		     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
			
 
				+		     limit = starpu_get_env_number(name);
			
 
				+		}
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		/* Do not limit ourself to a single NUMA node */
			
 
				+		global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
			
 
				 	}
			
 
				-#else /* STARPU_USE_NUMA */
			
 
				-	/* Do not limit ourself to a single NUMA node */
			
 
				-	global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
			
 
				-#endif /* STARPU_USE_NUMA */
			
 
				 
			
 
				 #else /* STARPU_HAVE_HWLOC */
			
 
				 #ifdef STARPU_DEVEL
			
--- a/tests/datawizard/nowhere.c
+++ b/tests/datawizard/nowhere.c
@@ -1,6 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2015-2016  Université de Bordeaux
			
 
				+ * Copyright (C) 2017  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -25,13 +26,6 @@
 
				  * Try the NOWHERE flag
			
 
				  */
			
 
				 
			
 
				-#ifdef STARPU_USE_NUMA
			
 
				-int main(int argc, char **argv)
			
 
				-{
			
 
				-	/* FIXME: assumes only one RAM node */
			
 
				-	return STARPU_TEST_SKIPPED;
			
 
				-}
			
 
				-#else
			
 
				 static int x, y;
			
 
				 
			
 
				 static void prod(void *descr[], void *_args STARPU_ATTRIBUTE_UNUSED)
			
@@ -91,6 +85,13 @@ int main(int argc, char **argv)
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				+	if (starpu_get_nb_numa_nodes() > 1)
			
 
				+	{
			
 
				+		/* FIXME: assumes only one RAM node */
			
 
				+		starpu_shutdown();
			
 
				+		return STARPU_TEST_SKIPPED;
			
 
				+	}
			
 
				+
			
 
				 	starpu_variable_data_register(&handle_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
			
 
				 	starpu_variable_data_register(&handle_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
			
 
				 
			
@@ -138,4 +139,3 @@ enodev:
 
				 	starpu_shutdown();
			
 
				 	return STARPU_TEST_SKIPPED;
			
 
				 }
			
 
				-#endif