8 years ago · 101a90d0f7
--- a/configure.ac
+++ b/configure.ac
@@ -576,17 +576,6 @@ AC_MSG_RESULT($nmaxnumanodes)
 
																 AC_DEFINE_UNQUOTED(STARPU_MAXNUMANODES, [$nmaxnumanodes],
															
 
																 		[maximum number of NUMA nodes])
															
 
																-AC_ARG_ENABLE(numa, [AS_HELP_STRING([--enable-numa],
															
 
																-	      [use NUMA node(s)])], [enable_numa=$enableval], [enable_numa=no])
															
 
																-
															
 
																-if test x$enable_numa = xyes ; then
															
 
																-	AC_DEFINE(STARPU_USE_NUMA, [1], [NUMA memory nodes support is enabled])
															
 
																-else
															
 
																-	nmaxnumanodes=1
															
 
																-fi
															
 
																-
															
 
																-AM_CONDITIONAL([STARPU_USE_NUMA], [test "x$enable_numa" = "xyes"])
															
 
																-
															
 
																 ###############################################################################
															
--- a/doc/doxygen/chapters/api/data_management.doxy
+++ b/doc/doxygen/chapters/api/data_management.doxy
@@ -2,7 +2,7 @@
 
																  * This file is part of the StarPU Handbook.
															
 
																  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
															
 
																- * Copyright (C) 2011, 2012 INRIA
															
+
																+ * Copyright (C) 2011, 2012, 2017  INRIA
															
 
																  * See the file version.doxy for copying conditions.
															
 
																  */
															
@@ -104,9 +104,9 @@ data to StarPU, the specified memory node indicates where the piece of
 
																 data initially resides (we also call this memory node the home node of
															
 
																 a piece of data).
															
 
																-In the case of NUMA systems, functions starpu_numaphysid_get_memory_node()
															
+
																+In the case of NUMA systems, functions starpu_numa_hwloclogid_to_id()
															
 
																-and starpu_memory_node_get_numaphysid() can be used to convert from NUMA node
															
+
																+and starpu_numa_id_to_hwloclogid() can be used to convert between NUMA node
															
 
																-numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
															
+
																+numbers as seen by the HWLOC library and NUMA node numbers as seen by StarPU.
															
 
																 \fn void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops)
															
 
																 \ingroup API_Data_Management
															
--- a/doc/doxygen/chapters/api/workers.doxy
+++ b/doc/doxygen/chapters/api/workers.doxy
@@ -250,15 +250,15 @@ Return the type of \p node as defined by
 
																 this function should be used in the allocation function to determine
															
 
																 on which device the memory needs to be allocated.
															
 
																-\fn unsigned starpu_numaphysid_get_memory_node(unsigned numaphysid)
															
+
																+\fn int starpu_numa_hwloclogid_to_id(int logid)
															
 
																 \ingroup API_Workers_Properties
															
 
																 This function returns the identifier of the memory node associated to the NUMA
															
 
																-node identified by \p numaphysid by the Operating System.
															
+
																+node identified by \p logid by the HWLOC library.
															
 
																-\fn unsigned starpu_memory_node_get_numaphysid(unsigned node)
															
+
																+\fn int starpu_numa_id_to_hwloclogid(unsigned id)
															
 
																 \ingroup API_Workers_Properties
															
 
																-This function returns the Operating System identifier of the memory node
															
+
																+This function returns the HWLOC logical identifier of the memory node
															
 
																-whose StarPU identifier is \p node.
															
+
																+whose StarPU identifier is \p id.
															
 
																 \fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
															
 
																 \ingroup API_Workers_Properties
															
--- a/examples/cpp/add_vectors_cpp11.cpp
+++ b/examples/cpp/add_vectors_cpp11.cpp
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
															
 
																- * Copyright (C) 2012 INRIA
															
+
																+ * Copyright (C) 2012, 2017  INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -29,7 +29,7 @@
 
																 #endif
															
 
																 #include <starpu.h>
															
 
																-#if !defined(STARPU_HAVE_CXX11) || defined(STARPU_USE_NUMA)
															
+
																+#if !defined(STARPU_HAVE_CXX11)
															
 
																 int main(int argc, char **argv)
															
 
																 {
															
 
																 	return 77;
															
@@ -78,6 +78,12 @@ int main(int argc, char **argv)
 
																 		return 77;
															
 
																 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																+	if (starpu_get_nb_numa_nodes() > 1)
															
 
																+	{
															
 
																+		starpu_shutdown();
															
 
																+		return 77;
															
 
																+	}
															
 
																+
															
 
																 	// StarPU data registering
															
 
																 	starpu_data_handle_t spu_vec_A;
															
 
																 	starpu_data_handle_t spu_vec_B;
															
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2009-2016  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
															
 
																- * Copyright (C) 2014  INRIA
															
+
																+ * Copyright (C) 2014, 2017  INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -102,7 +102,6 @@
 
																 #undef STARPU_HAVE_GLPK_H
															
 
																 #undef STARPU_HAVE_LIBNUMA
															
 
																-#undef STARPU_USE_NUMA
															
 
																 #undef STARPU_HAVE_WINDOWS
															
 
																 #undef STARPU_LINUX_SYS
															
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2010-2017  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
															
 
																- * Copyright (C) 2016  Inria
															
+
																+ * Copyright (C) 2016, 2017  Inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -132,8 +132,10 @@ enum starpu_node_kind
 
																 unsigned starpu_worker_get_memory_node(unsigned workerid);
															
 
																 unsigned starpu_memory_nodes_get_count(void);
															
 
																-unsigned starpu_numaphysid_get_memory_node(unsigned numaphysid);
															
+
																+int starpu_get_nb_numa_nodes(void);
															
 
																-unsigned starpu_memory_node_get_numaphysid(unsigned node);
															
+
																+int starpu_numa_hwloclogid_to_id(int logid);
															
 
																+int starpu_numa_id_to_hwloclogid(unsigned id);
															
 
																+
															
 
																 enum starpu_node_kind starpu_node_get_kind(unsigned node);
															
 
																 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
															
--- a/src/core/disk.c
+++ b/src/core/disk.c
@@ -2,6 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2013  Corentin Salingue
															
 
																  * Copyright (C) 2015, 2016  CNRS
															
 
																+ * Copyright (C) 2017  Inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -63,7 +64,7 @@ int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_s
 
																 	unsigned disk_memnode = _starpu_memory_node_register(STARPU_DISK_RAM, 0);
															
 
																         /* Connect the disk memory node to all numa memory nodes */
															
 
																-        int nb_numa_nodes = _starpu_get_nb_numa_nodes();
															
+
																+        int nb_numa_nodes = starpu_get_nb_numa_nodes();
															
 
																         int numa_node;
															
 
																         for (numa_node = 0; numa_node < nb_numa_nodes; numa_node++)
															
 
																         {
															
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -175,14 +175,21 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
																 	/* Allocate a buffer on the host */
															
 
																 	unsigned char *h_buffer;
															
 
																-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
															
+
																+	
															
 
																-	hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
															
+
																+	if (nnumas > 1)
															
 
																-#else
															
+
																+	{
															
 
																-	/* we use STARPU_MAIN_RAM */
															
+
																+		/* NUMA mode activated */
															
 
																-	_STARPU_MALLOC(h_buffer, size);
															
+
																+		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
															
 
																+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
															
 
																+	}
															
 
																+	else
															
 
																 #endif
															
 
																-	cudaHostRegister((void *)h_buffer, size, 0);
															
+
																+	{
															
 
																+		/* we use STARPU_MAIN_RAM */
															
 
																+		_STARPU_MALLOC(h_buffer, size);
															
 
																+		cudaHostRegister((void *)h_buffer, size, 0);
															
 
																+	}
															
 
																 	STARPU_ASSERT(cures == cudaSuccess);
															
@@ -252,11 +259,18 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
																 	/* Free buffers */
															
 
																 	cudaHostUnregister(h_buffer);
															
 
																-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
															
+
																+#if defined(STARPU_HAVE_HWLOC) 
															
 
																-	hwloc_free(hwtopology, h_buffer, size);
															
+
																+	if (nnumas > 1)
															
 
																-#else
															
+
																+	{
															
 
																-	free(h_buffer);
															
+
																+		/* NUMA mode activated */
															
 
																+		hwloc_free(hwtopology, h_buffer, size);
															
 
																+	}
															
 
																+	else
															
 
																 #endif
															
 
																+	{
															
 
																+		free(h_buffer);
															
 
																+	}
															
 
																+
															
 
																 	cudaFree(d_buffer);
															
 
																 	cudaThreadExit();
															
@@ -421,13 +435,19 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 
																 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																 	/* Allocate a buffer on the host */
															
 
																 	unsigned char *h_buffer;
															
 
																-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																-	hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
															
+
																+	if (nnumas > 1)
															
 
																-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
															
+
																+	{
															
 
																-#else
															
+
																+		/* NUMA mode activated */
															
 
																-	/* we use STARPU_MAIN_RAM */
															
+
																+		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
															
 
																-	_STARPU_MALLOC(h_buffer, size);
															
+
																+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
															
 
																+	}
															
 
																+	else
															
 
																 #endif
															
 
																+	{
															
 
																+		/* we use STARPU_MAIN_RAM */
															
 
																+		_STARPU_MALLOC(h_buffer, size);
															
 
																+	}
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
@@ -501,11 +521,17 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 
																 	err = clReleaseMemObject(d_buffer);
															
 
																 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
															
 
																 		STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																-	hwloc_free(hwtopology, h_buffer, size);
															
+
																+	if (nnumas > 1)
															
 
																-#else
															
+
																+	{
															
 
																-	free(h_buffer);
															
+
																+		/* NUMA mode activated */
															
 
																+		hwloc_free(hwtopology, h_buffer, size);
															
 
																+	}
															
 
																+	else
															
 
																 #endif
															
 
																+	{
															
 
																+		free(h_buffer);
															
 
																+	}
															
 
																 	/* Uninitiliaze OpenCL context on the device */
															
 
																 	if (not_initialized == 1)
															
@@ -632,48 +658,53 @@ static void measure_bandwidth_between_host_and_dev(int dev, struct dev_timing *d
 
																 static void measure_bandwidth_latency_between_numa(int numa_src, int numa_dst)
															
 
																 {
															
 
																-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																-	double start, end, timing;
															
+
																+	if (nnumas > 1)
															
 
																-	unsigned iter;
															
+
																+	{
															
 
																+		/* NUMA mode activated */
															
 
																+		double start, end, timing;
															
 
																+		unsigned iter;
															
 
																-	unsigned char *h_buffer;	
															
+
																+		unsigned char *h_buffer;	
															
 
																-	hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
															
+
																+		hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
															
 
																-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
															
+
																+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
															
 
																-	unsigned char *d_buffer;	
															
+
																+		unsigned char *d_buffer;	
															
 
																-	hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
															
+
																+		hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
															
 
																-	d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
															
+
																+		d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
															
 
																-	memset(h_buffer, 0, SIZE);
															
+
																+		memset(h_buffer, 0, SIZE);
															
 
																-	start = starpu_timing_now();
															
+
																+		start = starpu_timing_now();
															
 
																-	for (iter = 0; iter < NITER; iter++)
															
+
																+		for (iter = 0; iter < NITER; iter++)
															
 
																-	{
															
+
																+		{
															
 
																-		memcpy(d_buffer, h_buffer, SIZE);
															
+
																+			memcpy(d_buffer, h_buffer, SIZE);
															
 
																-	}
															
+
																+		}
															
 
																-	end = starpu_timing_now();
															
+
																+		end = starpu_timing_now();
															
 
																-	timing = end - start;
															
+
																+		timing = end - start;
															
 
																-	
															
 
																-	numa_timing[numa_src][numa_dst] = timing/NITER/SIZE;
															
 
																-	start = starpu_timing_now();
															
+
																+		numa_timing[numa_src][numa_dst] = timing/NITER/SIZE;
															
 
																-	for (iter = 0; iter < NITER; iter++)
															
 
																-	{
															
 
																-		memcpy(d_buffer, h_buffer, 1);
															
 
																-	}
															
 
																-	end = starpu_timing_now();
															
 
																-	timing = end - start;
															
 
																-	
															
 
																-	numa_latency[numa_src][numa_dst] = timing/NITER;
															
 
																-	hwloc_free(hwtopology, h_buffer, SIZE);
															
+
																+		start = starpu_timing_now();
															
 
																-	hwloc_free(hwtopology, d_buffer, SIZE);
															
+
																+		for (iter = 0; iter < NITER; iter++)
															
 
																-#else
															
+
																+		{
															
 
																-	/* Cannot make a real calibration */
															
+
																+			memcpy(d_buffer, h_buffer, 1);
															
 
																-	numa_timing[numa_src][numa_dst] = 0.01;
															
+
																+		}
															
 
																-	numa_latency[numa_src][numa_dst] = 0;
															
+
																+		end = starpu_timing_now();
															
 
																-#endif
															
+
																+		timing = end - start;
															
 
																+		numa_latency[numa_src][numa_dst] = timing/NITER;
															
 
																+
															
 
																+		hwloc_free(hwtopology, h_buffer, SIZE);
															
 
																+		hwloc_free(hwtopology, d_buffer, SIZE);
															
 
																+	}
															
 
																+	else
															
 
																+#endif
															
 
																+	{
															
 
																+		/* Cannot make a real calibration */
															
 
																+		numa_timing[numa_src][numa_dst] = 0.01;
															
 
																+		numa_latency[numa_src][numa_dst] = 0;
															
 
																+	}
															
 
																 }
															
 
																 static void benchmark_all_gpu_devices(void)
															
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -1,8 +1,8 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2012-2017  Université de Bordeaux
															
 
																- * Copyright (C) 2016  	    Inria
															
+
																+ * Copyright (C) 2016, 2017  Inria
															
 
																- * Copyright (C) 2016, 2017  	    CNRS
															
+
																+ * Copyright (C) 2016, 2017  CNRS
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -1043,7 +1043,7 @@ void _starpu_simgrid_count_ngpus(void)
 
																 			for (src2 = 1; src2 < STARPU_MAXNODES; src2++)
															
 
																 			{
															
 
																 				int numa;
															
 
																-				int nnumas = _starpu_get_nb_numa_nodes();
															
+
																+				int nnumas = starpu_get_nb_numa_nodes();
															
 
																 				int found = 0;
															
 
																 				for (numa = 0; numa < nnumas; numa++)
															
 
																 					if (starpu_bus_get_id(src2, numa) != -1)
															
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -65,9 +65,9 @@ static int nobind;
 
																 /* For checking whether two workers share the same PU, indexed by PU number */
															
 
																 static int cpu_worker[STARPU_MAXCPUS];
															
 
																 static unsigned nb_numa_nodes = 0;
															
 
																-static unsigned numa_memory_nodes[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
															
+
																+static int numa_memory_nodes[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
															
 
																 static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES];
															
 
																-static int _starpu_worker_numa_node(unsigned workerid);
															
+
																+static int _starpu_get_numa_node_worker(unsigned workerid);
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
															
@@ -96,12 +96,12 @@ static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
 
																 struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS];
															
 
																 #endif
															
 
																-int _starpu_get_nb_numa_nodes(void)
															
+
																+int starpu_get_nb_numa_nodes(void)
															
 
																 {
															
 
																 	return nb_numa_nodes;
															
 
																 }
															
 
																-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																 static int numa_get_logical_id(hwloc_obj_t obj)
															
 
																 {
															
 
																 	STARPU_ASSERT(obj);
															
@@ -119,42 +119,52 @@ static int numa_get_logical_id(hwloc_obj_t obj)
 
																 }
															
 
																 #endif
															
 
																-static int _starpu_worker_numa_node(unsigned workerid)
															
+
																+static int _starpu_get_numa_node_worker(unsigned workerid)
															
 
																 {
															
 
																-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																-	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
															
+
																+	char * state;
															
 
																-	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
															
+
																+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
															
 
																-	struct _starpu_machine_topology *topology = &config->topology ;
															
 
																-
															
 
																-	hwloc_obj_t obj;
															
 
																-	switch(worker->arch) 	
															
 
																 	{
															
 
																-		case STARPU_CPU_WORKER:
															
+
																+		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
															
 
																-			obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
															
+
																+		struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
															
 
																-			break;
															
+
																+		struct _starpu_machine_topology *topology = &config->topology ;
															
 
																-		default:
															
 
																-			STARPU_ABORT();
															
 
																-	}
															
 
																-	return numa_get_logical_id(obj);
															
+
																+		hwloc_obj_t obj;
															
 
																-		
															
+
																+		switch(worker->arch) 	
															
 
																-#else 
															
+
																+		{
															
 
																-	(void) workerid; /* unused */
															
+
																+			case STARPU_CPU_WORKER:
															
 
																-	return -1;
															
+
																+				obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
															
 
																+				break;
															
 
																+			default:
															
 
																+				STARPU_ABORT();
															
 
																+		}
															
 
																+
															
 
																+		return numa_get_logical_id(obj);
															
 
																+	}
															
 
																+	else		
															
 
																 #endif 
															
 
																+	{
															
 
																+		(void) workerid; /* unused */
															
 
																+		return -1;
															
 
																+	}
															
 
																 }
															
 
																 static int _starpu_numa_get_logical_id_from_pu(int pu)
															
 
																 {
															
 
																-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																-	struct _starpu_machine_config *config = _starpu_get_machine_config();
															
+
																+	if (nb_numa_nodes > 1)
															
 
																-	struct _starpu_machine_topology *topology = &config->topology;
															
+
																+	{
															
 
																+		struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																+		struct _starpu_machine_topology *topology = &config->topology;
															
 
																-	hwloc_obj_t obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, pu);
															
+
																+		hwloc_obj_t obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, pu);
															
 
																-	return numa_get_logical_id(obj);
															
+
																+		return numa_get_logical_id(obj);
															
 
																-#else
															
+
																+	}
															
 
																-	return -1;
															
+
																+	else
															
 
																 #endif
															
 
																+	{
															
 
																+		return -1;
															
 
																+	}
															
 
																 }
															
@@ -913,18 +923,27 @@ unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config S
 
																 #endif
															
 
																         _starpu_init_topology(config);
															
 
																-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
															
+
																+	int res;
															
 
																-	struct _starpu_machine_topology *topology = &config->topology ;
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																-        int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
															
+
																+	char * state;
															
 
																-	int res = nnumanodes > 0 ? nnumanodes : 1 ;
															
+
																+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
															
 
																-#else /* STARPU_USE_NUMA */
															
+
																+	{
															
 
																-	int res = 1 ;
															
+
																+		struct _starpu_machine_topology *topology = &config->topology ;
															
 
																-#endif /* STARPU_USE_NUMA */
															
+
																+		int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
															
 
																+		res = nnumanodes > 0 ? nnumanodes : 1 ;
															
 
																+	}
															
 
																+	else
															
 
																+#endif 
															
 
																+	{	
															
 
																+		res = 1;
															
 
																+	}
															
 
																+
															
 
																 	STARPU_ASSERT_MSG(res <= STARPU_MAXNUMANODES, "Number of NUMA nodes discovered is higher than maximum accepted ! Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n");
															
 
																 	return res;
															
 
																 }
															
 
																-int _starpu_numa_logid_to_id(unsigned logid)
															
+
																+//TODO change this into an array
															
 
																+int starpu_numa_hwloclogid_to_id(int logid)
															
 
																 {
															
 
																 	unsigned n;
															
 
																 	for (n = 0; n < nb_numa_nodes; n++)
															
@@ -933,7 +952,7 @@ int _starpu_numa_logid_to_id(unsigned logid)
 
																 	return -1;
															
 
																 }
															
 
																-unsigned _starpu_numa_id_to_logid(unsigned id)
															
+
																+int starpu_numa_id_to_hwloclogid(unsigned id)
															
 
																 {
															
 
																 	STARPU_ASSERT(id < STARPU_MAXNUMANODES);
															
 
																 	return numa_memory_nodes[id];
															
@@ -1888,10 +1907,10 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
																 		struct _starpu_worker *workerarg = &config->workers[worker];
															
 
																 		if (workerarg->arch == STARPU_CPU_WORKER)
															
 
																 		{
															
 
																-			int numa_logical_id = _starpu_worker_numa_node(worker);
															
+
																+			int numa_logical_id = _starpu_get_numa_node_worker(worker);
															
 
																 			/* Convert logical id to StarPU id to check if this NUMA node is already saved or not */
															
 
																-			int numa_starpu_id = _starpu_numa_logid_to_id(numa_logical_id);
															
+
																+			int numa_starpu_id = starpu_numa_hwloclogid_to_id(numa_logical_id);
															
 
																 			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
															
 
																 			{
															
@@ -1925,120 +1944,124 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
																 	_STARPU_DISP("No NUMA nodes found when checking CPU workers. Take NUMA nodes attached to CUDA and OpenCL devices... \n");
															
 
																 #endif
															
 
																-
															
+
																+	char * state;
															
 
																-#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
															
+
																+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
															
 
																-	for (i = 0; i < config->topology.ncudagpus; i++)
															
 
																 	{
															
 
																-		hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
															
+
																+		/* NUMA mode activated */
															
 
																-		
															
+
																+#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
															
 
																-		/* Hwloc cannot recognize some devices */
															
+
																+		for (i = 0; i < config->topology.ncudagpus; i++)
															
 
																-		if (!obj)
															
 
																-			continue;
															
 
																-			
															
 
																-		while (obj->type != HWLOC_OBJ_NODE)
															
 
																 		{
															
 
																-			obj = obj->parent;
															
+
																+			hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
															
 
																-			/* If we don't find a "node" obj before the root, this means
															
+
																+			/* Hwloc cannot recognize some devices */
															
 
																-			 * hwloc does not know whether there are numa nodes or not, so
															
 
																-			 * we should not use a per-node sampling in that case. */
															
 
																 			if (!obj)
															
 
																 				continue;
															
 
																-		}
															
 
																-		int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
															
 
																-		if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
															
+
																+			while (obj->type != HWLOC_OBJ_NODE)
															
 
																-		{
															
+
																+			{
															
 
																-			_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
															
+
																+				obj = obj->parent;
															
 
																-			/* Don't create a new NUMA node */
															
 
																-			numa_starpu_id = STARPU_MAIN_RAM;
															
 
																-		}
															
 
																-		if (numa_starpu_id == -1)
															
+
																+				/* If we don't find a "node" obj before the root, this means
															
 
																-		{
															
+
																+				 * hwloc does not know whether there are numa nodes or not, so
															
 
																-			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
															
+
																+				 * we should not use a per-node sampling in that case. */
															
 
																-			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
															
+
																+				if (!obj)
															
 
																-			numa_memory_nodes[memnode] = obj->logical_index;
															
+
																+					continue;
															
 
																-			nb_numa_nodes++;
															
+
																+			}
															
 
																+			int numa_starpu_id = starpu_numa_hwloclogid_to_id(obj->logical_index);
															
 
																+
															
 
																+			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
															
 
																+			{
															
 
																+				_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
															
 
																+				/* Don't create a new NUMA node */
															
 
																+				numa_starpu_id = STARPU_MAIN_RAM;
															
 
																+			}
															
 
																+
															
 
																+			if (numa_starpu_id == -1)
															
 
																+			{
															
 
																+				int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
															
 
																+				STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
															
 
																+				numa_memory_nodes[memnode] = obj->logical_index;
															
 
																+				nb_numa_nodes++;
															
 
																 #ifdef STARPU_SIMGRID
															
 
																-			snprintf(name, sizeof(name), "RAM%d", memnode);
															
+
																+				snprintf(name, sizeof(name), "RAM%d", memnode);
															
 
																-			host = _starpu_simgrid_get_host_by_name(name);
															
+
																+				host = _starpu_simgrid_get_host_by_name(name);
															
 
																-			STARPU_ASSERT(host);
															
+
																+				STARPU_ASSERT(host);
															
 
																-			_starpu_simgrid_memory_node_set_host(memnode, host);
															
+
																+				_starpu_simgrid_memory_node_set_host(memnode, host);
															
 
																 #endif
															
 
																-		}
															
+
																+			}
															
 
																-	}	
															
+
																+		}	
															
 
																 #endif
															
 
																 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
															
 
																-	if (config->topology.nopenclgpus > 0)
															
+
																+		if (config->topology.nopenclgpus > 0)
															
 
																-	{
															
 
																-		cl_int err;
															
 
																-		cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
															
 
																-		cl_uint nb_platforms;
															
 
																-		unsigned platform;
															
 
																-		unsigned nb_opencl_devices = 0, num = 0;
															
 
																-
															
 
																-		err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
															
 
																-		if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
															
 
																-			nb_platforms=0;
															
 
																-
															
 
																-		cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
															
 
																-		if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
															
 
																-			device_type |= CL_DEVICE_TYPE_CPU;
															
 
																-		if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
															
 
																-			device_type = CL_DEVICE_TYPE_CPU;
															
 
																-
															
 
																-		for (platform = 0; platform < nb_platforms ; platform++)
															
 
																 		{
															
 
																-			err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
															
+
																+			cl_int err;
															
 
																-			if (err != CL_SUCCESS)
															
+
																+			cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
															
 
																-				num = 0;
															
+
																+			cl_uint nb_platforms;
															
 
																-			nb_opencl_devices += num;
															
+
																+			unsigned platform;
															
 
																-
															
+
																+			unsigned nb_opencl_devices = 0, num = 0;
															
 
																-			for (i = 0; i < num; i++)
															
+
																+
															
 
																+			err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
															
 
																+			if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
															
 
																+				nb_platforms=0;
															
 
																+
															
 
																+			cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
															
 
																+			if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
															
 
																+				device_type |= CL_DEVICE_TYPE_CPU;
															
 
																+			if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
															
 
																+				device_type = CL_DEVICE_TYPE_CPU;
															
 
																+
															
 
																+			for (platform = 0; platform < nb_platforms ; platform++)
															
 
																 			{
															
 
																-				hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
															
+
																+				err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
															
 
																+				if (err != CL_SUCCESS)
															
 
																+					num = 0;
															
 
																+				nb_opencl_devices += num;
															
 
																-				/* Hwloc cannot recognize some devices */
															
+
																+				for (i = 0; i < num; i++)
															
 
																-				if (!obj)
															
 
																-					continue;
															
 
																-
															
 
																-				while (obj->type != HWLOC_OBJ_NODE)
															
 
																 				{
															
 
																-					obj = obj->parent;
															
+
																+					hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
															
 
																-					/* If we don't find a "node" obj before the root, this means
															
+
																+					/* Hwloc cannot recognize some devices */
															
 
																-					 * hwloc does not know whether there are numa nodes or not, so
															
 
																-					 * we should not use a per-node sampling in that case. */
															
 
																 					if (!obj)
															
 
																 						continue;
															
 
																-				}
															
 
																-				int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
															
 
																-				if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
															
+
																+					while (obj->type != HWLOC_OBJ_NODE)
															
 
																-				{
															
+
																+					{
															
 
																-					_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
															
+
																+						obj = obj->parent;
															
 
																-					/* Don't create a new NUMA node */
															
 
																-					numa_starpu_id = STARPU_MAIN_RAM;
															
 
																-				}
															
 
																-				if (numa_starpu_id == -1)
															
+
																+						/* If we don't find a "node" obj before the root, this means
															
 
																-				{
															
+
																+						 * hwloc does not know whether there are numa nodes or not, so
															
 
																-					int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
															
+
																+						 * we should not use a per-node sampling in that case. */
															
 
																-					STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
															
+
																+						if (!obj)
															
 
																-					numa_memory_nodes[memnode] = obj->logical_index;
															
+
																+							continue;
															
 
																-					nb_numa_nodes++;
															
+
																+					}
															
 
																+					int numa_starpu_id = starpu_numa_hwloclogid_to_id(obj->logical_index);
															
 
																+
															
 
																+					if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
															
 
																+					{
															
 
																+						_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
															
 
																+						/* Don't create a new NUMA node */
															
 
																+						numa_starpu_id = STARPU_MAIN_RAM;
															
 
																+					}
															
 
																+
															
 
																+					if (numa_starpu_id == -1)
															
 
																+					{
															
 
																+						int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
															
 
																+						STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
															
 
																+						numa_memory_nodes[memnode] = obj->logical_index;
															
 
																+						nb_numa_nodes++;
															
 
																 #ifdef STARPU_SIMGRID
															
 
																-					snprintf(name, sizeof(name), "RAM%d", memnode);
															
+
																+						snprintf(name, sizeof(name), "RAM%d", memnode);
															
 
																-					host = _starpu_simgrid_get_host_by_name(name);
															
+
																+						host = _starpu_simgrid_get_host_by_name(name);
															
 
																-					STARPU_ASSERT(host);
															
+
																+						STARPU_ASSERT(host);
															
 
																-					_starpu_simgrid_memory_node_set_host(memnode, host);
															
+
																+						_starpu_simgrid_memory_node_set_host(memnode, host);
															
 
																 #endif
															
 
																-				}
															
+
																+					}
															
 
																-			}	
															
+
																+				}	
															
 
																+			}
															
 
																 		}
															
 
																-	}
															
 
																 #endif
															
 
																+	}
															
 
																 #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
															
 
																 	//Found NUMA nodes from CUDA nodes
															
@@ -2059,38 +2082,43 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
																 	unsigned numa;
															
 
																 	for (numa = 0; numa < nnuma; numa++)
															
 
																 	{
															
 
																-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
															
+
																+#if defined(STARPU_HAVE_HWLOC)
															
 
																-		hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
															
+
																+		if (nnuma > 1)
															
 
																-		unsigned numa_logical_id = obj->logical_index;
															
+
																+		{
															
 
																+			hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
															
 
																+			unsigned numa_logical_id = obj->logical_index;
															
 
																-		int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
															
+
																+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
															
 
																-		STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
															
+
																+			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
															
 
																-		numa_memory_nodes[memnode] = numa_logical_id;
															
+
																+			numa_memory_nodes[memnode] = numa_logical_id;
															
 
																-		nb_numa_nodes++;								
															
+
																+			nb_numa_nodes++;								
															
 
																 #ifdef STARPU_SIMGRID
															
 
																-		snprintf(name, sizeof(name), "RAM%d", memnode);
															
+
																+			snprintf(name, sizeof(name), "RAM%d", memnode);
															
 
																-		host = _starpu_simgrid_get_host_by_name(name);
															
+
																+			host = _starpu_simgrid_get_host_by_name(name);
															
 
																-		STARPU_ASSERT(host);
															
+
																+			STARPU_ASSERT(host);
															
 
																-		_starpu_simgrid_memory_node_set_host(memnode, host);
															
+
																+			_starpu_simgrid_memory_node_set_host(memnode, host);
															
 
																 #endif
															
 
																-#else /* defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC) */
															
+
																+		}
															
 
																+		else
															
 
																+#endif /* defined(STARPU_HAVE_HWLOC) */
															
 
																+		{
															
 
																-		/* In this case, nnuma has only one node */
															
+
																+			/* In this case, nnuma has only one node */
															
 
																-		int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
															
+
																+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
															
 
																-		STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
															
+
																+			STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
															
 
																-		numa_memory_nodes[memnode] = -1;
															
+
																+			numa_memory_nodes[memnode] = -1;
															
 
																-		nb_numa_nodes++;								
															
+
																+			nb_numa_nodes++;								
															
 
																 #ifdef STARPU_SIMGRID
															
 
																-		char name[16];
															
+
																+			char name[16];
															
 
																-		msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
															
+
																+			msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
															
 
																-		STARPU_ASSERT(host);
															
+
																+			STARPU_ASSERT(host);
															
 
																-		_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
															
+
																+			_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
															
 
																 #endif
															
 
																+		}
															
 
																-#endif /* defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC) */
															
 
																 	}	
															
 
																 	STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n");	
															
@@ -2168,8 +2196,8 @@ _starpu_init_workers_binding_and_memory (struct _starpu_machine_config *config,
 
																 		{
															
 
																 			case STARPU_CPU_WORKER:
															
 
																 			{
															
 
																-				int numa_logical_id = _starpu_worker_numa_node(worker);
															
+
																+				int numa_logical_id = _starpu_get_numa_node_worker(worker);
															
 
																-				int numa_starpu_id =  _starpu_numa_logid_to_id(numa_logical_id);
															
+
																+				int numa_starpu_id =  starpu_numa_hwloclogid_to_id(numa_logical_id);
															
 
																 				if (numa_starpu_id >= STARPU_MAXNUMANODES)
															
 
																 					numa_starpu_id = STARPU_MAIN_RAM;
															
@@ -2607,7 +2635,7 @@ starpu_topology_print (FILE *output)
 
																 		fprintf(output, "------\tNUMA %u\t------\n", numa);
															
 
																 		for (pu = 0; pu < topology->nhwpus; pu++)
															
 
																 		{
															
 
																-			if (_starpu_numa_id_to_logid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
															
+
																+			if (starpu_numa_id_to_hwloclogid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
															
 
																 			{
															
 
																 				if ((pu % nthreads_per_core) == 0)
															
 
																 					fprintf(output, "core %u", pu / nthreads_per_core);
															
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -2,6 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2009-2010, 2012, 2014-2017  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2015, 2017  CNRS
															
 
																+ * Copyright (C) 2017  Inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -71,7 +72,7 @@ void _starpu_bind_thread_on_cpus(struct _starpu_machine_config *config STARPU_AT
 
																 struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d);
															
 
																-int _starpu_get_nb_numa_nodes(void);
															
+
																+int starpu_get_nb_numa_nodes(void);
															
 
																-unsigned _starpu_numa_id_to_logid(unsigned id);
															
+
																+int starpu_numa_id_to_hwloclogid(unsigned id);
															
 
																 #endif // __TOPOLOGY_H__
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -1598,7 +1598,7 @@ void starpu_shutdown(void)
 
																 	_starpu_kill_all_workers(&_starpu_config);
															
 
																 	unsigned i;
															
 
																-	unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
															
+
																+	unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
															
 
																 	for (i=0; i<nb_numa_nodes; i++)
															
 
																 	{
															
 
																 		_starpu_free_all_automatically_allocated_buffers(i);
															
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures. *
															
 
																  * Copyright (C) 2009-2017  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
															
 
																- * Copyright (C) 2014  INRIA
															
+
																+ * Copyright (C) 2014, 2017  INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -326,7 +326,7 @@ static unsigned chose_best_numa_between_src_and_dest(int src, int dst)
 
																 	double timing_best;
															
 
																 	int best_numa = -1;
															
 
																 	unsigned numa;
															
 
																-	const unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
															
+
																+	const unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
															
 
																 	for(numa = 0; numa < nb_numa_nodes; numa++)
															
 
																 	{
															
 
																 		double actual = 1.0/starpu_transfer_bandwidth(src, numa) + 1.0/starpu_transfer_bandwidth(numa, dst);
															
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -2,6 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2009-2010, 2012-2017  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
															
 
																+ * Copyright (C) 2017  Inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -305,10 +306,10 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 
																 #endif
															
 
																 	}
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																-	if (_starpu_get_nb_numa_nodes() > 1) {
															
+
																+	if (starpu_get_nb_numa_nodes() > 1) {
															
 
																 		struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																 		hwloc_topology_t hwtopology = config->topology.hwtopology;
															
 
																-		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, _starpu_numa_id_to_logid(dst_node));
															
+
																+		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, starpu_numa_id_to_hwloclogid(dst_node));
															
 
																 		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
															
 
																 		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
															
 
																 		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A);
															
@@ -494,7 +495,7 @@ int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags
 
																 #endif
															
 
																 	}
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																-	else if (_starpu_get_nb_numa_nodes() > 1) {
															
+
																+	else if (starpu_get_nb_numa_nodes() > 1) {
															
 
																 		struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																 		hwloc_topology_t hwtopology = config->topology.hwtopology;
															
 
																 		hwloc_free(hwtopology, A, dim);
															
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2009-2017  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
															
 
																- * Copyright (C) 2016  Inria
															
+
																+ * Copyright (C) 2016, 2017  Inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -1620,7 +1620,7 @@ get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
 
																 			if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
															
 
																 			{
															
 
																 				unsigned numa;
															
 
																-				unsigned nnumas = _starpu_get_nb_numa_nodes();
															
+
																+				unsigned nnumas = starpu_get_nb_numa_nodes();
															
 
																 				for (numa = 0; numa < nnumas; numa++)
															
 
																 				{
															
 
																 					/* TODO : check if starpu_transfer_predict(node, i,...) is the same */
															
@@ -1651,7 +1651,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
																 		if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && (starpu_node_get_kind(node) != STARPU_CPU_RAM))
															
 
																 		{
															
 
																  	                unsigned i;
															
 
																-			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
															
+
																+			unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
															
 
																 			for (i=0; i<nb_numa_nodes; i++)
															
 
																 			{
															
 
																 				if (handle->per_node[i].allocated || 
															
@@ -1683,7 +1683,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
																 		/* node != 0 */
															
 
																 		/* try to push data to RAM if we can before to push on disk*/
															
 
																 			unsigned i;
															
 
																-			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
															
+
																+			unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
															
 
																 			for (i=0; i<nb_numa_nodes; i++)
															
 
																 			{
															
 
																 				if (handle->per_node[i].allocated || 
															
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -166,21 +166,26 @@ static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED
 
																 #if defined(STARPU_HAVE_HWLOC)
															
 
																 	struct _starpu_machine_topology *topology = &config->topology;
															
 
																-#ifdef STARPU_USE_NUMA
															
+
																+
															
 
																-        int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
															
+
																+	int nnumas = starpu_get_nb_numa_nodes();
															
 
																-
															
+
																+	if (nnumas > 1)
															
 
																-	if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
															
+
																+	{
															
 
																-	     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
															
+
																+		int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
															
 
																-	else {
															
+
																+
															
 
																-	     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
															
+
																+		if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
															
 
																-	     global_mem = obj->memory.local_memory;
															
+
																+		     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
															
 
																-	     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
															
+
																+		else {
															
 
																-	     limit = starpu_get_env_number(name);
															
+
																+		     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
															
 
																+		     global_mem = obj->memory.local_memory;
															
 
																+		     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
															
 
																+		     limit = starpu_get_env_number(name);
															
 
																+		}
															
 
																+	}
															
 
																+	else
															
 
																+	{
															
 
																+		/* Do not limit ourselves to a single NUMA node */
															
 
																+		global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
															
 
																 	}
															
 
																-#else /* STARPU_USE_NUMA */
															
 
																-	/* Do not limit ourself to a single NUMA node */
															
 
																-	global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
															
 
																-#endif /* STARPU_USE_NUMA */
															
 
																 #else /* STARPU_HAVE_HWLOC */
															
 
																 #ifdef STARPU_DEVEL
															
--- a/tests/datawizard/nowhere.c
+++ b/tests/datawizard/nowhere.c
@@ -1,6 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2015-2016  Université de Bordeaux
															
 
																+ * Copyright (C) 2017  Inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -25,13 +26,6 @@
 
																  * Try the NOWHERE flag
															
 
																  */
															
 
																-#ifdef STARPU_USE_NUMA
															
 
																-int main(int argc, char **argv)
															
 
																-{
															
 
																-	/* FIXME: assumes only one RAM node */
															
 
																-	return STARPU_TEST_SKIPPED;
															
 
																-}
															
 
																-#else
															
 
																 static int x, y;
															
 
																 static void prod(void *descr[], void *_args STARPU_ATTRIBUTE_UNUSED)
															
@@ -91,6 +85,13 @@ int main(int argc, char **argv)
 
																 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
															
 
																 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																+	if (starpu_get_nb_numa_nodes() > 1)
															
 
																+	{
															
 
																+		/* FIXME: assumes only one RAM node */
															
 
																+		starpu_shutdown();
															
 
																+		return STARPU_TEST_SKIPPED;
															
 
																+	}
															
 
																+
															
 
																 	starpu_variable_data_register(&handle_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
															
 
																 	starpu_variable_data_register(&handle_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
															
@@ -138,4 +139,3 @@ enodev:
 
																 	starpu_shutdown();
															
 
																 	return STARPU_TEST_SKIPPED;
															
 
																 }
															
 
																-#endif