Browse Source

Use the STARPU_USE_NUMA environment variable when launching the application to enable NUMA mode, instead of --enable-numa when compiling.
Also remove the --enable-numa option from configure.ac.

Corentin Salingue 8 years ago
parent
commit
101a90d0f7

+ 0 - 11
configure.ac

@@ -576,17 +576,6 @@ AC_MSG_RESULT($nmaxnumanodes)
 AC_DEFINE_UNQUOTED(STARPU_MAXNUMANODES, [$nmaxnumanodes],
 AC_DEFINE_UNQUOTED(STARPU_MAXNUMANODES, [$nmaxnumanodes],
 		[maximum number of NUMA nodes])
 		[maximum number of NUMA nodes])
 
 
-AC_ARG_ENABLE(numa, [AS_HELP_STRING([--enable-numa],
-	      [use NUMA node(s)])], [enable_numa=$enableval], [enable_numa=no])
-
-if test x$enable_numa = xyes ; then
-	AC_DEFINE(STARPU_USE_NUMA, [1], [NUMA memory nodes support is enabled])
-else
-	nmaxnumanodes=1
-fi
-
-AM_CONDITIONAL([STARPU_USE_NUMA], [test "x$enable_numa" = "xyes"])
-
 
 
 ###############################################################################
 ###############################################################################
 
 

+ 4 - 4
doc/doxygen/chapters/api/data_management.doxy

@@ -2,7 +2,7 @@
  * This file is part of the StarPU Handbook.
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2011, 2012 INRIA
+ * Copyright (C) 2011, 2012, 2017  INRIA
  * See the file version.doxy for copying conditions.
  * See the file version.doxy for copying conditions.
  */
  */
 
 
@@ -104,9 +104,9 @@ data to StarPU, the specified memory node indicates where the piece of
 data initially resides (we also call this memory node the home node of
 data initially resides (we also call this memory node the home node of
 a piece of data).
 a piece of data).
 
 
-In the case of NUMA systems, functions starpu_numaphysid_get_memory_node()
-and starpu_memory_node_get_numaphysid() can be used to convert from NUMA node
-numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
+In the case of NUMA systems, functions starpu_numa_hwloclogid_to_id()
+and starpu_numa_id_to_hwloclogid() can be used to convert between NUMA node
+numbers as seen by the HWLOC library and NUMA node numbers as seen by StarPU.
 
 
 \fn void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops)
 \fn void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops)
 \ingroup API_Data_Management
 \ingroup API_Data_Management

+ 5 - 5
doc/doxygen/chapters/api/workers.doxy

@@ -250,15 +250,15 @@ Return the type of \p node as defined by
 this function should be used in the allocation function to determine
 this function should be used in the allocation function to determine
 on which device the memory needs to be allocated.
 on which device the memory needs to be allocated.
 
 
-\fn unsigned starpu_numaphysid_get_memory_node(unsigned numaphysid)
+\fn int starpu_numa_hwloclogid_to_id(int logid)
 \ingroup API_Workers_Properties
 \ingroup API_Workers_Properties
 This function returns the identifier of the memory node associated to the NUMA
 This function returns the identifier of the memory node associated to the NUMA
-node identified by \p numaphysid by the Operating System.
+node identified by \p logid by the HWLOC library.
 
 
-\fn unsigned starpu_memory_node_get_numaphysid(unsigned node)
+\fn int starpu_numa_id_to_hwloclogid(unsigned id)
 \ingroup API_Workers_Properties
 \ingroup API_Workers_Properties
-This function returns the Operating System identifier of the memory node
-whose StarPU identifier is \p node.
+This function returns the HWLOC logical identifier of the memory node
+whose StarPU identifier is \p id.
 
 
 \fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 \fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 \ingroup API_Workers_Properties
 \ingroup API_Workers_Properties

+ 8 - 2
examples/cpp/add_vectors_cpp11.cpp

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
- * Copyright (C) 2012 INRIA
+ * Copyright (C) 2012, 2017  INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,7 +29,7 @@
 #endif
 #endif
 
 
 #include <starpu.h>
 #include <starpu.h>
-#if !defined(STARPU_HAVE_CXX11) || defined(STARPU_USE_NUMA)
+#if !defined(STARPU_HAVE_CXX11)
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	return 77;
 	return 77;
@@ -78,6 +78,12 @@ int main(int argc, char **argv)
 		return 77;
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
+	if (starpu_get_nb_numa_nodes() > 1)
+	{
+		starpu_shutdown();
+		return 77;
+	}
+
 	// StarPU data registering
 	// StarPU data registering
 	starpu_data_handle_t spu_vec_A;
 	starpu_data_handle_t spu_vec_A;
 	starpu_data_handle_t spu_vec_B;
 	starpu_data_handle_t spu_vec_B;

+ 1 - 2
include/starpu_config.h.in

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
  * Copyright (C) 2009-2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
- * Copyright (C) 2014  INRIA
+ * Copyright (C) 2014, 2017  INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -102,7 +102,6 @@
 #undef STARPU_HAVE_GLPK_H
 #undef STARPU_HAVE_GLPK_H
 
 
 #undef STARPU_HAVE_LIBNUMA
 #undef STARPU_HAVE_LIBNUMA
-#undef STARPU_USE_NUMA
 
 
 #undef STARPU_HAVE_WINDOWS
 #undef STARPU_HAVE_WINDOWS
 #undef STARPU_LINUX_SYS
 #undef STARPU_LINUX_SYS

+ 5 - 3
include/starpu_data.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
- * Copyright (C) 2016  Inria
+ * Copyright (C) 2016, 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -132,8 +132,10 @@ enum starpu_node_kind
 
 
 unsigned starpu_worker_get_memory_node(unsigned workerid);
 unsigned starpu_worker_get_memory_node(unsigned workerid);
 unsigned starpu_memory_nodes_get_count(void);
 unsigned starpu_memory_nodes_get_count(void);
-unsigned starpu_numaphysid_get_memory_node(unsigned numaphysid);
-unsigned starpu_memory_node_get_numaphysid(unsigned node);
+int starpu_get_nb_numa_nodes(void);
+int starpu_numa_hwloclogid_to_id(int logid);
+int starpu_numa_id_to_hwloclogid(unsigned id);
+
 enum starpu_node_kind starpu_node_get_kind(unsigned node);
 enum starpu_node_kind starpu_node_get_kind(unsigned node);
 
 
 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);

+ 2 - 1
src/core/disk.c

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2013  Corentin Salingue
  * Copyright (C) 2013  Corentin Salingue
  * Copyright (C) 2015, 2016  CNRS
  * Copyright (C) 2015, 2016  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -63,7 +64,7 @@ int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_s
 	unsigned disk_memnode = _starpu_memory_node_register(STARPU_DISK_RAM, 0);
 	unsigned disk_memnode = _starpu_memory_node_register(STARPU_DISK_RAM, 0);
 
 
         /* Connect the disk memory node to all numa memory nodes */
         /* Connect the disk memory node to all numa memory nodes */
-        int nb_numa_nodes = _starpu_get_nb_numa_nodes();
+        int nb_numa_nodes = starpu_get_nb_numa_nodes();
         int numa_node;
         int numa_node;
         for (numa_node = 0; numa_node < nb_numa_nodes; numa_node++)
         for (numa_node = 0; numa_node < nb_numa_nodes; numa_node++)
         {
         {

+ 87 - 56
src/core/perfmodel/perfmodel_bus.c

@@ -175,14 +175,21 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
 
 	/* Allocate a buffer on the host */
 	/* Allocate a buffer on the host */
 	unsigned char *h_buffer;
 	unsigned char *h_buffer;
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
-#else
-	/* we use STARPU_MAIN_RAM */
-	_STARPU_MALLOC(h_buffer, size);
+	
+#if defined(STARPU_HAVE_HWLOC)
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
+	}
+	else
 #endif
 #endif
-	cudaHostRegister((void *)h_buffer, size, 0);
+	{
+		/* we use STARPU_MAIN_RAM */
+		_STARPU_MALLOC(h_buffer, size);
+		cudaHostRegister((void *)h_buffer, size, 0);
+	}
 
 
 	STARPU_ASSERT(cures == cudaSuccess);
 	STARPU_ASSERT(cures == cudaSuccess);
 
 
@@ -252,11 +259,18 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
 
 	/* Free buffers */
 	/* Free buffers */
 	cudaHostUnregister(h_buffer);
 	cudaHostUnregister(h_buffer);
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	hwloc_free(hwtopology, h_buffer, size);
-#else
-	free(h_buffer);
+#if defined(STARPU_HAVE_HWLOC) 
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		hwloc_free(hwtopology, h_buffer, size);
+	}
+	else
 #endif
 #endif
+	{
+		free(h_buffer);
+	}
+
 	cudaFree(d_buffer);
 	cudaFree(d_buffer);
 
 
 	cudaThreadExit();
 	cudaThreadExit();
@@ -421,13 +435,19 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
 	/* Allocate a buffer on the host */
 	/* Allocate a buffer on the host */
 	unsigned char *h_buffer;
 	unsigned char *h_buffer;
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
-#else
-	/* we use STARPU_MAIN_RAM */
-	_STARPU_MALLOC(h_buffer, size);
+#if defined(STARPU_HAVE_HWLOC)
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa);
+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0);
+	}
+	else
 #endif
 #endif
+	{
+		/* we use STARPU_MAIN_RAM */
+		_STARPU_MALLOC(h_buffer, size);
+	}
 
 
 	/* hack to avoid third party libs to rebind threads */
 	/* hack to avoid third party libs to rebind threads */
 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
 	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
@@ -501,11 +521,17 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev,
 	err = clReleaseMemObject(d_buffer);
 	err = clReleaseMemObject(d_buffer);
 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
 		STARPU_OPENCL_REPORT_ERROR(err);
 		STARPU_OPENCL_REPORT_ERROR(err);
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	hwloc_free(hwtopology, h_buffer, size);
-#else
-	free(h_buffer);
+#if defined(STARPU_HAVE_HWLOC)
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		hwloc_free(hwtopology, h_buffer, size);
+	}
+	else
 #endif
 #endif
+	{
+		free(h_buffer);
+	}
 
 
 	/* Uninitiliaze OpenCL context on the device */
 	/* Uninitiliaze OpenCL context on the device */
 	if (not_initialized == 1)
 	if (not_initialized == 1)
@@ -632,48 +658,53 @@ static void measure_bandwidth_between_host_and_dev(int dev, struct dev_timing *d
 
 
 static void measure_bandwidth_latency_between_numa(int numa_src, int numa_dst)
 static void measure_bandwidth_latency_between_numa(int numa_src, int numa_dst)
 {
 {
-#if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_NUMA)
-	double start, end, timing;
-	unsigned iter;
+#if defined(STARPU_HAVE_HWLOC)
+	if (nnumas > 1)
+	{
+		/* NUMA mode activated */
+		double start, end, timing;
+		unsigned iter;
 
 
-	unsigned char *h_buffer;	
-	hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
-	h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
+		unsigned char *h_buffer;	
+		hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
+		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
 
 
-	unsigned char *d_buffer;	
-	hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
-	d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
+		unsigned char *d_buffer;	
+		hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
+		d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
 
 
-	memset(h_buffer, 0, SIZE);
+		memset(h_buffer, 0, SIZE);
 
 
-	start = starpu_timing_now();
-	for (iter = 0; iter < NITER; iter++)
-	{
-		memcpy(d_buffer, h_buffer, SIZE);
-	}
-	end = starpu_timing_now();
-	timing = end - start;
-	
-	numa_timing[numa_src][numa_dst] = timing/NITER/SIZE;
+		start = starpu_timing_now();
+		for (iter = 0; iter < NITER; iter++)
+		{
+			memcpy(d_buffer, h_buffer, SIZE);
+		}
+		end = starpu_timing_now();
+		timing = end - start;
 
 
-	start = starpu_timing_now();
-	for (iter = 0; iter < NITER; iter++)
-	{
-		memcpy(d_buffer, h_buffer, 1);
-	}
-	end = starpu_timing_now();
-	timing = end - start;
-	
-	numa_latency[numa_src][numa_dst] = timing/NITER;
+		numa_timing[numa_src][numa_dst] = timing/NITER/SIZE;
 
 
-	hwloc_free(hwtopology, h_buffer, SIZE);
-	hwloc_free(hwtopology, d_buffer, SIZE);
-#else
-	/* Cannot make a real calibration */
-	numa_timing[numa_src][numa_dst] = 0.01;
-	numa_latency[numa_src][numa_dst] = 0;
-#endif
+		start = starpu_timing_now();
+		for (iter = 0; iter < NITER; iter++)
+		{
+			memcpy(d_buffer, h_buffer, 1);
+		}
+		end = starpu_timing_now();
+		timing = end - start;
 
 
+		numa_latency[numa_src][numa_dst] = timing/NITER;
+
+		hwloc_free(hwtopology, h_buffer, SIZE);
+		hwloc_free(hwtopology, d_buffer, SIZE);
+	}
+	else
+#endif
+	{
+		/* Cannot make a real calibration */
+		numa_timing[numa_src][numa_dst] = 0.01;
+		numa_latency[numa_src][numa_dst] = 0;
+	}
 }
 }
 
 
 static void benchmark_all_gpu_devices(void)
 static void benchmark_all_gpu_devices(void)

+ 3 - 3
src/core/simgrid.c

@@ -1,8 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012-2017  Université de Bordeaux
  * Copyright (C) 2012-2017  Université de Bordeaux
- * Copyright (C) 2016  	    Inria
- * Copyright (C) 2016, 2017  	    CNRS
+ * Copyright (C) 2016, 2017  Inria
+ * Copyright (C) 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -1043,7 +1043,7 @@ void _starpu_simgrid_count_ngpus(void)
 			for (src2 = 1; src2 < STARPU_MAXNODES; src2++)
 			for (src2 = 1; src2 < STARPU_MAXNODES; src2++)
 			{
 			{
 				int numa;
 				int numa;
-				int nnumas = _starpu_get_nb_numa_nodes();
+				int nnumas = starpu_get_nb_numa_nodes();
 				int found = 0;
 				int found = 0;
 				for (numa = 0; numa < nnumas; numa++)
 				for (numa = 0; numa < nnumas; numa++)
 					if (starpu_bus_get_id(src2, numa) != -1)
 					if (starpu_bus_get_id(src2, numa) != -1)

+ 184 - 156
src/core/topology.c

@@ -65,9 +65,9 @@ static int nobind;
 /* For checking whether two workers share the same PU, indexed by PU number */
 /* For checking whether two workers share the same PU, indexed by PU number */
 static int cpu_worker[STARPU_MAXCPUS];
 static int cpu_worker[STARPU_MAXCPUS];
 static unsigned nb_numa_nodes = 0;
 static unsigned nb_numa_nodes = 0;
-static unsigned numa_memory_nodes[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
+static int numa_memory_nodes[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
 static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES];
 static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES];
-static int _starpu_worker_numa_node(unsigned workerid);
+static int _starpu_get_numa_node_worker(unsigned workerid);
 
 
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 
 
@@ -96,12 +96,12 @@ static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
 struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS];
 struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS];
 #endif
 #endif
 
 
-int _starpu_get_nb_numa_nodes(void)
+int starpu_get_nb_numa_nodes(void)
 {
 {
 	return nb_numa_nodes;
 	return nb_numa_nodes;
 }
 }
 
 
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
+#if defined(STARPU_HAVE_HWLOC)
 static int numa_get_logical_id(hwloc_obj_t obj)
 static int numa_get_logical_id(hwloc_obj_t obj)
 {
 {
 	STARPU_ASSERT(obj);
 	STARPU_ASSERT(obj);
@@ -119,42 +119,52 @@ static int numa_get_logical_id(hwloc_obj_t obj)
 }
 }
 #endif
 #endif
 
 
-static int _starpu_worker_numa_node(unsigned workerid)
+static int _starpu_get_numa_node_worker(unsigned workerid)
 {
 {
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
-	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
-	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
-	struct _starpu_machine_topology *topology = &config->topology ;
-
-	hwloc_obj_t obj;
-	switch(worker->arch) 	
+#if defined(STARPU_HAVE_HWLOC)
+	char * state;
+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
 	{
 	{
-		case STARPU_CPU_WORKER:
-			obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
-			break;
-		default:
-			STARPU_ABORT();
-	}
+		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+		struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
+		struct _starpu_machine_topology *topology = &config->topology ;
 
 
-	return numa_get_logical_id(obj);
-		
-#else 
-	(void) workerid; /* unused */
-	return -1;
+		hwloc_obj_t obj;
+		switch(worker->arch) 	
+		{
+			case STARPU_CPU_WORKER:
+				obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
+				break;
+			default:
+				STARPU_ABORT();
+		}
+
+		return numa_get_logical_id(obj);
+	}
+	else		
 #endif 
 #endif 
+	{
+		(void) workerid; /* unused */
+		return -1;
+	}
 }
 }
 
 
 static int _starpu_numa_get_logical_id_from_pu(int pu)
 static int _starpu_numa_get_logical_id_from_pu(int pu)
 {
 {
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
-	struct _starpu_machine_config *config = _starpu_get_machine_config();
-	struct _starpu_machine_topology *topology = &config->topology;
+#if defined(STARPU_HAVE_HWLOC)
+	if (nb_numa_nodes > 1)
+	{
+		struct _starpu_machine_config *config = _starpu_get_machine_config();
+		struct _starpu_machine_topology *topology = &config->topology;
 
 
-	hwloc_obj_t obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, pu);
-	return numa_get_logical_id(obj);
-#else
-	return -1;
+		hwloc_obj_t obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, pu);
+		return numa_get_logical_id(obj);
+	}
+	else
 #endif
 #endif
+	{
+		return -1;
+	}
 }
 }
 
 
 
 
@@ -913,18 +923,27 @@ unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config S
 #endif
 #endif
         _starpu_init_topology(config);
         _starpu_init_topology(config);
 
 
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
-	struct _starpu_machine_topology *topology = &config->topology ;
-        int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
-	int res = nnumanodes > 0 ? nnumanodes : 1 ;
-#else /* STARPU_USE_NUMA */
-	int res = 1 ;
-#endif /* STARPU_USE_NUMA */
+	int res;
+#if defined(STARPU_HAVE_HWLOC)
+	char * state;
+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
+	{
+		struct _starpu_machine_topology *topology = &config->topology ;
+		int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
+		res = nnumanodes > 0 ? nnumanodes : 1 ;
+	}
+	else
+#endif 
+	{	
+		res = 1;
+	}
+
 	STARPU_ASSERT_MSG(res <= STARPU_MAXNUMANODES, "Number of NUMA nodes discovered is higher than maximum accepted ! Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n");
 	STARPU_ASSERT_MSG(res <= STARPU_MAXNUMANODES, "Number of NUMA nodes discovered is higher than maximum accepted ! Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n");
 	return res;
 	return res;
 }
 }
 
 
-int _starpu_numa_logid_to_id(unsigned logid)
+//TODO change this in an array
+int starpu_numa_hwloclogid_to_id(int logid)
 {
 {
 	unsigned n;
 	unsigned n;
 	for (n = 0; n < nb_numa_nodes; n++)
 	for (n = 0; n < nb_numa_nodes; n++)
@@ -933,7 +952,7 @@ int _starpu_numa_logid_to_id(unsigned logid)
 	return -1;
 	return -1;
 }
 }
 
 
-unsigned _starpu_numa_id_to_logid(unsigned id)
+int starpu_numa_id_to_hwloclogid(unsigned id)
 {
 {
 	STARPU_ASSERT(id < STARPU_MAXNUMANODES);
 	STARPU_ASSERT(id < STARPU_MAXNUMANODES);
 	return numa_memory_nodes[id];
 	return numa_memory_nodes[id];
@@ -1888,10 +1907,10 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 		struct _starpu_worker *workerarg = &config->workers[worker];
 		struct _starpu_worker *workerarg = &config->workers[worker];
 		if (workerarg->arch == STARPU_CPU_WORKER)
 		if (workerarg->arch == STARPU_CPU_WORKER)
 		{
 		{
-			int numa_logical_id = _starpu_worker_numa_node(worker);
+			int numa_logical_id = _starpu_get_numa_node_worker(worker);
 
 
 			/* Convert logical id to StarPU id to check if this NUMA node is already saved or not */
 			/* Convert logical id to StarPU id to check if this NUMA node is already saved or not */
-			int numa_starpu_id = _starpu_numa_logid_to_id(numa_logical_id);
+			int numa_starpu_id = starpu_numa_hwloclogid_to_id(numa_logical_id);
 
 
 			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
 			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
 			{
 			{
@@ -1925,120 +1944,124 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 	_STARPU_DISP("No NUMA nodes found when checking CPU workers. Take NUMA nodes attached to CUDA and OpenCL devices... \n");
 	_STARPU_DISP("No NUMA nodes found when checking CPU workers. Take NUMA nodes attached to CUDA and OpenCL devices... \n");
 #endif
 #endif
 
 
-
-#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
-	for (i = 0; i < config->topology.ncudagpus; i++)
+	char * state;
+	if ((state = starpu_getenv("STARPU_USE_NUMA")) && atoi(state))
 	{
 	{
-		hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
-		
-		/* Hwloc cannot recognize some devices */
-		if (!obj)
-			continue;
-			
-		while (obj->type != HWLOC_OBJ_NODE)
+		/* NUMA mode activated */
+#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
+		for (i = 0; i < config->topology.ncudagpus; i++)
 		{
 		{
-			obj = obj->parent;
+			hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
 
 
-			/* If we don't find a "node" obj before the root, this means
-			 * hwloc does not know whether there are numa nodes or not, so
-			 * we should not use a per-node sampling in that case. */
+			/* Hwloc cannot recognize some devices */
 			if (!obj)
 			if (!obj)
 				continue;
 				continue;
-		}
-		int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
 
 
-		if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
-		{
-			_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
-			/* Don't create a new NUMA node */
-			numa_starpu_id = STARPU_MAIN_RAM;
-		}
+			while (obj->type != HWLOC_OBJ_NODE)
+			{
+				obj = obj->parent;
 
 
-		if (numa_starpu_id == -1)
-		{
-			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
-			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
-			numa_memory_nodes[memnode] = obj->logical_index;
-			nb_numa_nodes++;
+				/* If we don't find a "node" obj before the root, this means
+				 * hwloc does not know whether there are numa nodes or not, so
+				 * we should not use a per-node sampling in that case. */
+				if (!obj)
+					continue;
+			}
+			int numa_starpu_id = starpu_numa_hwloclogid_to_id(obj->logical_index);
+
+			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
+			{
+				_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
+				/* Don't create a new NUMA node */
+				numa_starpu_id = STARPU_MAIN_RAM;
+			}
+
+			if (numa_starpu_id == -1)
+			{
+				int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
+				STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
+				numa_memory_nodes[memnode] = obj->logical_index;
+				nb_numa_nodes++;
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-			snprintf(name, sizeof(name), "RAM%d", memnode);
-			host = _starpu_simgrid_get_host_by_name(name);
-			STARPU_ASSERT(host);
-			_starpu_simgrid_memory_node_set_host(memnode, host);
+				snprintf(name, sizeof(name), "RAM%d", memnode);
+				host = _starpu_simgrid_get_host_by_name(name);
+				STARPU_ASSERT(host);
+				_starpu_simgrid_memory_node_set_host(memnode, host);
 #endif
 #endif
-		}
-	}	
+			}
+		}	
 #endif
 #endif
 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
-	if (config->topology.nopenclgpus > 0)
-	{
-		cl_int err;
-		cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
-		cl_uint nb_platforms;
-		unsigned platform;
-		unsigned nb_opencl_devices = 0, num = 0;
-
-		err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
-		if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
-			nb_platforms=0;
-
-		cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
-		if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
-			device_type |= CL_DEVICE_TYPE_CPU;
-		if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
-			device_type = CL_DEVICE_TYPE_CPU;
-
-		for (platform = 0; platform < nb_platforms ; platform++)
+		if (config->topology.nopenclgpus > 0)
 		{
 		{
-			err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
-			if (err != CL_SUCCESS)
-				num = 0;
-			nb_opencl_devices += num;
-
-			for (i = 0; i < num; i++)
+			cl_int err;
+			cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
+			cl_uint nb_platforms;
+			unsigned platform;
+			unsigned nb_opencl_devices = 0, num = 0;
+
+			err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
+			if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
+				nb_platforms=0;
+
+			cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
+			if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
+				device_type |= CL_DEVICE_TYPE_CPU;
+			if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
+				device_type = CL_DEVICE_TYPE_CPU;
+
+			for (platform = 0; platform < nb_platforms ; platform++)
 			{
 			{
-				hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
+				err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
+				if (err != CL_SUCCESS)
+					num = 0;
+				nb_opencl_devices += num;
 
 
-				/* Hwloc cannot recognize some devices */
-				if (!obj)
-					continue;
-
-				while (obj->type != HWLOC_OBJ_NODE)
+				for (i = 0; i < num; i++)
 				{
 				{
-					obj = obj->parent;
+					hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
 
 
-					/* If we don't find a "node" obj before the root, this means
-					 * hwloc does not know whether there are numa nodes or not, so
-					 * we should not use a per-node sampling in that case. */
+					/* Hwloc cannot recognize some devices */
 					if (!obj)
 					if (!obj)
 						continue;
 						continue;
-				}
-				int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
 
 
-				if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
-				{
-					_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
-					/* Don't create a new NUMA node */
-					numa_starpu_id = STARPU_MAIN_RAM;
-				}
+					while (obj->type != HWLOC_OBJ_NODE)
+					{
+						obj = obj->parent;
 
 
-				if (numa_starpu_id == -1)
-				{
-					int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
-					STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
-					numa_memory_nodes[memnode] = obj->logical_index;
-					nb_numa_nodes++;
+						/* If we don't find a "node" obj before the root, this means
+						 * hwloc does not know whether there are numa nodes or not, so
+						 * we should not use a per-node sampling in that case. */
+						if (!obj)
+							continue;
+					}
+					int numa_starpu_id = starpu_numa_hwloclogid_to_id(obj->logical_index);
+
+					if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
+					{
+						_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
+						/* Don't create a new NUMA node */
+						numa_starpu_id = STARPU_MAIN_RAM;
+					}
+
+					if (numa_starpu_id == -1)
+					{
+						int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
+						STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
+						numa_memory_nodes[memnode] = obj->logical_index;
+						nb_numa_nodes++;
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-					snprintf(name, sizeof(name), "RAM%d", memnode);
-					host = _starpu_simgrid_get_host_by_name(name);
-					STARPU_ASSERT(host);
-					_starpu_simgrid_memory_node_set_host(memnode, host);
+						snprintf(name, sizeof(name), "RAM%d", memnode);
+						host = _starpu_simgrid_get_host_by_name(name);
+						STARPU_ASSERT(host);
+						_starpu_simgrid_memory_node_set_host(memnode, host);
 #endif
 #endif
-				}
-			}	
+					}
+				}	
+			}
 		}
 		}
-	}
 #endif
 #endif
+	}
 	
 	
 #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
 #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
 	//Found NUMA nodes from CUDA nodes
 	//Found NUMA nodes from CUDA nodes
@@ -2059,38 +2082,43 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 	unsigned numa;
 	unsigned numa;
 	for (numa = 0; numa < nnuma; numa++)
 	for (numa = 0; numa < nnuma; numa++)
 	{
 	{
-#if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
-		hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
-		unsigned numa_logical_id = obj->logical_index;
+#if defined(STARPU_HAVE_HWLOC)
+		if (nnuma > 1)
+		{
+			hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
+			unsigned numa_logical_id = obj->logical_index;
 
 
-		int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
-		STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
+			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
 
 
-		numa_memory_nodes[memnode] = numa_logical_id;
-		nb_numa_nodes++;								
+			numa_memory_nodes[memnode] = numa_logical_id;
+			nb_numa_nodes++;								
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-		snprintf(name, sizeof(name), "RAM%d", memnode);
-		host = _starpu_simgrid_get_host_by_name(name);
-		STARPU_ASSERT(host);
-		_starpu_simgrid_memory_node_set_host(memnode, host);
+			snprintf(name, sizeof(name), "RAM%d", memnode);
+			host = _starpu_simgrid_get_host_by_name(name);
+			STARPU_ASSERT(host);
+			_starpu_simgrid_memory_node_set_host(memnode, host);
 #endif
 #endif
-#else /* defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC) */
+		}
+		else
+#endif /* defined(STARPU_HAVE_HWLOC) */
+		{
 
 
-		/* In this case, nnuma has only one node */
-		int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
-		STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
+			/* In this case, nnuma has only one node */
+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
+			STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
 
 
-		numa_memory_nodes[memnode] = -1;
-		nb_numa_nodes++;								
+			numa_memory_nodes[memnode] = -1;
+			nb_numa_nodes++;								
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-		char name[16];
-		msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
-		STARPU_ASSERT(host);
-		_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
+			char name[16];
+			msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
+			STARPU_ASSERT(host);
+			_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
 #endif
 #endif
+		}
 
 
-#endif /* defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC) */
 	}	
 	}	
 	
 	
 	STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n");	
 	STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n");	
@@ -2168,8 +2196,8 @@ _starpu_init_workers_binding_and_memory (struct _starpu_machine_config *config,
 		{
 		{
 			case STARPU_CPU_WORKER:
 			case STARPU_CPU_WORKER:
 			{
 			{
-				int numa_logical_id = _starpu_worker_numa_node(worker);
-				int numa_starpu_id =  _starpu_numa_logid_to_id(numa_logical_id);
+				int numa_logical_id = _starpu_get_numa_node_worker(worker);
+				int numa_starpu_id =  starpu_numa_hwloclogid_to_id(numa_logical_id);
 				if (numa_starpu_id >= STARPU_MAXNUMANODES)
 				if (numa_starpu_id >= STARPU_MAXNUMANODES)
 					numa_starpu_id = STARPU_MAIN_RAM;
 					numa_starpu_id = STARPU_MAIN_RAM;
 
 
@@ -2607,7 +2635,7 @@ starpu_topology_print (FILE *output)
 		fprintf(output, "------\tNUMA %u\t------\n", numa);
 		fprintf(output, "------\tNUMA %u\t------\n", numa);
 		for (pu = 0; pu < topology->nhwpus; pu++)
 		for (pu = 0; pu < topology->nhwpus; pu++)
 		{
 		{
-			if (_starpu_numa_id_to_logid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
+			if (starpu_numa_id_to_hwloclogid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
 			{
 			{
 				if ((pu % nthreads_per_core) == 0)
 				if ((pu % nthreads_per_core) == 0)
 					fprintf(output, "core %u", pu / nthreads_per_core);
 					fprintf(output, "core %u", pu / nthreads_per_core);

+ 3 - 2
src/core/topology.h

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2009-2010, 2012, 2014-2017  Université de Bordeaux
  * Copyright (C) 2009-2010, 2012, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2015, 2017  CNRS
  * Copyright (C) 2010, 2015, 2017  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -71,7 +72,7 @@ void _starpu_bind_thread_on_cpus(struct _starpu_machine_config *config STARPU_AT
 
 
 struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d);
 struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d);
 
 
-int _starpu_get_nb_numa_nodes(void);
-unsigned _starpu_numa_id_to_logid(unsigned id);
+int starpu_get_nb_numa_nodes(void);
+int starpu_numa_id_to_hwloclogid(unsigned id);
 	
 	
 #endif // __TOPOLOGY_H__
 #endif // __TOPOLOGY_H__

+ 1 - 1
src/core/workers.c

@@ -1598,7 +1598,7 @@ void starpu_shutdown(void)
 	_starpu_kill_all_workers(&_starpu_config);
 	_starpu_kill_all_workers(&_starpu_config);
 	
 	
 	unsigned i;
 	unsigned i;
-	unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
+	unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
 	for (i=0; i<nb_numa_nodes; i++)
 	for (i=0; i<nb_numa_nodes; i++)
 	{
 	{
 		_starpu_free_all_automatically_allocated_buffers(i);
 		_starpu_free_all_automatically_allocated_buffers(i);

+ 2 - 2
src/datawizard/coherency.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures. *
 /* StarPU --- Runtime system for heterogeneous multicore architectures. *
  * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2014  INRIA
+ * Copyright (C) 2014, 2017  INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -326,7 +326,7 @@ static unsigned chose_best_numa_between_src_and_dest(int src, int dst)
 	double timing_best;
 	double timing_best;
 	int best_numa = -1;
 	int best_numa = -1;
 	unsigned numa;
 	unsigned numa;
-	const unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
+	const unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
 	for(numa = 0; numa < nb_numa_nodes; numa++)
 	for(numa = 0; numa < nb_numa_nodes; numa++)
 	{
 	{
 		double actual = 1.0/starpu_transfer_bandwidth(src, numa) + 1.0/starpu_transfer_bandwidth(numa, dst);
 		double actual = 1.0/starpu_transfer_bandwidth(src, numa) + 1.0/starpu_transfer_bandwidth(numa, dst);

+ 4 - 3
src/datawizard/malloc.c

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2009-2010, 2012-2017  Université de Bordeaux
  * Copyright (C) 2009-2010, 2012-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -305,10 +306,10 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 #endif
 #endif
 	}
 	}
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
-	if (_starpu_get_nb_numa_nodes() > 1) {
+	if (starpu_get_nb_numa_nodes() > 1) {
 		struct _starpu_machine_config *config = _starpu_get_machine_config();
 		struct _starpu_machine_config *config = _starpu_get_machine_config();
 		hwloc_topology_t hwtopology = config->topology.hwtopology;
 		hwloc_topology_t hwtopology = config->topology.hwtopology;
-		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, _starpu_numa_id_to_logid(dst_node));
+		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, starpu_numa_id_to_hwloclogid(dst_node));
 		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
 		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
 		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
 		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
 		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A);
 		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A);
@@ -494,7 +495,7 @@ int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags
 #endif
 #endif
 	}
 	}
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
-	else if (_starpu_get_nb_numa_nodes() > 1) {
+	else if (starpu_get_nb_numa_nodes() > 1) {
 		struct _starpu_machine_config *config = _starpu_get_machine_config();
 		struct _starpu_machine_config *config = _starpu_get_machine_config();
 		hwloc_topology_t hwtopology = config->topology.hwtopology;
 		hwloc_topology_t hwtopology = config->topology.hwtopology;
 		hwloc_free(hwtopology, A, dim);
 		hwloc_free(hwtopology, A, dim);

+ 4 - 4
src/datawizard/memalloc.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
- * Copyright (C) 2016  Inria
+ * Copyright (C) 2016, 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -1620,7 +1620,7 @@ get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
 			if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
 			if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
 			{
 			{
 				unsigned numa;
 				unsigned numa;
-				unsigned nnumas = _starpu_get_nb_numa_nodes();
+				unsigned nnumas = starpu_get_nb_numa_nodes();
 				for (numa = 0; numa < nnumas; numa++)
 				for (numa = 0; numa < nnumas; numa++)
 				{
 				{
 					/* TODO : check if starpu_transfer_predict(node, i,...) is the same */
 					/* TODO : check if starpu_transfer_predict(node, i,...) is the same */
@@ -1651,7 +1651,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 		if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && (starpu_node_get_kind(node) != STARPU_CPU_RAM))
 		if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && (starpu_node_get_kind(node) != STARPU_CPU_RAM))
 		{
 		{
  	                unsigned i;
  	                unsigned i;
-			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
+			unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
 			for (i=0; i<nb_numa_nodes; i++)
 			for (i=0; i<nb_numa_nodes; i++)
 			{
 			{
 				if (handle->per_node[i].allocated || 
 				if (handle->per_node[i].allocated || 
@@ -1683,7 +1683,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 		/* node != 0 */
 		/* node != 0 */
 		/* try to push data to RAM if we can before pushing it to disk */
 		/* try to push data to RAM if we can before pushing it to disk */
 			unsigned i;
 			unsigned i;
-			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
+			unsigned nb_numa_nodes = starpu_get_nb_numa_nodes();
 			for (i=0; i<nb_numa_nodes; i++)
 			for (i=0; i<nb_numa_nodes; i++)
 			{
 			{
 				if (handle->per_node[i].allocated || 
 				if (handle->per_node[i].allocated || 

+ 19 - 14
src/drivers/cpu/driver_cpu.c

@@ -166,21 +166,26 @@ static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED
 
 
 #if defined(STARPU_HAVE_HWLOC)
 #if defined(STARPU_HAVE_HWLOC)
 	struct _starpu_machine_topology *topology = &config->topology;
 	struct _starpu_machine_topology *topology = &config->topology;
-#ifdef STARPU_USE_NUMA
-        int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
-
-	if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
-	     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
-	else {
-	     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
-	     global_mem = obj->memory.local_memory;
-	     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
-	     limit = starpu_get_env_number(name);
+
+	int nnumas = starpu_get_nb_numa_nodes();
+	if (nnumas > 1)
+	{
+		int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
+
+		if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
+		     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
+		else {
+		     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
+		     global_mem = obj->memory.local_memory;
+		     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
+		     limit = starpu_get_env_number(name);
+		}
+	}
+	else
+	{
+		/* Do not limit ourselves to a single NUMA node */
+		global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
 	}
 	}
-#else /* STARPU_USE_NUMA */
-	/* Do not limit ourself to a single NUMA node */
-	global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
-#endif /* STARPU_USE_NUMA */
 
 
 #else /* STARPU_HAVE_HWLOC */
 #else /* STARPU_HAVE_HWLOC */
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL

+ 8 - 8
tests/datawizard/nowhere.c

@@ -1,6 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2015-2016  Université de Bordeaux
  * Copyright (C) 2015-2016  Université de Bordeaux
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,13 +26,6 @@
  * Try the NOWHERE flag
  * Try the NOWHERE flag
  */
  */
 
 
-#ifdef STARPU_USE_NUMA
-int main(int argc, char **argv)
-{
-	/* FIXME: assumes only one RAM node */
-	return STARPU_TEST_SKIPPED;
-}
-#else
 static int x, y;
 static int x, y;
 
 
 static void prod(void *descr[], void *_args STARPU_ATTRIBUTE_UNUSED)
 static void prod(void *descr[], void *_args STARPU_ATTRIBUTE_UNUSED)
@@ -91,6 +85,13 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
+	if (starpu_get_nb_numa_nodes() > 1)
+	{
+		/* FIXME: assumes only one RAM node */
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
 	starpu_variable_data_register(&handle_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
 	starpu_variable_data_register(&handle_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
 	starpu_variable_data_register(&handle_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
 	starpu_variable_data_register(&handle_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
 
 
@@ -138,4 +139,3 @@ enodev:
 	starpu_shutdown();
 	starpu_shutdown();
 	return STARPU_TEST_SKIPPED;
 	return STARPU_TEST_SKIPPED;
 }
 }
-#endif