8 years ago · e7589c5fed
--- a/src/core/disk.c
+++ b/src/core/disk.c
@@ -67,9 +67,8 @@ int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_s
 
				         int numa_node;
			
 
				         for (numa_node = 0; numa_node < nb_numa_nodes; numa_node++)
			
 
				         {
			
 
				-                int numa_memnode = _starpu_numalogid_to_memnode(numa_node);
			
 
				-                _starpu_register_bus(disk_memnode, numa_memnode);
			
 
				-                _starpu_register_bus(numa_memnode, disk_memnode);
			
 
				+                _starpu_register_bus(disk_memnode, numa_node);
			
 
				+                _starpu_register_bus(numa_node, disk_memnode);
			
 
				         }
			
 
				 
			
 
				 	/* connect disk */
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -112,6 +112,10 @@ struct starpu_perfmodel_arch * _starpu_arch_comb_get(int comb);
 
				 
			
 
				 void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb);
			
 
				 
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				+hwloc_topology_t _starpu_perfmodel_get_hwtopology();
			
 
				+#endif
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -837,13 +837,23 @@ _starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
 
				 
			
 
				 unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
			
 
				 {
			
 
				+#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				+        _starpu_opencl_init();
			
 
				+#endif
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				+        _starpu_init_cuda();
			
 
				+#endif
			
 
				+        _starpu_init_topology(config);
			
 
				+
			
 
				 #if defined(STARPU_USE_NUMA) && defined(STARPU_HAVE_HWLOC)
			
 
				 	struct _starpu_machine_topology *topology = &config->topology ;
			
 
				         int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NODE) ;
			
 
				-	return nnumanodes > 0 ? nnumanodes : 1 ;
			
 
				+	int res = nnumanodes > 0 ? nnumanodes : 1 ;
			
 
				 #else /* STARPU_USE_NUMA */
			
 
				-	return 1 ;
			
 
				+	int res = 1 ;
			
 
				 #endif /* STARPU_USE_NUMA */
			
 
				+	STARPU_ASSERT_MSG(res <= STARPU_MAXNUMANODES, "Number of NUMA nodes discovered is higher than maximum accepted ! Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n");
			
 
				+	return res;
			
 
				 }
			
 
				 
			
 
				 int _starpu_numa_logid_to_id(unsigned logid)
			
@@ -855,6 +865,11 @@ int _starpu_numa_logid_to_id(unsigned logid)
 
				 	return -1;
			
 
				 }
			
 
				 
			
 
				+unsigned _starpu_numa_id_to_logid(unsigned id)
			
 
				+{
			
 
				+	STARPU_ASSERT(id >= 0 && id < STARPU_MAXNUMANODES);
			
 
				+	return numa_memory_nodes[id];
			
 
				+}
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 void _starpu_topology_filter(hwloc_topology_t topology)
			
@@ -1809,7 +1824,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 			/* Convert logical id to StarPU id to check if this NUMA node is already saved or not */
			
 
				 			int numa_starpu_id = _starpu_numa_logid_to_id(numa_logical_id);
			
 
				 
			
 
				-			if (numa_starpu_id == -1 && nb_numa_nodes == (STARPU_MAXNUMANODES-1))
			
 
				+			if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
			
 
				 			{
			
 
				 				_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
			
 
				 				/* Don't create a new NUMA node */
			
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -71,8 +71,7 @@ void _starpu_bind_thread_on_cpus(struct _starpu_machine_config *config STARPU_AT
 
				 
			
 
				 struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d);
			
 
				 
			
 
				-int _starpu_numalogid_to_memnode(unsigned numalogid);
			
 
				-int _starpu_memnode_to_numalogid(unsigned memnode);
			
 
				 int _starpu_get_nb_numa_nodes(void);
			
 
				+unsigned _starpu_numa_id_to_logid(unsigned id);
			
 
				 	
			
 
				 #endif // __TOPOLOGY_H__
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -1584,8 +1584,7 @@ void starpu_shutdown(void)
 
				 	unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
			
 
				 	for (i=0; i<nb_numa_nodes; i++)
			
 
				 	{
			
 
				-		unsigned id = _starpu_numalogid_to_memnode(i);
			
 
				-		_starpu_free_all_automatically_allocated_buffers(id);
			
 
				+		_starpu_free_all_automatically_allocated_buffers(i);
			
 
				 	}
			
 
				 
			
 
				 	{
			
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -296,7 +296,7 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 	if (_starpu_get_nb_numa_nodes() > 1) {
			
 
				 		hwloc_topology_t hwtopology = config->topology.hwtopology;
			
 
				-		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, _starpu_memnode_to_numalogid(dst_node));
			
 
				+		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, _starpu_numa_id_to_logid(dst_node));
			
 
				 		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
			
 
				 		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
			
 
				 		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A);
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -1650,11 +1650,10 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
				 			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
			
 
				 			for (i=0; i<nb_numa_nodes; i++)
			
 
				 			{
			
 
				-				unsigned id = _starpu_numalogid_to_memnode(i);
			
 
				-				if (handle->per_node[id].allocated || 
			
 
				-				    _starpu_memory_manager_test_allocate_size(id, size_handle) == 1)
			
 
				+				if (handle->per_node[i].allocated || 
			
 
				+				    _starpu_memory_manager_test_allocate_size(i, size_handle) == 1)
			
 
				 				{
			
 
				-					target = id;
			
 
				+					target = i;
			
 
				 					break;
			
 
				 				}
			
 
				 			}
			
@@ -1683,11 +1682,10 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
				 			unsigned nb_numa_nodes = _starpu_get_nb_numa_nodes();
			
 
				 			for (i=0; i<nb_numa_nodes; i++)
			
 
				 			{
			
 
				-				unsigned id = _starpu_numalogid_to_memnode(i);
			
 
				-				if (handle->per_node[id].allocated || 
			
 
				-				    _starpu_memory_manager_test_allocate_size(id, size_handle) == 1)
			
 
				+				if (handle->per_node[i].allocated || 
			
 
				+				    _starpu_memory_manager_test_allocate_size(i, size_handle) == 1)
			
 
				 				{
			
 
				-					target = id;
			
 
				+					target = i;
			
 
				 					break;
			
 
				 				}
			
 
				 			}
			
--- a/src/drivers/mpi/driver_mpi_common.c
+++ b/src/drivers/mpi/driver_mpi_common.c
@@ -466,7 +466,7 @@ void _starpu_mpi_common_barrier(void)
 
				 /* Compute bandwidth and latency between source and sink nodes
			
 
				  * Source node has to have the entire set of times at the end
			
 
				  */
			
 
				-void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS])
			
 
				+void _starpu_mpi_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS])
			
 
				 {
			
 
				         int ret;
			
 
				         unsigned iter;
			
@@ -502,7 +502,7 @@ void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_M
 
				                                         STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
			
 
				                                 }
			
 
				                                 end = starpu_timing_now();
			
 
				-                                bandwidth_dtod[sender][receiver] = (NITER*1000000)/(end - start);
			
 
				+                                timing_dtod[sender][receiver] = (end - start)/NITER/SIZE_BANDWIDTH;
			
 
				 
			
 
				                                 /* measure latency sender to receiver */
			
 
				                                 start = starpu_timing_now();
			
@@ -542,14 +542,14 @@ void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_M
 
				                 /* if we are the sender, we send the data */
			
 
				                 if (sender == id_proc)
			
 
				                 {
			
 
				-                        MPI_Send(bandwidth_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
			
 
				+                        MPI_Send(timing_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
			
 
				                         MPI_Send(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
			
 
				                 }
			
 
				 
			
 
				                 /* the master node receives the data */
			
 
				                 if (src_node_id == id_proc)
			
 
				                 {
			
 
				-                        MPI_Recv(bandwidth_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			
 
				+                        MPI_Recv(timing_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			
 
				                         MPI_Recv(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			
 
				                 }