15 years ago · f86959ff77
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -256,8 +256,15 @@ static int find_numa_node(hwloc_obj_t obj)
 
																 	hwloc_obj_t current = obj;
															
 
																 	while (current->depth != HWLOC_OBJ_NODE)
															
 
																+	{
															
 
																 		current = current->parent;
															
 
																+		/* If we don't find a "node" obj before the root, this means
															
 
																+		 * hwloc does not know whether there are numa nodes or not, so
															
 
																+		 * we should not use a per-node sampling in that case. */
															
 
																+		STARPU_ASSERT(current);
															
 
																+	}
															
 
																+
															
 
																 	STARPU_ASSERT(current->depth == HWLOC_OBJ_NODE);
															
 
																 	return current->logical_index; 
															
@@ -272,6 +279,10 @@ static void measure_bandwidth_between_cpus_and_dev(int dev, struct dev_timing *d
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																 	int cpu_depth = hwloc_get_type_depth(hwtopology, HWLOC_OBJ_CORE);
															
 
																 	int nnuma_nodes = hwloc_get_nbobjs_by_depth(hwtopology, HWLOC_OBJ_NODE);
															
 
																+
															
 
																+	/* If no NUMA node was found, we assume that we have a single memory
															
 
																+	 * bank. */
															
 
																+	const unsigned no_node_obj_was_found = (nnuma_nodes == 0);
															
 
																 	unsigned is_available_per_numa_node[nnuma_nodes];
															
 
																 	double dev_timing_htod_per_numa_node[nnuma_nodes];
															
@@ -286,18 +297,21 @@ static void measure_bandwidth_between_cpus_and_dev(int dev, struct dev_timing *d
 
																 		dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].cpu_id = cpu;
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																-		hwloc_obj_t obj = hwloc_get_obj_by_depth(hwtopology, cpu_depth, cpu);
															
 
																-
															
 
																-		int numa_id = find_numa_node(obj);
															
 
																-
															
 
																-		if (is_available_per_numa_node[numa_id])
															
 
																+		if (!no_node_obj_was_found)
															
 
																 		{
															
 
																-			/* We reuse the previous numbers for that NUMA node */
															
 
																-			dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_htod =
															
 
																-				dev_timing_htod_per_numa_node[numa_id];
															
 
																-			dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_dtoh =
															
 
																-				dev_timing_dtoh_per_numa_node[numa_id];
															
 
																-			continue;
															
 
																+			hwloc_obj_t obj = hwloc_get_obj_by_depth(hwtopology, cpu_depth, cpu);
															
 
																+	
															
 
																+			int numa_id = find_numa_node(obj);
															
 
																+	
															
 
																+			if (is_available_per_numa_node[numa_id])
															
 
																+			{
															
 
																+				/* We reuse the previous numbers for that NUMA node */
															
 
																+				dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_htod =
															
 
																+					dev_timing_htod_per_numa_node[numa_id];
															
 
																+				dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_dtoh =
															
 
																+					dev_timing_dtoh_per_numa_node[numa_id];
															
 
																+				continue;
															
 
																+			}
															
 
																 		}
															
 
																 #endif
															
@@ -311,7 +325,7 @@ static void measure_bandwidth_between_cpus_and_dev(int dev, struct dev_timing *d
 
																 #endif
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																-		if (!is_available_per_numa_node[numa_id])
															
 
																+		if (!no_node_obj_was_found && !is_available_per_numa_node[numa_id])
															
 
																 		{
															
 
																 			/* Save the results for that NUMA node */
															
 
																 			dev_timing_htod_per_numa_node[numa_id] =