8 anni fa · 62fb4c8538
--- a/doc/doxygen/chapters/api/workers.doxy
+++ b/doc/doxygen/chapters/api/workers.doxy
@@ -252,12 +252,12 @@ on which device the memory needs to be allocated.
 
				 
			
 
				 \fn int starpu_memory_nodes_numa_id_to_devid(int osid)
			
 
				 \ingroup API_Workers_Properties
			
 
				-This function returns the identifier of the memory node associated to the NUMA
			
 
				+Return the identifier of the memory node associated to the NUMA
			
 
				 node identified by \p osid by the Operating System.
			
 
				 
			
 
				 \fn int starpu_memory_nodes_numa_devid_to_id(unsigned id);
			
 
				 \ingroup API_Workers_Properties
			
 
				-This function returns the Operating System identifier of the memory node
			
 
				+Return the Operating System identifier of the memory node
			
 
				 whose StarPU identifier is \p id.
			
 
				 
			
 
				 \fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
			
@@ -287,7 +287,7 @@ Must be called after a potentially blocking call is complete, to
 
				 restore the relax state in place before the corresponding relax_on.
			
 
				 Decreases \c state_relax_refcnt. Calls to \ref starpu_worker_relax_on
			
 
				 and \c starpu_worker_relax_off must be well parenthesized. This
			
 
				-function is automatically called by \ref starpu_worker_unlock after the 
			
 
				+function is automatically called by \ref starpu_worker_unlock after the
			
 
				 target worker has been unlocked.
			
 
				 
			
 
				 \fn int starpu_worker_get_relax_state(void)
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -184,7 +184,7 @@ void _starpu_wait_job(struct _starpu_job *j)
 
				 	{
			
 
				 		STARPU_PTHREAD_COND_WAIT(&j->sync_cond, &j->sync_mutex);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
			
 
				         _STARPU_LOG_OUT();
			
 
				 }
			
@@ -368,7 +368,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 	 * to tell them that we will not exist any more before notifying the
			
 
				 	 * tasks waiting for us
			
 
				 	 *
			
 
				-	 * For continuations, implicit dependency handles are only released 
			
 
				+	 * For continuations, implicit dependency handles are only released
			
 
				 	 * when the task fully completes */
			
 
				 	if (j->implicit_dep_handle && !continuation)
			
 
				 	{
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -177,7 +177,7 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
				 
			
 
				 	/* Allocate a buffer on the host */
			
 
				 	unsigned char *h_buffer;
			
 
				-	
			
 
				+
			
 
				 #if defined(STARPU_HAVE_HWLOC)
			
 
				 	if (nnuma_nodes > 1)
			
 
				 	{
			
@@ -261,7 +261,7 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 
				 
			
 
				 	/* Free buffers */
			
 
				 	cudaHostUnregister(h_buffer);
			
 
				-#if defined(STARPU_HAVE_HWLOC) 
			
 
				+#if defined(STARPU_HAVE_HWLOC)
			
 
				 	if (nnuma_nodes > 1)
			
 
				 	{
			
 
				 		/* NUMA mode activated */
			
@@ -669,11 +669,11 @@ static void measure_bandwidth_latency_between_numa(int numa_src, int numa_dst)
 
				 		double start, end, timing;
			
 
				 		unsigned iter;
			
 
				 
			
 
				-		unsigned char *h_buffer;	
			
 
				+		unsigned char *h_buffer;
			
 
				 		hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_src);
			
 
				 		h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				 
			
 
				-		unsigned char *d_buffer;	
			
 
				+		unsigned char *d_buffer;
			
 
				 		hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_dst);
			
 
				 		d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0);
			
 
				 
			
@@ -1038,7 +1038,7 @@ static int check_bus_affinity_file(void)
 
				 
			
 
				 	ret = fscanf(f, "# GPU\t");
			
 
				 	STARPU_ASSERT(ret == 0);
			
 
				-	
			
 
				+
			
 
				 	ret = fscanf(f, "NUMA%u\t", &dummy);
			
 
				 
			
 
				 	if (locked)
			
@@ -1255,7 +1255,7 @@ static double search_bus_best_latency(int src, char * type, int htod)
 
				 		}
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-		if (strncmp(type, "OpenCL", 6) == 0)		
			
 
				+		if (strncmp(type, "OpenCL", 6) == 0)
			
 
				 		{
			
 
				 			if (htod)
			
 
				 				actual = opencldev_timing_per_numa[src*STARPU_MAXNUMANODES+numa].latency_htod;
			
@@ -1338,7 +1338,7 @@ static void write_bus_latency_file_content(void)
 
				 			{
			
 
				 				b_low = b_up = 0;
			
 
				 
			
 
				-				/* ---- Begin NUMA ---- */				
			
 
				+				/* ---- Begin NUMA ---- */
			
 
				 				b_up += nnumas;
			
 
				 
			
 
				 				if (src >= b_low && src < b_up && dst >= b_low && dst < b_up)
			
@@ -1347,7 +1347,7 @@ static void write_bus_latency_file_content(void)
 
				 				/* copy interval to check numa index later */
			
 
				 				unsigned numa_low = b_low;
			
 
				 				unsigned numa_up = b_up;
			
 
				-				
			
 
				+
			
 
				 				b_low += nnumas;
			
 
				 				/* ---- End NUMA ---- */
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -1390,7 +1390,7 @@ static void write_bus_latency_file_content(void)
 
				 				b_up += nmic;
			
 
				 				/* TODO Latency MIC */
			
 
				 				b_low += nmic;
			
 
				-#endif                                
			
 
				+#endif
			
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				 				b_up += nmpi_ms;
			
 
				 				/* Modify MPI src and MPI dst if they contain the master node or not
			
@@ -1577,9 +1577,9 @@ static double search_bus_best_timing(int src, char * type, int htod)
 
				 {
			
 
				         /* Search the best latency for this node */
			
 
				         double best = 0.0;
			
 
				-        double actual = 0.0; 
			
 
				+        double actual = 0.0;
			
 
				         unsigned check = 0;
			
 
				-        unsigned numa; 
			
 
				+        unsigned numa;
			
 
				         for (numa = 0; numa < nnumas; numa++)
			
 
				         {
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -1592,7 +1592,7 @@ static double search_bus_best_timing(int src, char * type, int htod)
 
				 		}
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-		if (strncmp(type, "OpenCL", 6) == 0)		
			
 
				+		if (strncmp(type, "OpenCL", 6) == 0)
			
 
				 		{
			
 
				 			if (htod)
			
 
				 				actual = opencldev_timing_per_numa[src*STARPU_MAXNUMANODES+numa].timing_htod;
			
@@ -2852,7 +2852,7 @@ flat_cuda:
 
				 	if (locked)
			
 
				 		_starpu_fwrunlock(f);
			
 
				 	fclose(f);
			
 
				-	
			
 
				+
			
 
				 }
			
 
				 
			
 
				 static void generate_bus_platform_file(void)
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -150,7 +150,7 @@ static int _starpu_get_logical_numa_node_worker(unsigned workerid)
 
				 		struct _starpu_machine_topology *topology = &config->topology ;
			
 
				 
			
 
				 		hwloc_obj_t obj;
			
 
				-		switch(worker->arch) 	
			
 
				+		switch(worker->arch)
			
 
				 		{
			
 
				 			case STARPU_CPU_WORKER:
			
 
				 				obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
			
@@ -161,8 +161,8 @@ static int _starpu_get_logical_numa_node_worker(unsigned workerid)
 
				 
			
 
				 		return numa_get_logical_id(obj);
			
 
				 	}
			
 
				-	else		
			
 
				-#endif 
			
 
				+	else
			
 
				+#endif
			
 
				 	{
			
 
				 		(void) workerid; /* unused */
			
 
				 		return STARPU_NUMA_MAIN_RAM;
			
@@ -180,7 +180,7 @@ static int _starpu_get_physical_numa_node_worker(unsigned workerid)
 
				 		struct _starpu_machine_topology *topology = &config->topology ;
			
 
				 
			
 
				 		hwloc_obj_t obj;
			
 
				-		switch(worker->arch) 	
			
 
				+		switch(worker->arch)
			
 
				 		{
			
 
				 			case STARPU_CPU_WORKER:
			
 
				 				obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
			
@@ -191,8 +191,8 @@ static int _starpu_get_physical_numa_node_worker(unsigned workerid)
 
				 
			
 
				 		return numa_get_physical_id(obj);
			
 
				 	}
			
 
				-	else		
			
 
				-#endif 
			
 
				+	else
			
 
				+#endif
			
 
				 	{
			
 
				 		(void) workerid; /* unused */
			
 
				 		return STARPU_NUMA_MAIN_RAM;
			
@@ -997,8 +997,8 @@ unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config S
 
				 		res = nnumanodes > 0 ? nnumanodes : 1 ;
			
 
				 	}
			
 
				 	else
			
 
				-#endif 
			
 
				-	{	
			
 
				+#endif
			
 
				+	{
			
 
				 		res = 1;
			
 
				 	}
			
 
				 
			
@@ -1701,7 +1701,7 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 
				 					(nworker_per_cuda * topology->ncudagpus) :
			
 
				 					topology->ncudagpus;
			
 
				 #endif
			
 
				-			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus 
			
 
				+			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
			
 
				 				+ cuda_busy_cpus
			
 
				 				+ topology->nopenclgpus + topology->nsccdevices;
			
 
				 
			
@@ -2074,7 +2074,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 				_starpu_simgrid_memory_node_set_host(memnode, host);
			
 
				 #endif
			
 
				 			}
			
 
				-		}	
			
 
				+		}
			
 
				 #endif
			
 
				 #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
			
 
				 		if (config->topology.nopenclgpus > 0)
			
@@ -2086,7 +2086,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 			unsigned nb_opencl_devices = 0, num = 0;
			
 
				 
			
 
				 			err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
			
 
				-			if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
			
 
				+			if (STARPU_UNLIKELY(err != CL_SUCCESS))
			
 
				 				nb_platforms=0;
			
 
				 
			
 
				 			cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
			
@@ -2134,7 +2134,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 						int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
			
 
				 						STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
			
 
				 						numa_memory_nodes_to_hwloclogid[memnode] = obj->logical_index;
			
 
				-						numa_memory_nodes_to_physicalid[memnode] = obj->os_index;	
			
 
				+						numa_memory_nodes_to_physicalid[memnode] = obj->os_index;
			
 
				 						nb_numa_nodes++;
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 						snprintf(name, sizeof(name), "RAM%d", memnode);
			
@@ -2143,12 +2143,12 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 						_starpu_simgrid_memory_node_set_host(memnode, host);
			
 
				 #endif
			
 
				 					}
			
 
				-				}	
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 #endif
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
			
 
				 	//Found NUMA nodes from CUDA nodes
			
 
				 	if (nb_numa_nodes != 0)
			
@@ -2164,7 +2164,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 	if (nnuma > STARPU_MAXNUMANODES)
			
 
				 	{
			
 
				 		_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
			
 
				-		nnuma = STARPU_MAXNUMANODES;		
			
 
				+		nnuma = STARPU_MAXNUMANODES;
			
 
				 	}
			
 
				 
			
 
				 	unsigned numa;
			
@@ -2182,7 +2182,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 
			
 
				 			numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id;
			
 
				 			numa_memory_nodes_to_physicalid[memnode] = numa_physical_id;
			
 
				-			nb_numa_nodes++;								
			
 
				+			nb_numa_nodes++;
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 			snprintf(name, sizeof(name), "RAM%d", memnode);
			
@@ -2201,7 +2201,7 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 
			
 
				 			numa_memory_nodes_to_hwloclogid[memnode] = STARPU_NUMA_MAIN_RAM;
			
 
				 			numa_memory_nodes_to_physicalid[memnode] = STARPU_NUMA_MAIN_RAM;
			
 
				-			nb_numa_nodes++;								
			
 
				+			nb_numa_nodes++;
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 			char name[16];
			
 
				 			msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
			
@@ -2210,9 +2210,9 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 
				 #endif
			
 
				 		}
			
 
				 
			
 
				-	}	
			
 
				-	
			
 
				-	STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n");	
			
 
				+	}
			
 
				+
			
 
				+	STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n");
			
 
				 }
			
 
				 
			
 
				 static void _starpu_init_numa_bus()
			
@@ -2524,9 +2524,9 @@ _starpu_init_workers_binding_and_memory (struct _starpu_machine_config *config,
 
				 					mpi_init[devid] = 1;
			
 
				 					mpi_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
			
 
				 					memory_node = mpi_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MPI_MS_RAM, devid);
			
 
				-		
			
 
				+
			
 
				 					for (numa = 0; numa < nb_numa_nodes; numa++)
			
 
				-					{	
			
 
				+					{
			
 
				 						_starpu_register_bus(numa, memory_node);
			
 
				 						_starpu_register_bus(memory_node, numa);
			
 
				 					}
			
@@ -2780,4 +2780,3 @@ starpu_topology_print (FILE *output)
 
				 		fprintf(output, "\n");
			
 
				 	}
			
 
				 }
			
 
				-
			
--- a/src/datawizard/datawizard.h
+++ b/src/datawizard/datawizard.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2014, 2017  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2013  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -166,12 +166,15 @@ static size_t _starpu_cpu_get_global_mem_size(int nodeid STARPU_ATTRIBUTE_UNUSED
 
				 		int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NODE);
			
 
				 
			
 
				 		if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
			
 
				-		     global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
			
 
				-		else {
			
 
				-		     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
			
 
				-		     global_mem = obj->memory.local_memory;
			
 
				-		     sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
			
 
				-		     limit = starpu_get_env_number(name);
			
 
				+		{
			
 
				+			global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
			
 
				+			global_mem = obj->memory.local_memory;
			
 
				+			sprintf(name, "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
			
 
				+			limit = starpu_get_env_number(name);
			
 
				 		}
			
 
				 	}
			
 
				 	else
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -163,7 +163,7 @@ cudaStream_t starpu_cuda_get_local_in_transfer_stream()
 
				 	int worker = starpu_worker_get_id_check();
			
 
				 	int devid = starpu_worker_get_devid(worker);
			
 
				 	cudaStream_t stream;
			
 
				-	
			
 
				+
			
 
				 	stream = in_transfer_streams[devid];
			
 
				 	STARPU_ASSERT(stream);
			
 
				 	return stream;
			
@@ -251,11 +251,13 @@ void starpu_cuda_set_device(unsigned devid STARPU_ATTRIBUTE_UNUSED)
 
				 	}
			
 
				 #else
			
 
				 	for (i = 0; i < conf->n_cuda_opengl_interoperability; i++)
			
 
				+	{
			
 
				 		if (conf->cuda_opengl_interoperability[i] == devid)
			
 
				 		{
			
 
				 			cures = cudaGLSetGLDevice(devid);
			
 
				 			goto done;
			
 
				 		}
			
 
				+	}
			
 
				 #endif
			
 
				 
			
 
				 	cures = cudaSetDevice(devid);