hace 9 años · 877d29d194
--- a/doc/doxygen/chapters/40environment_variables.doxy
+++ b/doc/doxygen/chapters/40environment_variables.doxy
@@ -134,13 +134,15 @@ CPUs. This is for instance useful when running the testsuite in parallel.
 
				 <dd>
			
 
				 \anchor STARPU_WORKERS_CPUID
			
 
				 \addindex __env__STARPU_WORKERS_CPUID
			
 
				-Passing an array of integers (starting from 0) in \ref STARPU_WORKERS_CPUID
			
 
				+Passing an array of integers in \ref STARPU_WORKERS_CPUID
			
 
				 specifies on which logical CPU the different workers should be
			
 
				 bound. For instance, if <c>STARPU_WORKERS_CPUID = "0 1 4 5"</c>, the first
			
 
				 worker will be bound to logical CPU #0, the second CPU worker will be bound to
			
 
				 logical CPU #1 and so on.  Note that the logical ordering of the CPUs is either
			
 
				 determined by the OS, or provided by the library <c>hwloc</c> in case it is
			
 
				-available.
			
 
				+available. Inclusive ranges can be provided: for instance, <c>STARPU_WORKERS_CPUID = "1:3
			
 
				+5"</c> will bind the first three workers to logical CPUs #1, #2, and #3, and the
			
 
				+fourth worker to logical CPU #5.
			
 
				 
			
 
				 Note that the first workers correspond to the CUDA workers, then come the
			
 
				 OpenCL workers, and finally the CPU workers. For example if
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -130,7 +130,7 @@ static hwloc_topology_t hwtopology;
 
				 static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int cpu, struct dev_timing *dev_timing_per_cpu)
			
 
				 {
			
 
				 	struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 	size_t size = SIZE;
			
 
				 
			
 
				 	/* Initialize CUDA context on the device */
			
@@ -139,13 +139,13 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
				 	cudaSetDevice(dev);
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 
			
 
				 	/* hack to force the initialization */
			
 
				 	cudaFree(0);
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 
			
 
				         /* Get the maximum size which can be allocated on the device */
			
 
				 	struct cudaDeviceProp prop;
			
@@ -161,7 +161,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
				 	STARPU_ASSERT(cures == cudaSuccess);
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 
			
 
				 	/* Allocate a buffer on the host */
			
 
				 	unsigned char *h_buffer;
			
@@ -169,14 +169,14 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
				 	STARPU_ASSERT(cures == cudaSuccess);
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 
			
 
				 	/* Fill them */
			
 
				 	memset(h_buffer, 0, size);
			
 
				 	cudaMemset(d_buffer, 0, size);
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 
			
 
				 	unsigned iter;
			
 
				 	double timing;
			
@@ -348,7 +348,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
				 	int not_initialized;
			
 
				 
			
 
				         struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 
			
 
				 	/* Is the context already initialised ? */
			
 
				         starpu_opencl_get_context(dev, &context);
			
@@ -380,7 +380,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
				 	}
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 
			
 
				 	/* Allocate a buffer on the device */
			
 
				 	cl_mem d_buffer;
			
@@ -388,21 +388,21 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
				 	if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				         /* Allocate a buffer on the host */
			
 
				 	unsigned char *h_buffer;
			
 
				         h_buffer = (unsigned char *)malloc(size);
			
 
				 	STARPU_ASSERT(h_buffer);
			
 
				 
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				         /* Fill them */
			
 
				 	memset(h_buffer, 0, size);
			
 
				         err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL);
			
 
				         if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				         clFinish(queue);
			
 
				 	/* hack to avoid third party libs to rebind threads */
			
 
				-	_starpu_bind_thread_on_cpu(config, cpu);
			
 
				+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
			
 
				 
			
 
				         unsigned iter;
			
 
				 	double timing;
			
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -1896,7 +1896,7 @@ unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker)
 
				 
			
 
				 void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid)
			
 
				 {
			
 
				-	_starpu_bind_thread_on_cpu(_starpu_get_machine_config(), cpuid);
			
 
				+	_starpu_bind_thread_on_cpu(_starpu_get_machine_config(), cpuid, STARPU_NOWORKERID);
			
 
				 }
			
 
				 
			
 
				 unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id)
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -50,6 +50,9 @@
 
				 static unsigned topology_is_initialized = 0;
			
 
				 static int nobind;
			
 
				 
			
 
				+/* For checking whether two workers share the same PU, indexed by PU number */
			
 
				+static int cpu_worker[STARPU_MAXCPUS];
			
 
				+
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
			
 
				 
			
 
				 struct handle_entry
			
@@ -590,6 +593,22 @@ _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
 
				 					topology->workers_bindid[i] =
			
 
				 						(unsigned)(val % topology->nhwpus);
			
 
				 					strval = endptr;
			
 
				+					if (*strval == ':')
			
 
				+					{
			
 
				+						/* "first:last" denotes an inclusive range of values; first was stored above */
			
 
				+						long int endval;
			
 
				+						strval++;
			
 
				+						endval = strtol(strval, &endptr, 10);
			
 
				+						strval = endptr;
			
 
				+						for (val++; val <= endval && i < STARPU_NMAXWORKERS-1; val++)
			
 
				+						{
			
 
				+							i++;
			
 
				+							topology->workers_bindid[i] =
			
 
				+								(unsigned)(val % topology->nhwpus);
			
 
				+						}
			
 
				+					}
			
 
				+					if (*strval == ',')
			
 
				+						strval++;
			
 
				 				}
			
 
				 				else
			
 
				 				{
			
@@ -648,6 +667,9 @@ _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
 
				 			i++;
			
 
				 		}
			
 
				 	}
			
 
				+
			
 
				+	for (i = 0; i < STARPU_MAXCPUS;i++)
			
 
				+		cpu_worker[i] = STARPU_NOWORKERID;
			
 
				 }
			
 
				 
			
 
				 /* This function gets the identifier of the next core on which to bind a
			
@@ -1260,7 +1282,7 @@ void _starpu_destroy_machine_config(struct _starpu_machine_config *config)
 
				 void
			
 
				 _starpu_bind_thread_on_cpu (
			
 
				 	struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
			
 
				-	int cpuid STARPU_ATTRIBUTE_UNUSED)
			
 
				+	int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED)
			
 
				 {
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 	return;
			
@@ -1269,6 +1291,16 @@ _starpu_bind_thread_on_cpu (
 
				 		return;
			
 
				 	if (cpuid < 0)
			
 
				 		return;
			
 
				+
			
 
				+	if (workerid != STARPU_NOWORKERID && cpuid < STARPU_MAXCPUS)
			
 
				+	{
			
 
				+		int previous = cpu_worker[cpuid];
			
 
				+		if (previous != STARPU_NOWORKERID && previous != workerid)
			
 
				+			_STARPU_DISP("Warning: both workers %d and %d are bound to the same PU %d, this will strongly degrade performance\n", previous, workerid, cpuid);
			
 
				+		else
			
 
				+			cpu_worker[cpuid] = workerid;
			
 
				+	}
			
 
				+
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 	const struct hwloc_topology_support *support;
			
 
				 
			
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -41,10 +41,11 @@ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config);
 
				 /* returns the number of logical cpus */
			
 
				 unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config);
			
 
				 
			
 
				+#define STARPU_NOWORKERID -1
			
 
				 /* Bind the current thread on the CPU logically identified by "cpuid". The
			
 
				  * logical ordering of the processors is either that of hwloc (if available),
			
 
				  * or the ordering exposed by the OS. */
			
 
				-void _starpu_bind_thread_on_cpu(struct _starpu_machine_config *config, int cpuid);
			
 
				+void _starpu_bind_thread_on_cpu(struct _starpu_machine_config *config, int cpuid, int workerid);
			
 
				 
			
 
				 struct _starpu_combined_worker;
			
 
				 /* Bind the current thread on the set of CPUs for the given combined worker. */
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -600,7 +600,7 @@ void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsig
 
				 	_starpu_worker_start(worker, fut_key, sync);
			
 
				 #endif
			
 
				 
			
 
				-	_starpu_bind_thread_on_cpu(worker->config, worker->bindid);
			
 
				+	_starpu_bind_thread_on_cpu(worker->config, worker->bindid, worker->workerid);
			
 
				 
			
 
				         _STARPU_DEBUG("worker %p %d for dev %d is ready on logical cpu %d\n", worker, worker->workerid, devid, worker->bindid);
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -112,7 +112,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
				 		}
			
 
				 		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
			
 
				 			/* rebind to single CPU */
			
 
				-			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid);
			
 
				+			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid, cpu_args->workerid);
			
 
				 	}
			
 
				 
			
 
				 	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank, profiling);
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -605,7 +605,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
				 	}
			
 
				 
			
 
				 	/* one more time to avoid hacks from third party lib :) */
			
 
				-	_starpu_bind_thread_on_cpu(worker0->config, worker0->bindid);
			
 
				+	_starpu_bind_thread_on_cpu(worker0->config, worker0->bindid, worker0->workerid);
			
 
				 
			
 
				 	for (i = 0; i < worker_set->nworkers; i++)
			
 
				 	{
			
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2013  CNRS
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
@@ -55,7 +55,7 @@ void *gordon_worker_progress(void *arg)
 
				 	struct _starpu_worker_set *gordon_set_arg = arg;
			
 
				 	unsigned prog_thread_bind_id =
			
 
				 		(gordon_set_arg->workers[0].bindid + 1)%(gordon_set_arg->config->nhwcores);
			
 
				-	_starpu_bind_thread_on_cpu(gordon_set_arg->config, prog_thread_bind_id);
			
 
				+	_starpu_bind_thread_on_cpu(gordon_set_arg->config, prog_thread_bind_id, gordon_set_arg->workers[0].workerid);
			
 
				 
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
			
 
				 	progress_thread_is_inited = 1;
			
@@ -438,7 +438,7 @@ void *_starpu_gordon_worker(void *arg)
 
				 {
			
 
				 	struct _starpu_worker_set *gordon_set_arg = arg;
			
 
				 
			
 
				-	_starpu_bind_thread_on_cpu(gordon_set_arg->config, gordon_set_arg->workers[0].bindid);
			
 
				+	_starpu_bind_thread_on_cpu(gordon_set_arg->config, gordon_set_arg->workers[0].bindid, gordon_set_arg->workers[0].workerid);
			
 
				 
			
 
				 	/* TODO set_local_memory_node per SPU */
			
 
				 	gordon_init(gordon_set_arg->nworkers);
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -612,7 +612,7 @@ int _starpu_opencl_driver_init(struct _starpu_worker *worker)
 
				 	_starpu_opencl_init_context(devid);
			
 
				 
			
 
				 	/* one more time to avoid hacks from third party lib :) */
			
 
				-	_starpu_bind_thread_on_cpu(worker->config, worker->bindid);
			
 
				+	_starpu_bind_thread_on_cpu(worker->config, worker->bindid, worker->workerid);
			
 
				 
			
 
				 	_starpu_opencl_limit_gpu_mem_if_needed(devid);
			
 
				 	_starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_opencl_get_global_mem_size(devid));