9 years ago · 877d29d194
--- a/doc/doxygen/chapters/40environment_variables.doxy
+++ b/doc/doxygen/chapters/40environment_variables.doxy
@@ -134,13 +134,15 @@ CPUs. This is for instance useful when running the testsuite in parallel.
 
																 <dd>
															
 
																 \anchor STARPU_WORKERS_CPUID
															
 
																 \addindex __env__STARPU_WORKERS_CPUID
															
 
																-Passing an array of integers (starting from 0) in \ref STARPU_WORKERS_CPUID
															
 
																+Passing an array of integers in \ref STARPU_WORKERS_CPUID
															
 
																 specifies on which logical CPU the different workers should be
															
 
																 bound. For instance, if <c>STARPU_WORKERS_CPUID = "0 1 4 5"</c>, the first
															
 
																 worker will be bound to logical CPU #0, the second CPU worker will be bound to
															
 
																 logical CPU #1 and so on.  Note that the logical ordering of the CPUs is either
															
 
																 determined by the OS, or provided by the library <c>hwloc</c> in case it is
															
 
																-available.
															
 
																+available. Ranges can be provided: for instance,
															
 
																+<c>STARPU_WORKERS_CPUID = "1:3 5"</c> will bind the first three workers to
															
 
																+logical CPUs #1, #2, and #3, and the fourth worker to logical CPU #5.
															
 
																 Note that the first workers correspond to the CUDA workers, then come the
															
 
																 OpenCL workers, and finally the CPU workers. For example if
															
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -130,7 +130,7 @@ static hwloc_topology_t hwtopology;
 
																 static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int cpu, struct dev_timing *dev_timing_per_cpu)
															
 
																 {
															
 
																 	struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																 	size_t size = SIZE;
															
 
																 	/* Initialize CUDA context on the device */
															
@@ -139,13 +139,13 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
																 	cudaSetDevice(dev);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																 	/* hack to force the initialization */
															
 
																 	cudaFree(0);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																         /* Get the maximum size which can be allocated on the device */
															
 
																 	struct cudaDeviceProp prop;
															
@@ -161,7 +161,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
																 	STARPU_ASSERT(cures == cudaSuccess);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																 	/* Allocate a buffer on the host */
															
 
																 	unsigned char *h_buffer;
															
@@ -169,14 +169,14 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
																 	STARPU_ASSERT(cures == cudaSuccess);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																 	/* Fill them */
															
 
																 	memset(h_buffer, 0, size);
															
 
																 	cudaMemset(d_buffer, 0, size);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																 	unsigned iter;
															
 
																 	double timing;
															
@@ -348,7 +348,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
																 	int not_initialized;
															
 
																         struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																 	/* Is the context already initialised ? */
															
 
																         starpu_opencl_get_context(dev, &context);
															
@@ -380,7 +380,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
																 	}
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																 	/* Allocate a buffer on the device */
															
 
																 	cl_mem d_buffer;
															
@@ -388,21 +388,21 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
																 	if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																         /* Allocate a buffer on the host */
															
 
																 	unsigned char *h_buffer;
															
 
																         h_buffer = (unsigned char *)malloc(size);
															
 
																 	STARPU_ASSERT(h_buffer);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																         /* Fill them */
															
 
																 	memset(h_buffer, 0, size);
															
 
																         err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL);
															
 
																         if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																         clFinish(queue);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																-	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	_starpu_bind_thread_on_cpu(config, cpu, STARPU_NOWORKERID);
															
 
																         unsigned iter;
															
 
																 	double timing;
															
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -1896,7 +1896,7 @@ unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker)
 
																 void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid)
															
 
																 {
															
 
																-	_starpu_bind_thread_on_cpu(_starpu_get_machine_config(), cpuid);
															
 
																+	_starpu_bind_thread_on_cpu(_starpu_get_machine_config(), cpuid, STARPU_NOWORKERID);
															
 
																 }
															
 
																 unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id)
															
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -50,6 +50,9 @@
 
																 static unsigned topology_is_initialized = 0;
															
 
																 static int nobind;
															
 
																+/* For checking whether two workers share the same PU, indexed by PU number */
															
 
																+static int cpu_worker[STARPU_MAXCPUS];
															
 
																+
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
															
 
																 struct handle_entry
															
@@ -590,6 +593,22 @@ _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
 
																 					topology->workers_bindid[i] =
															
 
																 						(unsigned)(val % topology->nhwpus);
															
 
																 					strval = endptr;
															
 
																+					if (*strval == ':')
															
 
																+					{
															
 
																+						/* range of values */
															
 
																+						long int endval;
															
 
																+						strval++;
															
 
																+						endval = strtol(strval, &endptr, 10);
															
 
																+						strval = endptr;
															
 
																+						for (val++; val <= endval && i < STARPU_NMAXWORKERS-1; val++)
															
 
																+						{
															
 
																+							i++;
															
 
																+							topology->workers_bindid[i] =
															
 
																+								(unsigned)(val % topology->nhwpus);
															
 
																+						}
															
 
																+					}
															
 
																+					if (*strval == ',')
															
 
																+						strval++;
															
 
																 				}
															
 
																 				else
															
 
																 				{
															
@@ -648,6 +667,9 @@ _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
 
																 			i++;
															
 
																 		}
															
 
																 	}
															
 
																+
															
 
																+	for (i = 0; i < STARPU_MAXCPUS;i++)
															
 
																+		cpu_worker[i] = STARPU_NOWORKERID;
															
 
																 }
															
 
																 /* This function gets the identifier of the next core on which to bind a
															
@@ -1260,7 +1282,7 @@ void _starpu_destroy_machine_config(struct _starpu_machine_config *config)
 
																 void
															
 
																 _starpu_bind_thread_on_cpu (
															
 
																 	struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
															
 
																-	int cpuid STARPU_ATTRIBUTE_UNUSED)
															
 
																+	int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED)
															
 
																 {
															
 
																 #ifdef STARPU_SIMGRID
															
 
																 	return;
															
@@ -1269,6 +1291,16 @@ _starpu_bind_thread_on_cpu (
 
																 		return;
															
 
																 	if (cpuid < 0)
															
 
																 		return;
															
 
																+
															
 
																+	if (workerid != STARPU_NOWORKERID && cpuid < STARPU_MAXCPUS)
															
 
																+	{
															
 
																+		int previous = cpu_worker[cpuid];
															
 
																+		if (previous != STARPU_NOWORKERID && previous != workerid)
															
 
																+			_STARPU_DISP("Warning: both workers %d and %d are bound to the same PU %d, this will strongly degrade performance\n", previous, workerid, cpuid);
															
 
																+		else
															
 
																+			cpu_worker[cpuid] = workerid;
															
 
																+	}
															
 
																+
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																 	const struct hwloc_topology_support *support;
															
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -41,10 +41,11 @@ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config);
 
																 /* returns the number of logical cpus */
															
 
																 unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config);
															
 
																+#define STARPU_NOWORKERID -1
															
 
																 /* Bind the current thread on the CPU logically identified by "cpuid". The
															
 
																  * logical ordering of the processors is either that of hwloc (if available),
															
 
																  * or the ordering exposed by the OS. */
															
 
																-void _starpu_bind_thread_on_cpu(struct _starpu_machine_config *config, int cpuid);
															
 
																+void _starpu_bind_thread_on_cpu(struct _starpu_machine_config *config, int cpuid, int workerid);
															
 
																 struct _starpu_combined_worker;
															
 
																 /* Bind the current thread on the set of CPUs for the given combined worker. */
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -600,7 +600,7 @@ void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsig
 
																 	_starpu_worker_start(worker, fut_key, sync);
															
 
																 #endif
															
 
																-	_starpu_bind_thread_on_cpu(worker->config, worker->bindid);
															
 
																+	_starpu_bind_thread_on_cpu(worker->config, worker->bindid, worker->workerid);
															
 
																         _STARPU_DEBUG("worker %p %d for dev %d is ready on logical cpu %d\n", worker, worker->workerid, devid, worker->bindid);
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -112,7 +112,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
																 		}
															
 
																 		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
															
 
																 			/* rebind to single CPU */
															
 
																-			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid);
															
 
																+			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid, cpu_args->workerid);
															
 
																 	}
															
 
																 	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank, profiling);
															
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -605,7 +605,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
																 	}
															
 
																 	/* one more time to avoid hacks from third party lib :) */
															
 
																-	_starpu_bind_thread_on_cpu(worker0->config, worker0->bindid);
															
 
																+	_starpu_bind_thread_on_cpu(worker0->config, worker0->bindid, worker0->workerid);
															
 
																 	for (i = 0; i < worker_set->nworkers; i++)
															
 
																 	{
															
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009-2014  Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2015  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2013  CNRS
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																  *
															
@@ -55,7 +55,7 @@ void *gordon_worker_progress(void *arg)
 
																 	struct _starpu_worker_set *gordon_set_arg = arg;
															
 
																 	unsigned prog_thread_bind_id =
															
 
																 		(gordon_set_arg->workers[0].bindid + 1)%(gordon_set_arg->config->nhwcores);
															
 
																-	_starpu_bind_thread_on_cpu(gordon_set_arg->config, prog_thread_bind_id);
															
 
																+	_starpu_bind_thread_on_cpu(gordon_set_arg->config, prog_thread_bind_id, gordon_set_arg->workers[0].workerid);
															
 
																 	STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
															
 
																 	progress_thread_is_inited = 1;
															
@@ -438,7 +438,7 @@ void *_starpu_gordon_worker(void *arg)
 
																 {
															
 
																 	struct _starpu_worker_set *gordon_set_arg = arg;
															
 
																-	_starpu_bind_thread_on_cpu(gordon_set_arg->config, gordon_set_arg->workers[0].bindid);
															
 
																+	_starpu_bind_thread_on_cpu(gordon_set_arg->config, gordon_set_arg->workers[0].bindid, gordon_set_arg->workers[0].workerid);
															
 
																 	/* TODO set_local_memory_node per SPU */
															
 
																 	gordon_init(gordon_set_arg->nworkers);
															
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -612,7 +612,7 @@ int _starpu_opencl_driver_init(struct _starpu_worker *worker)
 
																 	_starpu_opencl_init_context(devid);
															
 
																 	/* one more time to avoid hacks from third party lib :) */
															
 
																-	_starpu_bind_thread_on_cpu(worker->config, worker->bindid);
															
 
																+	_starpu_bind_thread_on_cpu(worker->config, worker->bindid, worker->workerid);
															
 
																 	_starpu_opencl_limit_gpu_mem_if_needed(devid);
															
 
																 	_starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_opencl_get_global_mem_size(devid));