浏览代码

Add a configuration parameter to enable only one combined worker. Use it to provide an OpenMP example which should work with most OpenMP implementations.

Samuel Thibault 14 年之前
父节点
当前提交
2f21c1e693
共有 6 个文件被更改,包括 129 次插入7 次删除
  1. 8 0
      doc/starpu.texi
  2. 1 0
      examples/Makefile.am
  3. 105 0
      examples/openmp/vector_scal.c
  4. 4 1
      include/starpu.h
  5. 2 0
      src/core/workers.c
  6. 9 6
      src/sched_policies/detect_combined_workers.c

+ 8 - 0
doc/starpu.texi

@@ -3200,6 +3200,14 @@ default value is overwritten by the @code{STARPU_CALIBRATE} environment
 variable when it is set.
 @end table
 
+@item @code{single_combined_worker} (default = 0):
+By default, StarPU creates various combined workers according to the machine
+structure. Some parallel libraries (e.g. most OpenMP implementations) however do
+not support concurrent calls to parallel code. In such cases, setting this flag
+makes StarPU only create one combined worker, containing all
+the CPU workers. The default value is overwritten by the
+@code{STARPU_SINGLE_COMBINED_WORKER} environment variable when it is set.
+
 @end table
 
 

+ 1 - 0
examples/Makefile.am

@@ -50,6 +50,7 @@ EXTRA_DIST = 					\
 	basic_examples/variable_kernels_opencl_kernel.cl	\
 	matvecmult/matvecmult_kernel.cl				\
 	basic_examples/block_opencl_kernel.cl			\
+	openmp/vector_scal.c			\
 	filters/fblock_opencl_kernel.cl
 
 CLEANFILES = 					\

+ 105 - 0
examples/openmp/vector_scal.c

@@ -0,0 +1,105 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* gcc build:
+
+   gcc -fopenmp vector_scal.c -o vector_scal $(pkg-config --cflags libstarpu) $(pkg-config --libs libstarpu)
+
+ */
+
+#include <starpu.h>
+#include <stdio.h>
+#include <limits.h>
+
+#define	NX	2048
+#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+
+void scal_cpu_func(void *buffers[], void *_args) {
+	unsigned i;
+	float *factor = _args;
+	starpu_vector_interface_t *vector = buffers[0];
+	unsigned n = STARPU_VECTOR_GET_NX(vector);
+	float *val = (float *)STARPU_VECTOR_GET_PTR(vector);
+
+	FPRINTF(stderr, "running task with %d CPUs.\n", starpu_combined_worker_get_size());
+
+#pragma omp parallel for num_threads(starpu_combined_worker_get_size())
+	for (i = 0; i < n; i++)
+		val[i] *= *factor;
+}
+
+static struct starpu_perfmodel_t vector_scal_model = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "vector_scale_parallel"
+};
+
+static starpu_codelet cl = {
+	.where = STARPU_CPU,
+	.type = STARPU_FORKJOIN,
+	.max_parallelism = INT_MAX,
+	.cpu_func = scal_cpu_func,
+	.nbuffers = 1,
+	.model = &vector_scal_model,
+};
+
+int main(int argc, char **argv)
+{
+	struct starpu_conf conf;
+	float vector[NX];
+	unsigned i;
+	for (i = 0; i < NX; i++)
+                vector[i] = (i+1.0f);
+
+	FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]);
+	FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]);
+
+	starpu_conf_init(&conf);
+
+	/* Most OpenMP implementations do not support concurrent parallel
+	 * regions, so only create one big worker */
+	conf.single_combined_worker = 1;
+
+	starpu_init(&conf);
+
+	starpu_data_handle vector_handle;
+	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+
+	float factor = 3.14;
+
+	struct starpu_task *task = starpu_task_create();
+	task->synchronous = 1;
+
+	task->cl = &cl;
+
+	task->buffers[0].handle = vector_handle;
+	task->buffers[0].mode = STARPU_RW;
+	task->cl_arg = &factor;
+	task->cl_arg_size = sizeof(factor);
+
+	starpu_task_submit(task);
+	starpu_data_unregister(vector_handle);
+
+	starpu_task_destroy(task);
+
+	/* Terminate StarPU; no task can be submitted after this call */
+	starpu_shutdown();
+
+	FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]);
+	FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]);
+
+	return 0;
+}

+ 4 - 1
include/starpu.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009-2011  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -72,6 +72,9 @@ struct starpu_conf {
 
 	/* calibrate performance models, if any (-1 for default) */
 	int calibrate;
+
+	/* Create only one combined worker, containing all CPU workers */
+	int single_combined_worker;
 };
 
 /* Initialize a starpu_conf structure with default values. */

+ 2 - 0
src/core/workers.c

@@ -284,6 +284,8 @@ int starpu_conf_init(struct starpu_conf *conf)
 	conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
 	conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
 
+	conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
+
 	return 0;
 }
 

+ 9 - 6
src/sched_policies/detect_combined_workers.c

@@ -145,7 +145,6 @@ static void find_combinations_without_hwloc(struct starpu_machine_topology_s *to
 }
 #endif
 
-#if 0
 static void combine_all_cpu_workers(struct starpu_machine_topology_s *topology)
 {
 	struct starpu_machine_config_s *config = _starpu_get_machine_config();
@@ -167,15 +166,19 @@ static void combine_all_cpu_workers(struct starpu_machine_topology_s *topology)
 		STARPU_ASSERT(ret >= 0);
 	}
 }
-#endif
 
 void _starpu_sched_find_worker_combinations(struct starpu_machine_topology_s *topology)
 {
-//	combine_all_cpu_workers(topology);
+	struct starpu_machine_config_s *config = _starpu_get_machine_config();
+
+	if (config->user_conf && config->user_conf->single_combined_worker)
+		combine_all_cpu_workers(topology);
+	else {
 #ifdef STARPU_HAVE_HWLOC
-	find_combinations_with_hwloc(topology);
-	//find_combinations_without_hwloc(topology);
+		find_combinations_with_hwloc(topology);
+		//find_combinations_without_hwloc(topology);
 #else
-	find_combinations_without_hwloc(topology);
+		find_combinations_without_hwloc(topology);
 #endif
+	}
 }