Преглед на файлове

Add a configuration parameter to enable only one combined worker. Use it to provide an OpenMP example which should work with most OpenMP implementations.

Samuel Thibault преди 14 години
родител
ревизия
2f21c1e693
променени са 6 файла, в които са добавени 129 реда и са изтрити 7 реда
  1. 8 0
      doc/starpu.texi
  2. 1 0
      examples/Makefile.am
  3. 105 0
      examples/openmp/vector_scal.c
  4. 4 1
      include/starpu.h
  5. 2 0
      src/core/workers.c
  6. 9 6
      src/sched_policies/detect_combined_workers.c

+ 8 - 0
doc/starpu.texi

@@ -3200,6 +3200,14 @@ default value is overwritten by the @code{STARPU_CALIBRATE} environment
 variable when it is set.
 @end table
 
+@item @code{single_combined_worker} (default = 0):
+By default, StarPU creates various combined workers according to the machine
+structure. Some parallel libraries (e.g. most OpenMP implementations) however do
+not support concurrent calls to parallel code. In such a case, setting this flag
+makes StarPU only create one combined worker, containing all
+the CPU workers. The default value is overwritten by the
+@code{STARPU_SINGLE_COMBINED_WORKER} environment variable when it is set.
+
 @end table
 
 

+ 1 - 0
examples/Makefile.am

@@ -50,6 +50,7 @@ EXTRA_DIST = 					\
 	basic_examples/variable_kernels_opencl_kernel.cl	\
 	matvecmult/matvecmult_kernel.cl				\
 	basic_examples/block_opencl_kernel.cl			\
+	openmp/vector_scal.c			\
 	filters/fblock_opencl_kernel.cl
 
 CLEANFILES = 					\

+ 105 - 0
examples/openmp/vector_scal.c

@@ -0,0 +1,105 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* gcc build:
+
+   gcc -fopenmp vector_scal.c -o vector_scal $(pkg-config --cflags libstarpu) $(pkg-config --libs libstarpu)
+
+ */
+
+#include <starpu.h>
+#include <stdio.h>
+#include <limits.h>
+
+#define	NX	2048
+#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+
+/* CPU implementation of the scaling codelet.
+ *
+ * buffers[0] is the vector interface of the data to scale in place; _args
+ * points to the float factor each element is multiplied by.  The loop is run
+ * as an OpenMP parallel for, asking for as many threads as
+ * starpu_combined_worker_get_size() reports. */
+void scal_cpu_func(void *buffers[], void *_args)
+{
+	starpu_vector_interface_t *vec = buffers[0];
+	float *data = (float *)STARPU_VECTOR_GET_PTR(vec);
+	unsigned count = STARPU_VECTOR_GET_NX(vec);
+	float *scale = _args;
+	unsigned k;
+
+	FPRINTF(stderr, "running task with %d CPUs.\n", starpu_combined_worker_get_size());
+
+#pragma omp parallel for num_threads(starpu_combined_worker_get_size())
+	for (k = 0; k < count; k++)
+		data[k] = data[k] * (*scale);
+}
+
+/* History-based performance model, recorded under the symbol
+ * "vector_scale_parallel". */
+static struct starpu_perfmodel_t vector_scal_model = {
+	.symbol = "vector_scale_parallel",
+	.type = STARPU_HISTORY_BASED
+};
+
+/* Codelet describing the scaling kernel: it runs on CPUs only, as a
+ * fork-join parallel task over a single buffer, with INT_MAX as the
+ * parallelism limit. */
+static starpu_codelet cl = {
+	.cpu_func = scal_cpu_func,
+	.where = STARPU_CPU,
+	.nbuffers = 1,
+	.type = STARPU_FORKJOIN,
+	.max_parallelism = INT_MAX,
+	.model = &vector_scal_model,
+};
+
+/* Scale an NX-element vector by a constant factor through a single
+ * synchronous StarPU task whose CPU kernel is an OpenMP parallel loop.
+ *
+ * Returns 0 on success, 1 if StarPU could not be initialized or the task
+ * could not be submitted. */
+int main(int argc, char **argv)
+{
+	struct starpu_conf conf;
+	float vector[NX];
+	unsigned i;
+	int ret;
+
+	for (i = 0; i < NX; i++)
+		vector[i] = (i+1.0f);
+
+	FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]);
+	FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]);
+
+	starpu_conf_init(&conf);
+
+	/* Most OpenMP implementations do not support concurrent parallel
+	 * sections, so only create one big worker */
+	conf.single_combined_worker = 1;
+
+	/* None of the calls below are meaningful if StarPU failed to start */
+	ret = starpu_init(&conf);
+	if (ret != 0) {
+		fprintf(stderr, "starpu_init failed (error %d)\n", ret);
+		return 1;
+	}
+
+	starpu_data_handle vector_handle;
+	starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
+
+	float factor = 3.14;
+
+	struct starpu_task *task = starpu_task_create();
+	task->synchronous = 1;
+
+	task->cl = &cl;
+
+	task->buffers[0].handle = vector_handle;
+	task->buffers[0].mode = STARPU_RW;
+	task->cl_arg = &factor;
+	task->cl_arg_size = sizeof(factor);
+
+	/* Remember a submission failure, but still run the same cleanup
+	 * sequence so that the handle, the task and StarPU are released */
+	ret = starpu_task_submit(task);
+	if (ret != 0)
+		fprintf(stderr, "starpu_task_submit failed (error %d)\n", ret);
+
+	starpu_data_unregister(vector_handle);
+
+	starpu_task_destroy(task);
+
+	/* terminate StarPU, no task can be submitted after */
+	starpu_shutdown();
+
+	/* Do not report "AFTER" values for a task that never ran */
+	if (ret != 0)
+		return 1;
+
+	FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]);
+	FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]);
+
+	return 0;
+}

+ 4 - 1
include/starpu.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009-2011  Université de Bordeaux 1
  * Copyright (C) 2010  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -72,6 +72,9 @@ struct starpu_conf {
 
 	/* calibrate performance models, if any (-1 for default) */
 	int calibrate;
+
+	/* Create only one combined worker, containing all CPU workers */
+	int single_combined_worker;
 };
 
 /* Initialize a starpu_conf structure with default values. */

+ 2 - 0
src/core/workers.c

@@ -284,6 +284,8 @@ int starpu_conf_init(struct starpu_conf *conf)
 	conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
 	conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
 
+	conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
+
 	return 0;
 }
 

+ 9 - 6
src/sched_policies/detect_combined_workers.c

@@ -145,7 +145,6 @@ static void find_combinations_without_hwloc(struct starpu_machine_topology_s *to
 }
 #endif
 
-#if 0
 static void combine_all_cpu_workers(struct starpu_machine_topology_s *topology)
 {
 	struct starpu_machine_config_s *config = _starpu_get_machine_config();
@@ -167,15 +166,19 @@ static void combine_all_cpu_workers(struct starpu_machine_topology_s *topology)
 		STARPU_ASSERT(ret >= 0);
 	}
 }
-#endif
 
 void _starpu_sched_find_worker_combinations(struct starpu_machine_topology_s *topology)
 {
-//	combine_all_cpu_workers(topology);
+	struct starpu_machine_config_s *config = _starpu_get_machine_config();
+
+	if (config->user_conf && config->user_conf->single_combined_worker)
+		combine_all_cpu_workers(topology);
+	else {
 #ifdef STARPU_HAVE_HWLOC
-	find_combinations_with_hwloc(topology);
-	//find_combinations_without_hwloc(topology);
+		find_combinations_with_hwloc(topology);
+		//find_combinations_without_hwloc(topology);
 #else
-	find_combinations_without_hwloc(topology);
+		find_combinations_without_hwloc(topology);
 #endif
+	}
 }