Browse Source

merge trunk 6500:6518

Nathalie Furmento 12 years ago
parent
commit
a425c6bf5a

+ 5 - 2
AUTHORS

@@ -1,16 +1,19 @@
 Cédric Augonnet <cedric.augonnet@inria.fr>
 Nicolas Collin <nicolas.collin@inria.fr>
 Jérôme Clet-Ortega <jerome.clet-ortega@labri.fr>
+Nicolas Collin <nicolas.collin@inria.fr>
 Nathalie Furmento <nathalie.furmento@labri.fr>
 Sylvain Henry <sylvain.henry@inria.fr>
-Mehdi Juhoor <mjuhoor@gmail.com>
+Cyril Roélandt <cyril.roelandt@inria.fr>
 François Tessier <francois.tessier@inria.fr>
 Samuel Thibault <samuel.thibault@labri.fr>
+Pierre André Wacrenier <wacrenier@labri.fr>
 William Braik <wbraik@gmail.com>
 Yann Courtois <yann.courtois33@gmail.com>
 Jean-Marie Couteyen <jm.couteyen@gmail.com>
+Mehdi Juhoor <mjuhoor@gmail.com>
 Anthony Roy <theanthony33@gmail.com>
 David Gómez <david_gomez1380@yahoo.com.mx>
 Nguyen Quôc Dinh <nguyen.quocdinh@gmail.com>
 Antoine Lucas <antoine.lucas.33@gmail.com>
-Pierre André Wacrenier <wacrenier@labri.fr>
+

+ 8 - 0
configure.ac

@@ -1695,3 +1695,11 @@ AC_MSG_NOTICE([
 	       SOCL enabled:  $build_socl
                Scheduler Hypervisor: $build_sched_ctx_hypervisor
 ])
+
+if test x"$have_valid_hwloc" = xno
+then
+  AC_MSG_NOTICE([
+WARNING: hwloc was not enabled.  If the target machine is hyperthreaded the
+performance may be impacted a lot.  It is strongly recommended to install
+hwloc])
+fi

+ 10 - 1
doc/chapters/advanced-examples.texi

@@ -349,7 +349,7 @@ struct starpu_codelet cl = @{
 
 @item
 Measured at runtime and refined by regression (@code{STARPU_*REGRESSION_BASED}
-model type). This still assumes performance regularity, but can work
+model type). This still assumes performance regularity, but works
 with various data input sizes, by applying regression over observed
 execution times. STARPU_REGRESSION_BASED uses an a*n^b regression
 form, STARPU_NL_REGRESSION_BASED uses an a*n^b+c (more precise than
@@ -787,6 +787,15 @@ topology (NUMA node, socket, cache, ...) a combined worker will be created. If
 some nodes of the hierarchy have a big arity (e.g. many cores in a socket
 without a hierarchy of shared caches), StarPU will create combined workers of
 intermediate sizes.
+The user can give StarPU some hints about which combined worker sizes to favor.
+This can be done by using the environment variables @code{STARPU_MIN_WORKERSIZE}
+and @code{STARPU_MAX_WORKERSIZE}. When set, they will force StarPU to create the
+biggest combined workers possible without overstepping the defined boundaries.
+However, when the boundaries cannot be honored for every worker, StarPU will
+create the remaining combined workers without abiding by them.
+For example, if the user specifies a minimum and maximum combined worker size
+of 3 on a machine containing 8 CPUs, StarPU will create a combined worker of
+size 2 beside the combined workers of size 3.
 
 @subsection Concurrent parallel tasks
 

+ 3 - 5
doc/chapters/basic-api.texi

@@ -41,15 +41,13 @@ indicates that no worker was available (so that StarPU was not initialized).
 @item @code{enum starpu_archtype type}
 The type of the driver. Only STARPU_CUDA_DRIVER and STARPU_OPENCL_DRIVER are
 currently supported.
-@item @code{union id}
-@deftp {Data type} {anonymous union}
+@item @code{union id} Anonymous union
 @table @asis
 @item @code{unsigned cuda_id}
 Should only be used if type is STARPU_CUDA_WORKER.
 @item @code{cl_device_id opencl_id}
 Should only be used if type is STARPU_OPENCL_WORKER.
 @end table
-@end deftp
 @end table
 @end deftp
 
@@ -1725,7 +1723,7 @@ This function is similar to @code{starpu_tag_wait} except that it blocks until
 terminated.
 @end deftypefun
 
-@deftypefun void starpu_tag_restart (unsigned @var{id})
+@deftypefun void starpu_tag_restart (starpu_tag_t @var{id})
 This function can be used to clear the "already notified" status
 of a tag which is not associated with a task. Before that, calling
 @code{starpu_tag_notify_from_apps} again will not notify the successors. After
@@ -2167,7 +2165,7 @@ successfull. It returns 0 if the synchronous copy was successful, or
 fails otherwise.
 @end deftypefun
 
-@deftypefun void starpu_cuda_set_device (int@var{devid})
+@deftypefun void starpu_cuda_set_device (int @var{devid})
 Calls @code{cudaSetDevice(devid)} or @code{cudaGLSetGLDevice(devid)}, according to
 whether @code{devid} is among the @code{cuda_opengl_interoperability} field of
 the @code{starpu_conf} structure.

+ 30 - 8
doc/chapters/configuration.texi

@@ -223,14 +223,17 @@ By default, it is disabled.
 @subsection Configuring workers
 
 @menu
-* STARPU_NCPUS::                Number of CPU workers
-* STARPU_NCUDA::                Number of CUDA workers
-* STARPU_NOPENCL::              Number of OpenCL workers
-* STARPU_NGORDON::              Number of SPU workers (Cell)
-* STARPU_WORKERS_NOBIND::       Do not bind workers
-* STARPU_WORKERS_CPUID::        Bind workers to specific CPUs
-* STARPU_WORKERS_CUDAID::       Select specific CUDA devices
-* STARPU_WORKERS_OPENCLID::     Select specific OpenCL devices
+* STARPU_NCPUS::                	Number of CPU workers
+* STARPU_NCUDA::                	Number of CUDA workers
+* STARPU_NOPENCL::              	Number of OpenCL workers
+* STARPU_NGORDON::              	Number of SPU workers (Cell)
+* STARPU_WORKERS_NOBIND::       	Do not bind workers
+* STARPU_WORKERS_CPUID::        	Bind workers to specific CPUs
+* STARPU_WORKERS_CUDAID::       	Select specific CUDA devices
+* STARPU_WORKERS_OPENCLID::     	Select specific OpenCL devices
+* STARPU_SINGLE_COMBINED_WORKER:: 	Do not use concurrent workers
+* STARPU_MIN_WORKERSIZE::	 	Minimum size of the combined workers
+* STARPU_MAX_WORKERSIZE:: 		Maximum size of the combined workers
 @end menu
 
 @node STARPU_NCPUS
@@ -312,6 +315,25 @@ OpenCL equivalent of the @code{STARPU_WORKERS_CUDAID} environment variable.
 This variable is ignored if the @code{use_explicit_workers_opencl_gpuid} flag of
 the @code{starpu_conf} structure passed to @code{starpu_init} is set.
 
+@node STARPU_SINGLE_COMBINED_WORKER
+@subsubsection @code{STARPU_SINGLE_COMBINED_WORKER} -- Do not use concurrent workers
+
+If set, StarPU will create several workers which won't be able to work
+concurrently. It will create combined workers whose size ranges from 1 to the
+total number of CPU workers in the system.
+
+@node STARPU_MIN_WORKERSIZE
+@subsubsection @code{STARPU_MIN_WORKERSIZE} -- Minimum size of the combined workers
+
+Let the user give a hint to StarPU about how many workers
+(minimum boundary) the combined workers should contain.
+
+@node STARPU_MAX_WORKERSIZE
+@subsubsection @code{STARPU_MAX_WORKERSIZE} -- Maximum size of the combined workers
+
+Let the user give a hint to StarPU about how many workers
+(maximum boundary) the combined workers should contain.
+
 @node Scheduling
 @subsection Configuring the Scheduling engine
 

+ 2 - 1
gcc-plugin/tests/Makefile.am

@@ -38,9 +38,10 @@ gcc_tests =					\
   heap-allocated-errors.c			\
   verbose.c					\
   debug-tree.c					\
-  opencl-types.c				\
   shutdown-errors.c
 
+#  opencl-types.c				
+
 dist_noinst_HEADERS = mocks.h
 
 CLEANFILES = *.gimple *.o			\

+ 15 - 14
src/core/workers.c

@@ -548,20 +548,6 @@ int starpu_init(struct starpu_conf *user_conf)
 
 	srand(2008);
 
-#ifdef STARPU_USE_FXT
-	_starpu_start_fxt_profiling();
-#endif
-
-	_starpu_open_debug_logfile();
-
-	_starpu_data_interface_init();
-
-	_starpu_timing_init();
-
-//	_starpu_profiling_init();
-
-	_starpu_load_bus_performance_files();
-
 	/* store the pointer to the user explicit configuration during the
 	 * initialization */
 	if (user_conf == NULL)
@@ -583,6 +569,21 @@ int starpu_init(struct starpu_conf *user_conf)
 	_starpu_conf_check_environment(config.conf);
 
 	_starpu_init_all_sched_ctxs(&config);
+
+#ifdef STARPU_USE_FXT
+	_starpu_start_fxt_profiling();
+#endif
+
+	_starpu_open_debug_logfile();
+
+	_starpu_data_interface_init();
+
+	_starpu_timing_init();
+
+	_starpu_profiling_init();
+
+	_starpu_load_bus_performance_files();
+
 	ret = _starpu_build_topology(&config);
 	if (ret)
 	{