7 years ago · f8b6e4bfca
--- a/ChangeLog
+++ b/ChangeLog
@@ -78,6 +78,8 @@ Small features:
 
				   * Add sched_data field in starpu_task structure.
			
 
				   * New starpu_fxt_tool option -label-deps to label dependencies on
			
 
				     the output graph
			
 
				+  * New environment variable STARPU_GENERATE_TRACE_OPTIONS to specify
			
 
				+    fxt options (to be used with STARPU_GENERATE_TRACE)
			
 
				 
			
 
				 Changes:
			
 
				   * Vastly improve simgrid simulation time.
			
--- a/doc/doxygen/chapters/501_environment_variables.doxy
+++ b/doc/doxygen/chapters/501_environment_variables.doxy
@@ -66,8 +66,8 @@ which will be concurrently running on the devices. The default value is 1.
 
				 <dd>
			
 
				 \anchor STARPU_CUDA_THREAD_PER_WORKER
			
 
				 \addindex __env__STARPU_CUDA_THREAD_PER_WORKER
			
 
				-Specify if the cuda driver should provide a thread per stream or a single thread 
			
 
				-dealing with all the streams. 0 if one thread per stream, 1 otherwise. The default 
			
 
				+Specify if the cuda driver should provide a thread per stream or a single thread
			
 
				+dealing with all the streams. 0 if one thread per stream, 1 otherwise. The default
			
 
				 value is 0. Setting it to 1 is contradictory with setting STARPU_CUDA_THREAD_PER_DEV to 1.
			
 
				 </dd>
			
 
				 
			
@@ -75,8 +75,8 @@ value is 0. Setting it to 1 is contradictory with setting STARPU_CUDA_THREAD_PER
 
				 <dd>
			
 
				 \anchor STARPU_CUDA_THREAD_PER_DEV
			
 
				 \addindex __env__STARPU_CUDA_THREAD_PER_DEV
			
 
				-Specify if the cuda driver should provide a thread per device or a single thread 
			
 
				-dealing with all the devices. 0 if one thread per device, 1 otherwise. The default 
			
 
				+Specify if the cuda driver should provide a thread per device or a single thread
			
 
				+dealing with all the devices. 0 if one thread per device, 1 otherwise. The default
			
 
				 value is 1, unless STARPU_CUDA_THREAD_PER_WORKER is set to 1. Setting it to 1 is
			
 
				 contradictory with setting STARPU_CUDA_THREAD_PER_WORKER to 1.
			
 
				 </dd>
			
@@ -369,7 +369,7 @@ Disable asynchronous copies between CPU and MIC devices.
 
				 
			
 
				 <dt>STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY</dt>
			
 
				 <dd>
			
 
				-\anchor STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY 
			
 
				+\anchor STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY
			
 
				 \addindex __env__STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY
			
 
				 Disable asynchronous copies between CPU and MPI Slave devices.
			
 
				 </dd>
			
@@ -946,7 +946,7 @@ full.
 
				 This specifies the backend to be used by StarPU to push data when the main
			
 
				 memory is getting full. The default is unistd (i.e. using read/write functions),
			
 
				 other values are stdio (i.e. using fread/fwrite), unistd_o_direct (i.e. using
			
 
				-read/write with O_DIRECT), leveldb (i.e. using a leveldb database), and hdf5 
			
 
				+read/write with O_DIRECT), leveldb (i.e. using a leveldb database), and hdf5
			
 
				 (i.e. using HDF5 library).
			
 
				 </dd>
			
 
				 
			
@@ -961,7 +961,7 @@ memory is getting full. The default is unlimited.
 
				 <dt>STARPU_LIMIT_MAX_SUBMITTED_TASKS</dt>
			
 
				 <dd>
			
 
				 \anchor STARPU_LIMIT_MAX_SUBMITTED_TASKS
			
 
				-\addindex __env__STARPU_LIMIT_MAX_SUBMITTED_TASKS    
			
 
				+\addindex __env__STARPU_LIMIT_MAX_SUBMITTED_TASKS
			
 
				 This variable allows the user to control the task submission flow by specifying
			
 
				 to StarPU a maximum number of submitted tasks allowed at a given time, i.e. when
			
 
				 this limit is reached task submission becomes blocking until enough tasks have
			
@@ -972,7 +972,7 @@ Setting it enables allocation cache buffer reuse in main memory.
 
				 <dt>STARPU_LIMIT_MIN_SUBMITTED_TASKS</dt>
			
 
				 <dd>
			
 
				 \anchor STARPU_LIMIT_MIN_SUBMITTED_TASKS
			
 
				-\addindex __env__STARPU_LIMIT_MIN_SUBMITTED_TASKS    
			
 
				+\addindex __env__STARPU_LIMIT_MIN_SUBMITTED_TASKS
			
 
				 This variable allows the user to control the task submission flow by specifying
			
 
				 to StarPU a submitted task threshold to wait before unblocking task submission. This
			
 
				 variable has to be used in conjunction with \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS
			
@@ -997,6 +997,15 @@ When set to <c>1</c>, this variable indicates that StarPU should automatically
 
				 generate a Paje trace when starpu_shutdown() is called.
			
 
				 </dd>
			
 
				 
			
 
				+<dt>STARPU_GENERATE_TRACE_OPTIONS</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_GENERATE_TRACE_OPTIONS
			
 
				+\addindex __env__STARPU_GENERATE_TRACE_OPTIONS
			
 
				+When the variable \ref STARPU_GENERATE_TRACE is set to <c>1</c> to
			
 
				+generate a Paje trace, this variable can be set to specify options (see
			
 
				+<c>starpu_fxt_tool --help</c>).
			
 
				+</dd>
			
 
				+
			
 
				 <dt>STARPU_ENABLE_STATS</dt>
			
 
				 <dd>
			
 
				 \anchor STARPU_ENABLE_STATS
			
@@ -1162,7 +1171,7 @@ accesses (see \ref ConcurrentDataAccess).
 
				 
			
 
				 <dt>STARPU_USE_NUMA</dt>
			
 
				 <dd>
			
 
				-\anchor STARPU_USE_NUMA 
			
 
				+\anchor STARPU_USE_NUMA
			
 
				 \addindex __env__STARPU_USE_NUMA
			
 
				 When defined, NUMA nodes are taken into account by StarPU. Otherwise, memory
			
 
				 is considered as only one node. This is experimental for now.
			
@@ -1191,7 +1200,7 @@ The file will contain the sum of the idle times of all the workers.
 
				 <dd>
			
 
				 \anchor SC_HYPERVISOR_POLICY
			
 
				 \addindex __env__SC_HYPERVISOR_POLICY
			
 
				-Choose between the different resizing policies proposed by StarPU for the hypervisor: 
			
 
				+Choose between the different resizing policies proposed by StarPU for the hypervisor:
			
 
				 idle, app_driven, feft_lp, teft_lp, ispeed_lp, throughput_lp, etc.
			
 
				 
			
 
				 Use <c>SC_HYPERVISOR_POLICY=help</c> to get the list of available policies for the hypervisor
			
@@ -1221,7 +1230,7 @@ of the total time of execution of the application. The default value is the resi
 
				 \addindex __env__SC_HYPERVISOR_MAX_SPEED_GAP
			
 
				 Indicate the ratio of speed difference between contexts that should trigger the hypervisor.
			
 
				 This situation may occur only when a theoretical speed could not be computed and the hypervisor
			
 
				-has no value to compare the speed to. Otherwise the resizing of a context is not influenced by the 
			
 
				+has no value to compare the speed to. Otherwise the resizing of a context is not influenced by the
			
 
				 speed of the other contexts, but only by the value that a context should have.
			
 
				 </dd>
			
 
				 
			
@@ -1239,7 +1248,7 @@ is not done.
 
				 \anchor SC_HYPERVISOR_LAZY_RESIZE
			
 
				 \addindex __env__SC_HYPERVISOR_LAZY_RESIZE
			
 
				 By default the hypervisor resizes the contexts in a lazy way, that is workers are firstly added to a new context
			
 
				-before removing them from the previous one. Once this workers are clearly taken into account 
			
 
				+before removing them from the previous one. Once this workers are clearly taken into account
			
 
				 into the new context (a task was popped there) we remove them from the previous one. However, if the application
			
 
				 needs the change in the distribution of workers to take effect right away, this variable should be set to 0.
			
 
				 </dd>
			
--- a/src/common/fxt.c
+++ b/src/common/fxt.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2012-2013,2015                           Inria
			
 
				  * Copyright (C) 2008-2017                                Université de Bordeaux
			
 
				- * Copyright (C) 2010-2017                                CNRS
			
 
				+ * Copyright (C) 2010-2018                                CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -180,13 +180,66 @@ void _starpu_fxt_init_profiling(unsigned trace_buffer_size)
 
				 	return;
			
 
				 }
			
 
				 
			
 
				+static void _starpu_generate_paje_trace_read_option(const char *option, struct starpu_fxt_options *options)
			
 
				+{
			
 
				+	if (strcmp(option, "-c") == 0)
			
 
				+	{
			
 
				+		options->per_task_colour = 1;
			
 
				+	}
			
 
				+	else if (strcmp(option, "-no-events") == 0)
			
 
				+	{
			
 
				+		options->no_events = 1;
			
 
				+	}
			
 
				+	else if (strcmp(option, "-no-counter") == 0)
			
 
				+	{
			
 
				+		options->no_counter = 1;
			
 
				+	}
			
 
				+	else if (strcmp(option, "-no-bus") == 0)
			
 
				+	{
			
 
				+		options->no_bus = 1;
			
 
				+	}
			
 
				+	else if (strcmp(option, "-no-flops") == 0)
			
 
				+	{
			
 
				+		options->no_flops = 1;
			
 
				+	}
			
 
				+	else if (strcmp(option, "-no-smooth") == 0)
			
 
				+	{
			
 
				+		options->no_smooth = 1;
			
 
				+	}
			
 
				+	else if (strcmp(option, "-no-acquire") == 0)
			
 
				+	{
			
 
				+		options->no_acquire = 1;
			
 
				+	}
			
 
				+	else if (strcmp(option, "-internal") == 0)
			
 
				+	{
			
 
				+		options->internal = 1;
			
 
				+	}
			
 
				+	else if (strcmp(option, "-label-deps") == 0)
			
 
				+	{
			
 
				+		options->label_deps = 1;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		_STARPU_MSG("Option <%s> is not a valid option for starpu_fxt_tool\n", option);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_paje_filename)
			
 
				 {
			
 
				 	/* We take default options */
			
 
				 	struct starpu_fxt_options options;
			
 
				 	starpu_fxt_options_init(&options);
			
 
				 
			
 
				-	/* TODO parse some STARPU_GENERATE_TRACE_OPTIONS env variable */
			
 
				+	char *trace_options = starpu_getenv("STARPU_GENERATE_TRACE_OPTIONS");
			
 
				+	if (trace_options)
			
 
				+	{
			
 
				+		char *option = strtok(trace_options, " ");
			
 
				+		while (option)
			
 
				+		{
			
 
				+			_starpu_generate_paje_trace_read_option(option, &options);
			
 
				+			option = strtok(NULL, " ");
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	options.ninputfiles = 1;
			
 
				 	options.filenames[0] = input_fxt_filename;