Browse Source

Merge remote-tracking branch 'gitlab/master' into ft_checkpoint

Romain LION 4 years ago
parent
commit
e8e9281716

+ 6 - 0
ChangeLog

@@ -50,6 +50,12 @@ Small changes:
 StarPU 1.3.5 (git revision xxx)
 ====================================================================
 
+Small features:
+  * New environment variable STARPU_FXT_SUFFIX to set the filename in
+    which to save the fxt trace
+  * New option -d for starpu_fxt_tool to specify in which directory to
+    generate files
+
 Small changes:
   * Move MPI cache functions into the public API
   * Add STARPU_MPI_NOBIND environment variable.

+ 9 - 3
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -84,7 +84,11 @@ Or you can simply point the <c>PKG_CONFIG_PATH</c> to
 When FxT is enabled, a trace is generated when StarPU is terminated by calling
 starpu_shutdown(). The trace is a binary file whose name has the form
 <c>prof_file_XXX_YYY</c> where <c>XXX</c> is the user name, and
-<c>YYY</c> is the pid of the process that used StarPU. This file is saved in the
+<c>YYY</c> is the MPI id of the process that used StarPU (or 0 when running a sequential program).
+One can change
+the name of the file by setting the environment variable \ref
+STARPU_FXT_SUFFIX; its contents will be used instead of <c>prof_file_XXX</c>.
+This file is saved in the
 <c>/tmp/</c> directory by default, or by the directory specified by
 the environment variable \ref STARPU_FXT_PREFIX.
 
@@ -104,8 +108,10 @@ $ starpu_fxt_tool -i /tmp/prof_file_something
 \endverbatim
 
 Or alternatively, setting the environment variable \ref STARPU_GENERATE_TRACE
-to <c>1</c> before application execution will make StarPU do it automatically at
-application shutdown.
+to <c>1</c> before application execution will make StarPU
+automatically generate all traces at application shutdown. Note that
+if the environment variable \ref STARPU_FXT_PREFIX is set, files will
+be generated in the given directory.
 
 One can also set the environment variable \ref
 STARPU_GENERATE_TRACE_OPTIONS to specify options, see

+ 9 - 2
doc/doxygen/chapters/501_environment_variables.doxy

@@ -935,14 +935,21 @@ Specify in which file the debugging output should be saved to.
 <dd>
 \anchor STARPU_FXT_PREFIX
 \addindex __env__STARPU_FXT_PREFIX
-Specify in which directory to save the trace generated if FxT is enabled. It needs to have a trailing '/' character.
+Specify in which directory to save the generated trace if FxT is enabled.
+</dd>
+
+<dt>STARPU_FXT_SUFFIX</dt>
+<dd>
+\anchor STARPU_FXT_SUFFIX
+\addindex __env__STARPU_FXT_SUFFIX
+Specify in which file to save the generated trace if FxT is enabled.
 </dd>
 
 <dt>STARPU_FXT_TRACE</dt>
 <dd>
 \anchor STARPU_FXT_TRACE
 \addindex __env__STARPU_FXT_TRACE
-Specify whether to generate (1) or not (0) the FxT trace in /tmp/prof_file_XXX_YYY . The default is 1 (generate it)
+Specify whether to generate (1) or not (0) the FxT trace in /tmp/prof_file_XXX_YYY (the directory and file name can be changed with \ref STARPU_FXT_PREFIX and \ref STARPU_FXT_SUFFIX). The default is 1 (generate it)
 </dd>
 
 <dt>STARPU_LIMIT_CUDA_devid_MEM</dt>

+ 2 - 0
include/starpu_fxt.h

@@ -69,6 +69,7 @@ struct starpu_fxt_options
 	char *number_events_path;
 	char *anim_path;
 	char *states_path;
+	char *dir;
 	char worker_names[STARPU_NMAXWORKERS][256];
 	int nworkers;
 	struct starpu_perfmodel_arch worker_archtypes[STARPU_NMAXWORKERS];
@@ -104,6 +105,7 @@ struct starpu_fxt_options
 };
 
 void starpu_fxt_options_init(struct starpu_fxt_options *options);
+void starpu_fxt_options_shutdown(struct starpu_fxt_options *options);
 void starpu_fxt_generate_trace(struct starpu_fxt_options *options);
 
 /**

+ 3 - 3
julia/examples/old_examples/mult/mult.c

@@ -204,9 +204,9 @@ double median_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim
 {
 	unsigned i;
 
-	float * A = (float *) valloc(zdim*ydim*sizeof(float));
-	float * B = (float *) valloc(xdim*zdim*sizeof(float));
-	float * C = (float *) valloc(xdim*ydim*sizeof(float));
+	float * A = (float *) malloc(zdim*ydim*sizeof(float));
+	float * B = (float *) malloc(xdim*zdim*sizeof(float));
+	float * C = (float *) malloc(xdim*ydim*sizeof(float));
 
 	double exec_times[nb_test];
 

+ 1 - 1
mpi/src/starpu_mpi_datatype.c

@@ -106,7 +106,7 @@ static int handle_to_datatype_tensor(starpu_data_handle_t data_handle, MPI_Datat
 	unsigned ldy = starpu_tensor_get_local_ldy(data_handle);
 	unsigned ldz = starpu_tensor_get_local_ldz(data_handle);
 	unsigned ldt = starpu_tensor_get_local_ldt(data_handle);
-	size_t elemsize = starpu_block_get_elemsize(data_handle);
+	size_t elemsize = starpu_tensor_get_elemsize(data_handle);
 
 	MPI_Datatype datatype_3dlayer;
 	ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_3dlayer);

+ 1 - 1
socl/src/cl_createbuffer.c

@@ -115,7 +115,7 @@ soclCreateBuffer(cl_context   context,
 	// If not MEM_USE_HOST_PTR, we need to alloc the buffer ourselves
 	if (!(flags & CL_MEM_USE_HOST_PTR))
 	{
-		mem->ptr = valloc(size);
+		mem->ptr = malloc(size);
 		if (mem->ptr == NULL)
 		{
 			if (errcode_ret != NULL)

+ 34 - 29
src/common/fxt.c

@@ -39,7 +39,7 @@ unsigned long _starpu_job_cnt = 0;
 #include <sys/thr.h>       /* for thr_self() */
 #endif
 
-static char _STARPU_PROF_FILE_USER[128];
+static char _starpu_prof_file_user[128];
 int _starpu_fxt_started = 0;
 int _starpu_fxt_willstart = 1;
 starpu_pthread_mutex_t _starpu_fxt_started_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
@@ -95,26 +95,25 @@ static void _starpu_profile_set_tracefile(void)
 
 	char *fxt_prefix = starpu_getenv("STARPU_FXT_PREFIX");
 	if (!fxt_prefix)
-	     fxt_prefix = "/tmp/";
+		fxt_prefix = "/tmp";
 	else
+		_starpu_mkpath_and_check(fxt_prefix, S_IRWXU);
+
+	char suffix[127];
+	char *fxt_suffix = starpu_getenv("STARPU_FXT_SUFFIX");
+	if (!fxt_suffix)
 	{
-		// Check if the given folder really exists:
-		struct stat folder_stat;
-		if (stat(fxt_prefix, &folder_stat) < 0 || !S_ISDIR(folder_stat.st_mode))
-		{
-			_STARPU_MSG("%s is not a valid directory.\n", fxt_prefix);
-			_starpu_abort();
-		}
+		user = starpu_getenv("USER");
+		if (!user)
+			user = "";
+		snprintf(suffix, sizeof(suffix), "prof_file_%s_%d", user, _starpu_id);
+	}
+	else
+	{
+		snprintf(suffix, sizeof(suffix), "%s_%d", fxt_suffix, _starpu_id);
 	}
 
-	user = starpu_getenv("USER");
-	if (!user)
-		user = "";
-
-	char suffix[128];
-	snprintf(suffix, sizeof(suffix), "prof_file_%s_%d", user, _starpu_id);
-
-	snprintf(_STARPU_PROF_FILE_USER, sizeof(_STARPU_PROF_FILE_USER), "%s%s", fxt_prefix, suffix);
+	snprintf(_starpu_prof_file_user, sizeof(_starpu_prof_file_user), "%s/%s", fxt_prefix, suffix);
 }
 
 void starpu_profiling_set_id(int new_id)
@@ -124,7 +123,7 @@ void starpu_profiling_set_id(int new_id)
 	_starpu_profile_set_tracefile();
 
 #ifdef HAVE_FUT_SET_FILENAME
-	fut_set_filename(_STARPU_PROF_FILE_USER);
+	fut_set_filename(_starpu_prof_file_user);
 #endif
 }
 
@@ -184,7 +183,7 @@ void _starpu_fxt_init_profiling(uint64_t trace_buffer_size)
 	_starpu_profile_set_tracefile();
 
 #ifdef HAVE_FUT_SET_FILENAME
-	fut_set_filename(_STARPU_PROF_FILE_USER);
+	fut_set_filename(_starpu_prof_file_user);
 #endif
 #ifdef HAVE_ENABLE_FUT_FLUSH
 	// when the event buffer is full, fxt stops recording events.
@@ -261,7 +260,7 @@ static void _starpu_generate_paje_trace_read_option(const char *option, struct s
 	}
 }
 
-static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_paje_filename)
+static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_paje_filename, char *dirname)
 {
 	/* We take default options */
 	struct starpu_fxt_options options;
@@ -283,6 +282,7 @@ static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_p
 	options.out_paje_path = output_paje_filename;
 	options.file_prefix = "";
 	options.file_rank = -1;
+	options.dir = dirname;
 
 	starpu_fxt_generate_trace(&options);
 }
@@ -291,10 +291,17 @@ void _starpu_fxt_dump_file(void)
 {
 	if (!_starpu_fxt_started)
 		return;
+
+	char hostname[128];
+	gethostname(hostname, 128);
+
+	int ret = fut_endup(_starpu_prof_file_user);
+	if (ret < 0)
+		_STARPU_MSG("Problem when writing FxT traces into file %s:%s\n", hostname, _starpu_prof_file_user);
 #ifdef STARPU_VERBOSE
-	_STARPU_MSG("Writing FxT traces into file %s\n", _STARPU_PROF_FILE_USER);
+	else
+		_STARPU_MSG("Writing FxT traces into file %s:%s\n", hostname, _starpu_prof_file_user);
 #endif
-	fut_endup(_STARPU_PROF_FILE_USER);
 }
 
 void _starpu_stop_fxt_profiling(void)
@@ -303,17 +310,15 @@ void _starpu_stop_fxt_profiling(void)
 		return;
 	if (!_starpu_written)
 	{
-#ifdef STARPU_VERBOSE
-	        char hostname[128];
-		gethostname(hostname, 128);
-		_STARPU_MSG("Writing FxT traces into file %s:%s\n", hostname, _STARPU_PROF_FILE_USER);
-#endif
-		fut_endup(_STARPU_PROF_FILE_USER);
+		_starpu_fxt_dump_file();
 
 		/* Should we generate a Paje trace directly ? */
 		int generate_trace = starpu_get_env_number("STARPU_GENERATE_TRACE");
 		if (generate_trace == 1)
-			_starpu_generate_paje_trace(_STARPU_PROF_FILE_USER, "paje.trace");
+		{
+			char *fxt_prefix = starpu_getenv("STARPU_FXT_PREFIX");
+			_starpu_generate_paje_trace(_starpu_prof_file_user, "paje.trace", fxt_prefix);
+		}
 
 		int ret = fut_done();
 		if (ret < 0)

+ 1 - 1
src/common/utils.c

@@ -112,7 +112,7 @@ int _starpu_mkpath(const char *s, mode_t mode)
 	{
 		if (!S_ISDIR(sb.st_mode))
 		{
-			_STARPU_MSG("Error: %s is not a directory:\n", path);
+			_STARPU_MSG("Error: %s already exists and is not a directory:\n", path);
 			STARPU_ABORT();
 		}
 		/* It already exists and is a directory.  */

+ 94 - 10
src/debug/traces/starpu_fxt.c

@@ -3465,8 +3465,7 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 	fd_in = open(filename_in, O_RDONLY);
 	if (fd_in < 0)
 	{
-	        perror("open failed :");
-	        exit(-1);
+		STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_in, strerror(errno));
 	}
 
 	static fxt_t fut;
@@ -4305,6 +4304,59 @@ void starpu_fxt_options_init(struct starpu_fxt_options *options)
 	options->dumped_codelets = NULL;
 	options->activity_path = "activity.data";
 	options->sched_tasks_path = "sched_tasks.rec";
+	options->dir = NULL;
+}
+
+static
+void _set_dir(char *dir, char **option)
+{
+	if (*option)
+	{
+		char *tmp = strdup(*option);
+		_STARPU_MALLOC(*option, 256);
+		snprintf(*option, 256, "%s/%s", dir, tmp);
+		free(tmp);
+	}
+}
+
+static
+void _starpu_fxt_options_set_dir(struct starpu_fxt_options *options)
+{
+	if (!options->dir)
+		return;
+
+	_starpu_mkpath_and_check(options->dir, S_IRWXU);
+	_set_dir(options->dir, &options->out_paje_path);
+	_set_dir(options->dir, &options->dag_path);
+	_set_dir(options->dir, &options->tasks_path);
+	_set_dir(options->dir, &options->comms_path);
+	_set_dir(options->dir, &options->number_events_path);
+	_set_dir(options->dir, &options->data_path);
+	_set_dir(options->dir, &options->papi_path);
+	_set_dir(options->dir, &options->anim_path);
+	_set_dir(options->dir, &options->states_path);
+	_set_dir(options->dir, &options->distrib_time_path);
+	_set_dir(options->dir, &options->activity_path);
+	_set_dir(options->dir, &options->sched_tasks_path);
+}
+
+void starpu_fxt_options_shutdown(struct starpu_fxt_options *options)
+{
+	if (options->dir)
+	{
+		free(options->out_paje_path);
+		free(options->dag_path);
+		free(options->tasks_path);
+		free(options->comms_path);
+		free(options->number_events_path);
+		free(options->data_path);
+		free(options->papi_path);
+		free(options->anim_path);
+		free(options->states_path);
+		free(options->distrib_time_path);
+		free(options->activity_path);
+		free(options->sched_tasks_path);
+	}
 }
 
 static
@@ -4316,6 +4368,8 @@ void _starpu_fxt_distrib_file_init(struct starpu_fxt_options *options)
 	if (options->distrib_time_path)
 	{
 		distrib_time = fopen(options->distrib_time_path, "w+");
+		if (distrib_time == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->distrib_time_path, strerror(errno));
 	}
 	else
 	{
@@ -4340,7 +4394,11 @@ static
 void _starpu_fxt_activity_file_init(struct starpu_fxt_options *options)
 {
 	if (options->activity_path)
+	{
 		activity_file = fopen(options->activity_path, "w+");
+		if (activity_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->activity_path, strerror(errno));
+	}
 	else
 		activity_file = NULL;
 }
@@ -4349,7 +4407,11 @@ static
 void _starpu_fxt_sched_tasks_file_init(struct starpu_fxt_options *options)
 {
 	if (options->sched_tasks_path)
+	{
 		sched_tasks_file = fopen(options->sched_tasks_path, "w+");
+		if (sched_tasks_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->sched_tasks_path, strerror(errno));
+	}
 	else
 		sched_tasks_file = NULL;
 }
@@ -4360,6 +4422,9 @@ void _starpu_fxt_anim_file_init(struct starpu_fxt_options *options)
 	if (options->anim_path)
 	{
 		anim_file = fopen(options->anim_path, "w+");
+		if (anim_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->anim_path, strerror(errno));
+
 		_starpu_fxt_component_print_header(anim_file);
 	}
 	else
@@ -4370,7 +4435,11 @@ static
 void _starpu_fxt_tasks_file_init(struct starpu_fxt_options *options)
 {
 	if (options->tasks_path)
+	{
 		tasks_file = fopen(options->tasks_path, "w+");
+		if (tasks_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->tasks_path, strerror(errno));
+	}
 	else
 		tasks_file = NULL;
 }
@@ -4379,7 +4448,11 @@ static
 void _starpu_fxt_data_file_init(struct starpu_fxt_options *options)
 {
 	if (options->data_path)
+	{
 		data_file = fopen(options->data_path, "w+");
+		if (data_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->data_path, strerror(errno));
+	}
 	else
 		data_file = NULL;
 }
@@ -4388,7 +4461,11 @@ static
 void _starpu_fxt_comms_file_init(struct starpu_fxt_options *options)
 {
 	if (options->comms_path)
+	{
 		comms_file = fopen(options->comms_path, "w+");
+		if (comms_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->comms_path, strerror(errno));
+	}
 	else
 		comms_file = NULL;
 }
@@ -4399,6 +4476,8 @@ void _starpu_fxt_number_events_file_init(struct starpu_fxt_options *options)
 	if (options->number_events_path)
 	{
 		number_events_file = fopen(options->number_events_path, "w+");
+		if (number_events_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno));
 
 		/* FUT_SETUP_CODE is the event with the maximal value */
 		number_events = calloc(FUT_SETUP_CODE+1, sizeof(uint64_t));
@@ -4412,7 +4491,11 @@ void _starpu_fxt_papi_file_init(struct starpu_fxt_options *options)
 {
 #ifdef STARPU_PAPI
 	if (options->papi_path)
+	{
 		papi_file = fopen(options->papi_path, "w+");
+		if (papi_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->papi_path, strerror(errno));
+	}
 	else
 		papi_file = NULL;
 #endif
@@ -4436,7 +4519,11 @@ static
 void _starpu_fxt_trace_file_init(struct starpu_fxt_options *options)
 {
 	if (options->states_path)
+	{
 		trace_file = fopen(options->states_path, "w+");
+		if (trace_file == NULL)
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->states_path, strerror(errno));
+	}
 	else
 		trace_file = NULL;
 
@@ -4586,8 +4673,7 @@ uint64_t _starpu_fxt_find_start_time(char *filename_in)
 	fd_in = open(filename_in, O_RDONLY);
 	if (fd_in < 0)
 	{
-	        perror("open failed :");
-	        exit(-1);
+		STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_in, strerror(errno));
 	}
 
 	static fxt_t fut;
@@ -4617,6 +4703,7 @@ uint64_t _starpu_fxt_find_start_time(char *filename_in)
 
 void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 {
+	_starpu_fxt_options_set_dir(options);
 	_starpu_fxt_dag_init(options->dag_path);
 	_starpu_fxt_distrib_file_init(options);
 	_starpu_fxt_activity_file_init(options);
@@ -4806,8 +4893,7 @@ static void write_task(struct parse_task pt)
 		kernel->file = fopen(codelet_name, "w+");
 		if(!kernel->file)
 		{
-			perror("open failed :");
-			exit(-1);
+			STARPU_ABORT_MSG("Failed to open '%s' (err %s)", codelet_name, strerror(errno));
 		}
 		HASH_ADD_STR(kernels, name, kernel);
 		fprintf(codelet_list, "%s\n", codelet_name);
@@ -4822,8 +4908,7 @@ void starpu_fxt_write_data_trace(char *filename_in)
 	fd_in = open(filename_in, O_RDONLY);
 	if (fd_in < 0)
 	{
-	        perror("open failed :");
-	        exit(-1);
+		STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_in, strerror(errno));
 	}
 
 	static fxt_t fut;
@@ -4837,8 +4922,7 @@ void starpu_fxt_write_data_trace(char *filename_in)
 	codelet_list = fopen("codelet_list", "w+");
 	if(!codelet_list)
 	{
-		perror("open failed :");
-		exit(-1);
+		STARPU_ABORT_MSG("Failed to open '%s' (err %s)", "codelet_list", strerror(errno));
 	}
 
 	fxt_blockev_t block;

+ 1 - 2
tests/errorcheck/workers_cpuid.c

@@ -22,7 +22,7 @@
  * expected binding does happen
  */
 
-#if !defined(STARPU_USE_CPU) || !defined(STARPU_HAVE_HWLOC)
+#if !defined(STARPU_USE_CPU) || !defined(STARPU_HAVE_HWLOC) || !defined(STARPU_HAVE_SETENV)
 #warning no cpu are available. Skipping test
 int main(void)
 {
@@ -130,7 +130,6 @@ static long * generate_arrangement(int arr_size, long *set, int set_size)
 	int i;
 
 	STARPU_ASSERT(arr_size <= set_size);
-	srandom(time(0));
 
 	for (i=0; i<arr_size; i++)
 	{

+ 12 - 1
tools/starpu_fxt_tool.c

@@ -32,7 +32,8 @@ static void usage()
 	fprintf(stderr, "   -i <input file[s]>  specify the input file[s]. Several files can be provided,\n");
 	fprintf(stderr, "                       or the option specified several times for MPI execution\n");
 	fprintf(stderr, "                       case\n");
-        fprintf(stderr, "   -o <output file>    specify the output file\n");
+        fprintf(stderr, "   -o <output file>    specify the paje output filename\n");
+	fprintf(stderr, "   -d <directory>      specify the directory in which to save files\n");
         fprintf(stderr, "   -c                  use a different colour for every type of task\n");
 	fprintf(stderr, "   -no-events          do not show events\n");
 	fprintf(stderr, "   -no-counter         do not show scheduler counters\n");
@@ -77,6 +78,13 @@ static int parse_args(int argc, char **argv)
 			continue;
 		}
 
+		if (strcmp(argv[i], "-d") == 0)
+		{
+			options.dir = argv[++i];
+			reading_input_filenames = 0;
+			continue;
+		}
+
 		if (strcmp(argv[i], "-i") == 0)
 		{
 			if (options.ninputfiles >= STARPU_FXT_MAX_FILES)
@@ -193,6 +201,7 @@ static int parse_args(int argc, char **argv)
                 usage();
 		return 77;
 	}
+
 	return 0;
 }
 
@@ -203,5 +212,7 @@ int main(int argc, char **argv)
 
 	starpu_fxt_generate_trace(&options);
 
+	starpu_fxt_options_shutdown(&options);
+
 	return 0;
 }