12 anos atrás · 4492f76e0b
--- a/ChangeLog
+++ b/ChangeLog
@@ -58,6 +58,7 @@ New features:
 
				 	  data with dynamic size can now be exchanged with StarPU-MPI.
			
 
				   * Add experimental simgrid support, to simulate execution with various
			
 
				     number of CPUs, GPUs, amount of memory, etc.
			
 
				+  * Add support for OpenCL simulators (which provide simulated execution time)
			
 
				 
			
 
				 Changes:
			
 
				   * Fix the block filter functions.
			
--- a/configure.ac
+++ b/configure.ac
@@ -777,6 +777,14 @@ AC_DEFINE_UNQUOTED(STARPU_MAXGORDONDEVS, [1], [maximum number of GORDON devices]
 
				 #                                                                             #
			
 
				 ###############################################################################
			
 
				 
			
 
				+AC_ARG_ENABLE(opencl-simulator, [AS_HELP_STRING([--enable-opencl-simulator],
			
 
				+				[Enable the use of an OpenCL simulator])],
			
 
				+				enable_opencl_simulator=$enableval, enable_opencl_simulator=no)
			
 
				+if test x$enable_opencl_simulator = xyes; then
			
 
				+	enable_simgrid=yes
			
 
				+	AC_DEFINE(STARPU_OPENCL_SIMULATOR, 1, [Define this to enable using an OpenCL simulator])
			
 
				+fi
			
 
				+
			
 
				 AC_ARG_ENABLE(simgrid, [AS_HELP_STRING([--enable-simgrid],
			
 
				 			[Enable simulating execution in simgrid])],
			
 
				 			enable_simgrid=$enableval, enable_simgrid=no)
			
--- a/doc/chapters/configuration.texi
+++ b/doc/chapters/configuration.texi
@@ -150,6 +150,12 @@ contain the OpenCL shared libraries---e.g. @file{libOpenCL.so}. This defaults to
 
				 @code{/lib} appended to the value given to @code{--with-opencl-dir}.
			
 
				 @end defvr
			
 
				 
			
 
				+@defvr {Configure option} --enable-opencl-simulator
			
 
				+Treat the provided OpenCL implementation as a simulator, i.e. use
			
 
				+the kernel duration returned by OpenCL profiling information as wallclock time
			
 
				+instead of the actual measured real time. This requires simgrid support.
			
 
				+@end defvr
			
 
				+
			
 
				 @defvr {Configure option} --enable-maximplementations=@var{count}
			
 
				 Allow for at most @var{count} codelet implementations for the same
			
 
				 target device.  This information is then available as the
			
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -172,13 +172,17 @@ int main(int argc, char **argv)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-void _starpu_simgrid_execute_job(struct _starpu_job *j, enum starpu_perf_archtype perf_arch)
			
 
				+void _starpu_simgrid_execute_job(struct _starpu_job *j, enum starpu_perf_archtype perf_arch, double length)
			
 
				 {
			
 
				 	struct starpu_task *task = j->task;
			
 
				 	msg_task_t simgrid_task;
			
 
				-	double length = starpu_task_expected_length(task, perf_arch, j->nimpl);
			
 
				 
			
 
				-	STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length), "Codelets need to have a calibrated perfmodel");
			
 
				+	if (isnan(length))
			
 
				+	{
			
 
				+		length = starpu_task_expected_length(task, perf_arch, j->nimpl);
			
 
				+
			
 
				+		STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length), "Codelets must have a calibrated perfmodel");
			
 
				+	}
			
 
				 
			
 
				 	simgrid_task = MSG_task_create(_starpu_job_get_model_name(j),
			
 
				 			length/1000000.0*MSG_get_host_speed(MSG_host_self()),
			
--- a/src/core/simgrid.h
+++ b/src/core/simgrid.h
@@ -20,7 +20,7 @@
 
				 #ifdef STARPU_SIMGRID
			
 
				 #include <msg/msg.h>
			
 
				 
			
 
				-void _starpu_simgrid_execute_job(struct _starpu_job *job, enum starpu_perf_archtype perf_arch);
			
 
				+void _starpu_simgrid_execute_job(struct _starpu_job *job, enum starpu_perf_archtype perf_arch, double length);
			
 
				 msg_task_t _starpu_simgrid_transfer_task_create(unsigned src_node, unsigned dst_node, size_t size);
			
 
				 void _starpu_simgrid_post_task(msg_task_t task, unsigned *finished, _starpu_pthread_mutex_t *mutex, _starpu_pthread_cond_t *cond);
			
 
				 #endif
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -273,7 +273,7 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 	struct starpu_conf *conf = config->conf;
			
 
				-	topology->nhwcpus = conf->ncpus;
			
 
				+	topology->nhwcpus = conf->ncpus?conf->ncpus:1;
			
 
				 	topology->nhwcudagpus = conf->ncuda;
			
 
				 	topology->nhwopenclgpus = conf->nopencl;
			
 
				 #else
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -149,7 +149,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
				 			_starpu_bind_thread_on_cpus(cpu_args->config, _starpu_get_combined_worker_struct(j->combined_workerid));
			
 
				 		STARPU_ASSERT(func);
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-		_starpu_simgrid_execute_job(j, perf_arch);
			
 
				+		_starpu_simgrid_execute_job(j, perf_arch, NAN);
			
 
				 #else
			
 
				 		func(task->interfaces, task->cl_arg);
			
 
				 #endif
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -309,7 +309,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
				 	STARPU_ASSERT(func);
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-	_starpu_simgrid_execute_job(j, args->perf_arch);
			
 
				+	_starpu_simgrid_execute_job(j, args->perf_arch, NAN);
			
 
				 #else
			
 
				 	func(task->interfaces, task->cl_arg);
			
 
				 #endif
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -663,7 +663,21 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
				 	STARPU_ASSERT(func);
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-	_starpu_simgrid_execute_job(j, args->perf_arch);
			
 
				+	double length = NAN;
			
 
				+  #ifdef STARPU_OPENCL_SIMULATOR
			
 
				+	func(task->interfaces, task->cl_arg);
			
 
				+    #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
			
 
				+      #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
			
 
				+        #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
			
 
				+      #else
			
 
				+        #error The OpenCL simulator must provide CL_PROFILING_CLOCK_CYCLE_COUNT
			
 
				+      #endif
			
 
				+    #endif
			
 
				+	struct starpu_task_profiling_info *profiling_info = task->profiling_info;
			
 
				+	STARPU_ASSERT_MSG(profiling_info->used_cycles, "Application kernel must call starpu_opencl_collect_stats to collect simulated time");
			
 
				+	length = ((double) profiling_info->used_cycles)/MSG_get_host_speed(MSG_host_self());
			
 
				+  #endif
			
 
				+	_starpu_simgrid_execute_job(j, args->perf_arch, length);
			
 
				 #else
			
 
				 	func(task->interfaces, task->cl_arg);
			
 
				 #endif
			
--- a/src/profiling/profiling.c
+++ b/src/profiling/profiling.c
@@ -57,8 +57,14 @@ static void _starpu_bus_reset_profiling_info(struct starpu_bus_profiling_info *b
 
				  *	Global control of profiling
			
 
				  */
			
 
				 
			
 
				-/* Disabled by default */
			
 
				-int _starpu_profiling = 0;
			
 
				+/* Disabled by default, unless simulating */
			
 
				+int _starpu_profiling =
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	1
			
 
				+#else
			
 
				+	0
			
 
				+#endif
			
 
				+	;
			
 
				 
			
 
				 int starpu_profiling_status_set(int status)
			
 
				 {