Browse Source

Add --enable-opencl-simulator option to combine PePU with simgrid

Samuel Thibault 12 years ago
parent
commit
4492f76e0b

+ 1 - 0
ChangeLog

@@ -58,6 +58,7 @@ New features:
 	  data with dynamic size can now be exchanged with StarPU-MPI.
   * Add experimental simgrid support, to simulate execution with various
     number of CPUs, GPUs, amount of memory, etc.
+  * Add support for OpenCL simulators (which provide simulated execution time)
 
 Changes:
   * Fix the block filter functions.

+ 8 - 0
configure.ac

@@ -777,6 +777,14 @@ AC_DEFINE_UNQUOTED(STARPU_MAXGORDONDEVS, [1], [maximum number of GORDON devices]
 #                                                                             #
 ###############################################################################
 
+AC_ARG_ENABLE(opencl-simulator, [AS_HELP_STRING([--enable-opencl-simulator],
+				[Enable the use of an OpenCL simulator])],
+				enable_opencl_simulator=$enableval, enable_opencl_simulator=no)
+if test x$enable_opencl_simulator = xyes; then
+	enable_simgrid=yes
+	AC_DEFINE(STARPU_OPENCL_SIMULATOR, 1, [Define this to enable using an OpenCL simulator])
+fi
+
 AC_ARG_ENABLE(simgrid, [AS_HELP_STRING([--enable-simgrid],
 			[Enable simulating execution in simgrid])],
 			enable_simgrid=$enableval, enable_simgrid=no)

+ 6 - 0
doc/chapters/configuration.texi

@@ -150,6 +150,12 @@ contain the OpenCL shared libraries---e.g. @file{libOpenCL.so}. This defaults to
 @code{/lib} appended to the value given to @code{--with-opencl-dir}.
 @end defvr
 
+@defvr {Configure option} --enable-opencl-simulator
+Enable considering the provided OpenCL implementation as a simulator, i.e. use
+the kernel duration returned by OpenCL profiling information as wallclock time
+instead of the actual measured real time. This requires simgrid support.
+@end defvr
+
 @defvr {Configure option} --enable-maximplementations=@var{count}
 Allow for at most @var{count} codelet implementations for the same
 target device.  This information is then available as the

+ 7 - 3
src/core/simgrid.c

@@ -172,13 +172,17 @@ int main(int argc, char **argv)
 	return 0;
 }
 
-void _starpu_simgrid_execute_job(struct _starpu_job *j, enum starpu_perf_archtype perf_arch)
+void _starpu_simgrid_execute_job(struct _starpu_job *j, enum starpu_perf_archtype perf_arch, double length)
 {
 	struct starpu_task *task = j->task;
 	msg_task_t simgrid_task;
-	double length = starpu_task_expected_length(task, perf_arch, j->nimpl);
 
-	STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length), "Codelets need to have a calibrated perfmodel");
+	if (isnan(length))
+	{
+		length = starpu_task_expected_length(task, perf_arch, j->nimpl);
+
+		STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length), "Codelets must have a calibrated perfmodel");
+	}
 
 	simgrid_task = MSG_task_create(_starpu_job_get_model_name(j),
 			length/1000000.0*MSG_get_host_speed(MSG_host_self()),

+ 1 - 1
src/core/simgrid.h

@@ -20,7 +20,7 @@
 #ifdef STARPU_SIMGRID
 #include <msg/msg.h>
 
-void _starpu_simgrid_execute_job(struct _starpu_job *job, enum starpu_perf_archtype perf_arch);
+void _starpu_simgrid_execute_job(struct _starpu_job *job, enum starpu_perf_archtype perf_arch, double length);
 msg_task_t _starpu_simgrid_transfer_task_create(unsigned src_node, unsigned dst_node, size_t size);
 void _starpu_simgrid_post_task(msg_task_t task, unsigned *finished, _starpu_pthread_mutex_t *mutex, _starpu_pthread_cond_t *cond);
 #endif

+ 1 - 1
src/core/topology.c

@@ -273,7 +273,7 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 
 #ifdef STARPU_SIMGRID
 	struct starpu_conf *conf = config->conf;
-	topology->nhwcpus = conf->ncpus;
+	topology->nhwcpus = conf->ncpus?conf->ncpus:1;
 	topology->nhwcudagpus = conf->ncuda;
 	topology->nhwopenclgpus = conf->nopencl;
 #else

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -149,7 +149,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 			_starpu_bind_thread_on_cpus(cpu_args->config, _starpu_get_combined_worker_struct(j->combined_workerid));
 		STARPU_ASSERT(func);
 #ifdef STARPU_SIMGRID
-		_starpu_simgrid_execute_job(j, perf_arch);
+		_starpu_simgrid_execute_job(j, perf_arch, NAN);
 #else
 		func(task->interfaces, task->cl_arg);
 #endif

+ 1 - 1
src/drivers/cuda/driver_cuda.c

@@ -309,7 +309,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 	STARPU_ASSERT(func);
 
 #ifdef STARPU_SIMGRID
-	_starpu_simgrid_execute_job(j, args->perf_arch);
+	_starpu_simgrid_execute_job(j, args->perf_arch, NAN);
 #else
 	func(task->interfaces, task->cl_arg);
 #endif

+ 15 - 1
src/drivers/opencl/driver_opencl.c

@@ -663,7 +663,21 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 	STARPU_ASSERT(func);
 
 #ifdef STARPU_SIMGRID
-	_starpu_simgrid_execute_job(j, args->perf_arch);
+	double length = NAN;
+  #ifdef STARPU_OPENCL_SIMULATOR
+	func(task->interfaces, task->cl_arg);
+    #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
+      #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
+        #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
+      #else
+        #error The OpenCL simulator must provide CL_PROFILING_CLOCK_CYCLE_COUNT
+      #endif
+    #endif
+	struct starpu_task_profiling_info *profiling_info = task->profiling_info;
+	STARPU_ASSERT_MSG(profiling_info->used_cycles, "Application kernel must call starpu_opencl_collect_stats to collect simulated time");
+	length = ((double) profiling_info->used_cycles)/MSG_get_host_speed(MSG_host_self());
+  #endif
+	_starpu_simgrid_execute_job(j, args->perf_arch, length);
 #else
 	func(task->interfaces, task->cl_arg);
 #endif

+ 8 - 2
src/profiling/profiling.c

@@ -57,8 +57,14 @@ static void _starpu_bus_reset_profiling_info(struct starpu_bus_profiling_info *b
  *	Global control of profiling
  */
 
-/* Disabled by default */
-int _starpu_profiling = 0;
+/* Disabled by default, unless simulating */
+int _starpu_profiling =
+#ifdef STARPU_SIMGRID
+	1
+#else
+	0
+#endif
+	;
 
 int starpu_profiling_status_set(int status)
 {