瀏覽代碼

Add STARPU_CODELET_SIMGRID_EXECUTE flag to allow executing codelet functions even in simgrid mode

Samuel Thibault 9 年之前
父節點
當前提交
6a03e93123

+ 1 - 0
ChangeLog

@@ -193,6 +193,7 @@ Small features:
   * New function starpu_codelet_unpack_args_and_copyleft() which
     allows to copy in a new buffer values which have not been unpacked by
     the current call
+  * Add STARPU_CODELET_SIMGRID_EXECUTE flag.
 
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define

+ 4 - 3
doc/doxygen/chapters/21simgrid.doxy

@@ -34,7 +34,8 @@ StarPU in Simgrid mode anyway.
 To be able to run the application with e.g. CUDA simulation on a system which
 does not have CUDA installed, one can fill the cuda_funcs with (void*)1, to
 express that there is a CUDA implementation, even if one does not actually
-provide it. StarPU will never actually run it in Simgrid mode anyway.
+provide it. By default, StarPU will not actually run it in Simgrid mode
+(unless the ::STARPU_CODELET_SIMGRID_EXECUTE flag is set in the codelet).
 
 \section Calibration Calibration
 
@@ -118,8 +119,8 @@ If the desktop machine does not have CUDA or OpenCL, StarPU is still able to
 use simgrid to simulate execution with CUDA/OpenCL devices, but the application
 source code will probably disable the CUDA and OpenCL codelets in that
 case. Since during simgrid execution, the functions of the codelet are actually
-not called, one can use dummy functions such as the following to still permit
-CUDA or OpenCL execution:
+not called by default, one can use dummy functions such as the following to
+still permit CUDA or OpenCL execution.
 
 \section SimulationExamples Simulation examples
 

+ 6 - 0
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -133,6 +133,9 @@ Value to be set in starpu_codelet::cuda_flags to allow asynchronous CUDA kernel
 \def STARPU_OPENCL_ASYNC
 Value to be set in starpu_codelet::opencl_flags to allow asynchronous OpenCL kernel execution.
 
+\def STARPU_CODELET_SIMGRID_EXECUTE
+Value to be set in starpu_codelet::flags to execute the codelet functions even in simgrid mode.
+
 \typedef starpu_cpu_func_t
 \ingroup API_Codelet_And_Tasks
 CPU implementation of a codelet.
@@ -358,6 +361,9 @@ starpu_codelet_display_stats() for details).
 Optional name of the codelet. This can be useful for debugging
 purposes.
 
+\var int starpu_codelet::flags
+Various flags for the codelet, e.g. ::STARPU_CODELET_SIMGRID_EXECUTE.
+
 \fn void starpu_codelet_init(struct starpu_codelet *cl)
 \ingroup API_Codelet_And_Tasks
 Initialize \p cl with default values. Codelets should

+ 3 - 0
include/starpu_task.h

@@ -43,6 +43,7 @@ extern "C"
 #define STARPU_MIC	((1ULL)<<7)
 #define STARPU_SCC	((1ULL)<<8)
 
+#define STARPU_CODELET_SIMGRID_EXECUTE	(1<<0)
 #define STARPU_CUDA_ASYNC	(1<<0)
 #define STARPU_OPENCL_ASYNC	(1<<0)
 
@@ -120,6 +121,8 @@ struct starpu_codelet
 	unsigned long per_worker_stats[STARPU_NMAXWORKERS];
 
 	const char *name;
+
+	int flags;
 };
 
 struct starpu_task

+ 5 - 2
src/drivers/cpu/driver_cpu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2015  Université de Bordeaux
+ * Copyright (C) 2010-2016  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010-2015  CNRS
  * Copyright (C) 2011  Télécom-SudParis
@@ -104,7 +104,10 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 		{
 			_STARPU_TRACE_START_EXECUTING();
 #ifdef STARPU_SIMGRID
-			_starpu_simgrid_submit_job(cpu_args->workerid, j, perf_arch, NAN, NULL, NULL, NULL);
+			if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE)
+				func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+			else
+				_starpu_simgrid_submit_job(cpu_args->workerid, j, perf_arch, NAN, NULL, NULL, NULL);
 #else
 			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 #endif

+ 4 - 1
src/drivers/cuda/driver_cuda.c

@@ -458,7 +458,10 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worke
 #ifdef STARPU_SIMGRID
 		int async = task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC;
 		unsigned workerid = worker->workerid;
-		_starpu_simgrid_submit_job(workerid, j, &worker->perf_arch, NAN,
+		if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE & !async)
+			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+		else
+			_starpu_simgrid_submit_job(workerid, j, &worker->perf_arch, NAN,
 				async ? &task_finished[workerid][pipeline_idx] : NULL,
 				async ? &task_mutex[workerid][pipeline_idx] : NULL,
 				async ? &task_cond[workerid][pipeline_idx] : NULL);

+ 24 - 15
src/drivers/opencl/driver_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2015  Université de Bordeaux
+ * Copyright (C) 2010-2016  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  * Copyright (C) 2011  Télécom-SudParis
@@ -929,21 +929,30 @@ static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker
 		_STARPU_TRACE_START_EXECUTING();
 #ifdef STARPU_SIMGRID
 		double length = NAN;
-	  #ifdef STARPU_OPENCL_SIMULATOR
-		func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
-	    #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
-	      #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
-		#define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
-	      #else
-		#error The OpenCL simulator must provide CL_PROFILING_CLOCK_CYCLE_COUNT
-	      #endif
-	    #endif
-		struct starpu_profiling_task_info *profiling_info = task->profiling_info;
-		STARPU_ASSERT_MSG(profiling_info->used_cycles, "Application kernel must call starpu_opencl_collect_stats to collect simulated time");
-		length = ((double) profiling_info->used_cycles)/MSG_get_host_speed(MSG_host_self());
-	  #endif
 		int async = task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC;
-		_starpu_simgrid_submit_job(worker->workerid, j, &worker->perf_arch, length,
+		int simulate = 1;
+		if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE & !async)
+		{
+			/* Actually execute function */
+			simulate = 0;
+			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+		#ifdef STARPU_OPENCL_SIMULATOR
+		    #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
+		      #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
+			#define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
+		      #else
+			#error The OpenCL simulator must provide CL_PROFILING_CLOCK_CYCLE_COUNT
+		      #endif
+		    #endif
+			struct starpu_profiling_task_info *profiling_info = task->profiling_info;
+			STARPU_ASSERT_MSG(profiling_info->used_cycles, "Application kernel must call starpu_opencl_collect_stats to collect simulated time");
+			length = ((double) profiling_info->used_cycles)/MSG_get_host_speed(MSG_host_self());
+			/* And give the simulated time to simgrid */
+			simulate = 1;
+		#endif
+		}
+		if (simulate)
+			_starpu_simgrid_submit_job(worker->workerid, j, &worker->perf_arch, length,
 				async ? &task_finished[worker->devid][pipeline_idx] : NULL,
 				async ? &task_mutex[worker->devid][pipeline_idx] : NULL,
 				async ? &task_cond[worker->devid][pipeline_idx] : NULL);

+ 1 - 0
tests/datawizard/locality.c

@@ -58,6 +58,7 @@ static struct starpu_codelet cl =
 		STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY,
 		STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY,
 	},
+	.flags = STARPU_CODELET_SIMGRID_EXECUTE,
 };
 
 int main(int argc, char *argv[])