Browse Source

Add STARPU_SIMGRID_CUDA_MALLOC_COST and STARPU_SIMGRID_QUEUE_MALLOC_COST environment variables to control whether simgrid simulates CUDA costs or not

Samuel Thibault 10 years ago
parent
commit
f433b48230

+ 7 - 0
doc/doxygen/chapters/21simgrid.doxy

@@ -121,6 +121,13 @@ case. Since during simgrid execution, the functions of the codelet are actually
 not called, one can use dummy functions such as the following to still permit
 CUDA or OpenCL execution:
 
+\section TweakingSimulation Tweaking simulation
+
+The simulation can be tuned anywhere between a very accurate simulation and a
+very simple one (which is thus close to scheduling theory results). See the
+\ref STARPU_SIMGRID_CUDA_MALLOC_COST and \ref STARPU_SIMGRID_CUDA_QUEUE_COST
+environment variables.
+
 \section MPI applications
 
 StarPU-MPI applications can also be run in simgrid mode. It needs to be compiled

+ 16 - 0
doc/doxygen/chapters/40environment_variables.doxy

@@ -456,6 +456,22 @@ it prints messages on the standard output when data are added or removed from th
 communication cache.
 </dd>
 
+<dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
+<dd>
+\anchor STARPU_SIMGRID_CUDA_MALLOC_COST
+\addindex __env__STARPU_SIMGRID_CUDA_MALLOC_COST
+When set to 1 (which is the default), CUDA and OpenCL memory allocation and
+free costs are taken into account in simgrid mode.
+</dd>
+
+<dt>STARPU_SIMGRID_CUDA_QUEUE_COST</dt>
+<dd>
+\anchor STARPU_SIMGRID_CUDA_QUEUE_COST
+\addindex __env__STARPU_SIMGRID_CUDA_QUEUE_COST
+When set to 1 (which is the default), CUDA task and transfer queueing costs are
+taken into account in simgrid mode.
+</dd>
+
 </dl>
 
 \section MiscellaneousAndDebug Miscellaneous And Debug

+ 5 - 3
src/core/simgrid.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012-2014  Université de Bordeaux
+ * Copyright (C) 2012-2015  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -350,7 +350,8 @@ void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *j, struct star
 		task->cond = cond;
 		task->next = NULL;
 		/* Sleep 10µs for the GPU task queueing */
-		MSG_process_sleep(0.000010);
+		if (_starpu_simgrid_queue_malloc_cost())
+			MSG_process_sleep(0.000010);
 		if (last_task[workerid])
 		{
 			/* Make this task depend on the previous */
@@ -566,7 +567,8 @@ int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node,
 		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
 
 	/* Sleep 10µs for the GPU transfer queueing */
-	MSG_process_sleep(0.000010);
+	if (_starpu_simgrid_queue_malloc_cost())
+		MSG_process_sleep(0.000010);
 	transfer_submit(transfer);
 	/* Note: from here, transfer might be already freed */
 

+ 4 - 0
src/core/simgrid.h

@@ -46,6 +46,10 @@ void _starpu_simgrid_get_platform_path(char *path, size_t maxlen);
 msg_as_t _starpu_simgrid_get_as_by_name(const char *name);
 #pragma weak starpu_mpi_world_rank
 extern int starpu_mpi_world_rank(void);
+
+/* Whether simgrid mode simulates the cost of CUDA (and, in malloc.c, OpenCL)
+ * allocations and frees. Read from STARPU_SIMGRID_CUDA_MALLOC_COST, with 1
+ * (enabled) as the default. */
+#define _starpu_simgrid_cuda_malloc_cost() starpu_get_env_number_default("STARPU_SIMGRID_CUDA_MALLOC_COST", 1)
+/* Whether simgrid mode simulates the GPU task and transfer queueing cost.
+ * The documented variable is STARPU_SIMGRID_CUDA_QUEUE_COST; the
+ * STARPU_SIMGRID_QUEUE_MALLOC_COST name is passed as the default so that it is
+ * still honoured when the documented variable is unset. Final default is 1
+ * (enabled). */
+#define _starpu_simgrid_queue_malloc_cost() starpu_get_env_number_default("STARPU_SIMGRID_CUDA_QUEUE_COST", starpu_get_env_number_default("STARPU_SIMGRID_QUEUE_MALLOC_COST", 1))
+
 #endif
 
 #endif // __SIMGRID_H__

+ 2 - 1
src/datawizard/datawizard.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2010, 2012-2014  Université de Bordeaux
+ * Copyright (C) 2009-2010, 2012-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -30,6 +30,7 @@ int __starpu_datawizard_progress(unsigned memory_node, unsigned may_alloc, unsig
 	int ret = 0;
 
 #ifdef STARPU_SIMGRID
+	/* XXX: this 10µs sleep is unconditional, it is not gated by the _starpu_simgrid_*_cost() switches */
 	MSG_process_sleep(0.000010);
 #endif
 	STARPU_UYIELD();

+ 12 - 6
src/datawizard/malloc.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2010, 2012-2014  Université de Bordeaux
+ * Copyright (C) 2009-2010, 2012-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -25,6 +25,7 @@
 #include <drivers/opencl/driver_opencl.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/malloc.h>
+#include <core/simgrid.h>
 
 static size_t _malloc_align = sizeof(void*);
 
@@ -119,7 +120,8 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 		 * Ideally we would simulate this batching in 1MiB requests
 		 * instead of computing an average value.
 		 */
-		MSG_process_sleep((float) dim * 0.000650 / 1048576.);
+		if (_starpu_simgrid_cuda_malloc_cost())
+			MSG_process_sleep((float) dim * 0.000650 / 1048576.);
 #else /* STARPU_SIMGRID */
 		if (_starpu_can_submit_cuda_task())
 		{
@@ -410,7 +412,8 @@ _starpu_malloc_on_node(unsigned dst_node, size_t size)
 #endif
 			/* Sleep for the allocation */
 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
-			MSG_process_sleep(0.000175);
+			if (_starpu_simgrid_cuda_malloc_cost())
+				MSG_process_sleep(0.000175);
 			if (!last[dst_node])
 				last[dst_node] = 1<<10;
 			addr = last[dst_node];
@@ -444,7 +447,8 @@ _starpu_malloc_on_node(unsigned dst_node, size_t size)
 				static uintptr_t last[STARPU_MAXNODES];
 				/* Sleep for the allocation */
 				STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
-				MSG_process_sleep(0.000175);
+				if (_starpu_simgrid_cuda_malloc_cost())
+					MSG_process_sleep(0.000175);
 				if (!last[dst_node])
 					last[dst_node] = 1<<10;
 				addr = last[dst_node];
@@ -523,7 +527,8 @@ _starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 #ifdef STARPU_SIMGRID
 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
 			/* Sleep for the free */
-			MSG_process_sleep(0.000750);
+			if (_starpu_simgrid_cuda_malloc_cost())
+				MSG_process_sleep(0.000750);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
 #else
 			cudaError_t err;
@@ -558,7 +563,8 @@ _starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 #ifdef STARPU_SIMGRID
 			STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
 			/* Sleep for the free */
-			MSG_process_sleep(0.000750);
+			if (_starpu_simgrid_cuda_malloc_cost())
+				MSG_process_sleep(0.000750);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
 #else
 			cl_int err;