浏览代码

Add STARPU_SIMGRID_CUDA_MALLOC_COST and STARPU_SIMGRID_QUEUE_MALLOC_COST environment variables to control whether simgrid simulates CUDA costs or not

Samuel Thibault 10 年之前
父节点
当前提交
f433b48230

+ 7 - 0
doc/doxygen/chapters/21simgrid.doxy

@@ -121,6 +121,13 @@ case. Since during simgrid execution, the functions of the codelet are actually
 not called, one can use dummy functions such as the following to still permit
 not called, one can use dummy functions such as the following to still permit
 CUDA or OpenCL execution:
 CUDA or OpenCL execution:
 
 
+\section TweakingSimulation Tweaking simulation
+
+The simulation can be tuned between a very accurate simulation and a very
+simple one (which is thus close to scheduling theory results); see the
+\ref STARPU_SIMGRID_CUDA_MALLOC_COST and \ref STARPU_SIMGRID_CUDA_QUEUE_COST
+environment variables.
+
 \section MPI applications
 \section MPI applications
 
 
 StarPU-MPI applications can also be run in simgrid mode. It needs to be compiled
 StarPU-MPI applications can also be run in simgrid mode. It needs to be compiled

+ 16 - 0
doc/doxygen/chapters/40environment_variables.doxy

@@ -456,6 +456,22 @@ it prints messages on the standard output when data are added or removed from th
 communication cache.
 communication cache.
 </dd>
 </dd>
 
 
+<dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
+<dd>
+\anchor STARPU_SIMGRID_CUDA_MALLOC_COST
+\addindex __env__STARPU_SIMGRID_CUDA_MALLOC_COST
+When set to 1 (which is the default), CUDA (and OpenCL) memory allocation and
+free costs are taken into account in simgrid mode.
+</dd>
+
+<dt>STARPU_SIMGRID_CUDA_QUEUE_COST</dt>
+<dd>
+\anchor STARPU_SIMGRID_CUDA_QUEUE_COST
+\addindex __env__STARPU_SIMGRID_CUDA_QUEUE_COST
+When set to 1 (which is the default), CUDA task and transfer queueing costs are
+taken into account in simgrid mode.
+</dd>
+
 </dl>
 </dl>
 
 
 \section MiscellaneousAndDebug Miscellaneous And Debug
 \section MiscellaneousAndDebug Miscellaneous And Debug

+ 5 - 3
src/core/simgrid.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2012-2014  Université de Bordeaux
+ * Copyright (C) 2012-2015  Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -350,7 +350,8 @@ void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *j, struct star
 		task->cond = cond;
 		task->cond = cond;
 		task->next = NULL;
 		task->next = NULL;
 		/* Sleep 10µs for the GPU task queueing */
 		/* Sleep 10µs for the GPU task queueing */
-		MSG_process_sleep(0.000010);
+		if (_starpu_simgrid_queue_malloc_cost())
+			MSG_process_sleep(0.000010);
 		if (last_task[workerid])
 		if (last_task[workerid])
 		{
 		{
 			/* Make this task depend on the previous */
 			/* Make this task depend on the previous */
@@ -566,7 +567,8 @@ int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node,
 		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
 		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
 
 
 	/* Sleep 10µs for the GPU transfer queueing */
 	/* Sleep 10µs for the GPU transfer queueing */
-	MSG_process_sleep(0.000010);
+	if (_starpu_simgrid_queue_malloc_cost())
+		MSG_process_sleep(0.000010);
 	transfer_submit(transfer);
 	transfer_submit(transfer);
 	/* Note: from here, transfer might be already freed */
 	/* Note: from here, transfer might be already freed */
 
 

+ 4 - 0
src/core/simgrid.h

@@ -46,6 +46,10 @@ void _starpu_simgrid_get_platform_path(char *path, size_t maxlen);
 msg_as_t _starpu_simgrid_get_as_by_name(const char *name);
 msg_as_t _starpu_simgrid_get_as_by_name(const char *name);
 #pragma weak starpu_mpi_world_rank
 #pragma weak starpu_mpi_world_rank
 extern int starpu_mpi_world_rank(void);
 extern int starpu_mpi_world_rank(void);
+
+#define _starpu_simgrid_cuda_malloc_cost() starpu_get_env_number_default("STARPU_SIMGRID_CUDA_MALLOC_COST", 1) /* non-zero (default 1): simulate device allocation/free costs */
+#define _starpu_simgrid_queue_malloc_cost() starpu_get_env_number_default("STARPU_SIMGRID_CUDA_QUEUE_COST", 1) /* non-zero (default 1): simulate task/transfer queueing costs; reads the variable name given in the documentation */
+
 #endif
 #endif
 
 
 #endif // __SIMGRID_H__
 #endif // __SIMGRID_H__

+ 2 - 1
src/datawizard/datawizard.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2010, 2012-2014  Université de Bordeaux
+ * Copyright (C) 2009-2010, 2012-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -30,6 +30,7 @@ int __starpu_datawizard_progress(unsigned memory_node, unsigned may_alloc, unsig
 	int ret = 0;
 	int ret = 0;
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
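+	/* XXX: this 10µs sleep remains unconditional; presumably it should also be made tunable like the other simgrid costs -- to be confirmed */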
 	MSG_process_sleep(0.000010);
 	MSG_process_sleep(0.000010);
 #endif
 #endif
 	STARPU_UYIELD();
 	STARPU_UYIELD();

+ 12 - 6
src/datawizard/malloc.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2010, 2012-2014  Université de Bordeaux
+ * Copyright (C) 2009-2010, 2012-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -25,6 +25,7 @@
 #include <drivers/opencl/driver_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/malloc.h>
 #include <datawizard/malloc.h>
+#include <core/simgrid.h>
 
 
 static size_t _malloc_align = sizeof(void*);
 static size_t _malloc_align = sizeof(void*);
 
 
@@ -119,7 +120,8 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 		 * Ideally we would simulate this batching in 1MiB requests
 		 * Ideally we would simulate this batching in 1MiB requests
 		 * instead of computing an average value.
 		 * instead of computing an average value.
 		 */
 		 */
-		MSG_process_sleep((float) dim * 0.000650 / 1048576.);
+		if (_starpu_simgrid_cuda_malloc_cost())
+			MSG_process_sleep((float) dim * 0.000650 / 1048576.);
 #else /* STARPU_SIMGRID */
 #else /* STARPU_SIMGRID */
 		if (_starpu_can_submit_cuda_task())
 		if (_starpu_can_submit_cuda_task())
 		{
 		{
@@ -410,7 +412,8 @@ _starpu_malloc_on_node(unsigned dst_node, size_t size)
 #endif
 #endif
 			/* Sleep for the allocation */
 			/* Sleep for the allocation */
 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
-			MSG_process_sleep(0.000175);
+			if (_starpu_simgrid_cuda_malloc_cost())
+				MSG_process_sleep(0.000175);
 			if (!last[dst_node])
 			if (!last[dst_node])
 				last[dst_node] = 1<<10;
 				last[dst_node] = 1<<10;
 			addr = last[dst_node];
 			addr = last[dst_node];
@@ -444,7 +447,8 @@ _starpu_malloc_on_node(unsigned dst_node, size_t size)
 				static uintptr_t last[STARPU_MAXNODES];
 				static uintptr_t last[STARPU_MAXNODES];
 				/* Sleep for the allocation */
 				/* Sleep for the allocation */
 				STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
 				STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
-				MSG_process_sleep(0.000175);
+				if (_starpu_simgrid_cuda_malloc_cost())
+					MSG_process_sleep(0.000175);
 				if (!last[dst_node])
 				if (!last[dst_node])
 					last[dst_node] = 1<<10;
 					last[dst_node] = 1<<10;
 				addr = last[dst_node];
 				addr = last[dst_node];
@@ -523,7 +527,8 @@ _starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
 			/* Sleep for the free */
 			/* Sleep for the free */
-			MSG_process_sleep(0.000750);
+			if (_starpu_simgrid_cuda_malloc_cost())
+				MSG_process_sleep(0.000750);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
 #else
 #else
 			cudaError_t err;
 			cudaError_t err;
@@ -558,7 +563,8 @@ _starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 			STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
 			/* Sleep for the free */
 			/* Sleep for the free */
-			MSG_process_sleep(0.000750);
+			if (_starpu_simgrid_cuda_malloc_cost())
+				MSG_process_sleep(0.000750);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
 #else
 #else
 			cl_int err;
 			cl_int err;