10 年之前 · f433b48230
--- a/doc/doxygen/chapters/21simgrid.doxy
+++ b/doc/doxygen/chapters/21simgrid.doxy
@@ -121,6 +121,13 @@ case. Since during simgrid execution, the functions of the codelet are actually
 
				 not called, one can use dummy functions such as the following to still permit
			
 
				 CUDA or OpenCL execution:
			
 
				 
			
 
				+\section TweakingSimulation Tweaking simulation
			
 
				+
			
 
				+The simulation can be tweaked, to be able to tune it between a very accurate
			
 
				+simulation and a very simple simulation (which is thus close to scheduling
			
 
				+theory results), see the \ref STARPU_SIMGRID_CUDA_MALLOC_COST and \ref
			
 
				+STARPU_SIMGRID_CUDA_QUEUE_COST environment variables.
			
 
				+
			
 
				 \section MPI applications
			
 
				 
			
 
				 StarPU-MPI applications can also be run in simgrid mode. It needs to be compiled
			
--- a/doc/doxygen/chapters/40environment_variables.doxy
+++ b/doc/doxygen/chapters/40environment_variables.doxy
@@ -456,6 +456,22 @@ it prints messages on the standard output when data are added or removed from th
 
				 communication cache.
			
 
				 </dd>
			
 
				 
			
 
				+<dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_SIMGRID_CUDA_MALLOC_COST
			
 
				+\addindex __env__STARPU_SIMGRID_CUDA_MALLOC_COST
			
 
				+When set to 1 (which is the default), CUDA malloc costs are taken into account
			
 
				+in simgrid mode.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_SIMGRID_CUDA_QUEUE_COST</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_SIMGRID_CUDA_QUEUE_COST
			
 
				+\addindex __env__STARPU_SIMGRID_CUDA_QUEUE_COST
			
 
				+When set to 1 (which is the default), CUDA task and transfer queueing costs are
			
 
				+taken into account in simgrid mode.
			
 
				+</dd>
			
 
				+
			
 
				 </dl>
			
 
				 
			
 
				 \section MiscellaneousAndDebug Miscellaneous And Debug
			
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2012-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2012-2015  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -350,7 +350,8 @@ void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *j, struct star
 
				 		task->cond = cond;
			
 
				 		task->next = NULL;
			
 
				 		/* Sleep 10µs for the GPU task queueing */
			
 
				-		MSG_process_sleep(0.000010);
			
 
				+		if (_starpu_simgrid_queue_malloc_cost())
			
 
				+			MSG_process_sleep(0.000010);
			
 
				 		if (last_task[workerid])
			
 
				 		{
			
 
				 			/* Make this task depend on the previous */
			
@@ -566,7 +567,8 @@ int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node,
 
				 		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
			
 
				 
			
 
				 	/* Sleep 10µs for the GPU transfer queueing */
			
 
				-	MSG_process_sleep(0.000010);
			
 
				+	if (_starpu_simgrid_queue_malloc_cost())
			
 
				+		MSG_process_sleep(0.000010);
			
 
				 	transfer_submit(transfer);
			
 
				 	/* Note: from here, transfer might be already freed */
			
 
				 
			
--- a/src/core/simgrid.h
+++ b/src/core/simgrid.h
@@ -46,6 +46,10 @@ void _starpu_simgrid_get_platform_path(char *path, size_t maxlen);
 
				 msg_as_t _starpu_simgrid_get_as_by_name(const char *name);
			
 
				 #pragma weak starpu_mpi_world_rank
			
 
				 extern int starpu_mpi_world_rank(void);
			
 
				+
			
 
				+#define _starpu_simgrid_cuda_malloc_cost() starpu_get_env_number_default("STARPU_SIMGRID_CUDA_MALLOC_COST", 1)
			
 
				+#define _starpu_simgrid_queue_malloc_cost() starpu_get_env_number_default("STARPU_SIMGRID_CUDA_QUEUE_COST", 1) /* gates the simulated GPU task/transfer queueing sleeps; reads the documented variable name */
			
 
				+
			
 
				 #endif
			
 
				 
			
 
				 #endif // __SIMGRID_H__
			
--- a/src/datawizard/datawizard.c
+++ b/src/datawizard/datawizard.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2010, 2012-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2010, 2012-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -30,6 +30,7 @@ int __starpu_datawizard_progress(unsigned memory_node, unsigned may_alloc, unsig
 
				 	int ret = 0;
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				+	/* XXX: this sleep is always simulated; no STARPU_SIMGRID_*_COST variable disables it */
			
 
				 	MSG_process_sleep(0.000010);
			
 
				 #endif
			
 
				 	STARPU_UYIELD();
			
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2010, 2012-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2010, 2012-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -25,6 +25,7 @@
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 #include <datawizard/memory_manager.h>
			
 
				 #include <datawizard/malloc.h>
			
 
				+#include <core/simgrid.h>
			
 
				 
			
 
				 static size_t _malloc_align = sizeof(void*);
			
 
				 
			
@@ -119,7 +120,8 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 
				 		 * Ideally we would simulate this batching in 1MiB requests
			
 
				 		 * instead of computing an average value.
			
 
				 		 */
			
 
				-		MSG_process_sleep((float) dim * 0.000650 / 1048576.);
			
 
				+		if (_starpu_simgrid_cuda_malloc_cost())
			
 
				+			MSG_process_sleep((float) dim * 0.000650 / 1048576.);
			
 
				 #else /* STARPU_SIMGRID */
			
 
				 		if (_starpu_can_submit_cuda_task())
			
 
				 		{
			
@@ -410,7 +412,8 @@ _starpu_malloc_on_node(unsigned dst_node, size_t size)
 
				 #endif
			
 
				 			/* Sleep for the allocation */
			
 
				 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
			
 
				-			MSG_process_sleep(0.000175);
			
 
				+			if (_starpu_simgrid_cuda_malloc_cost())
			
 
				+				MSG_process_sleep(0.000175);
			
 
				 			if (!last[dst_node])
			
 
				 				last[dst_node] = 1<<10;
			
 
				 			addr = last[dst_node];
			
@@ -444,7 +447,8 @@ _starpu_malloc_on_node(unsigned dst_node, size_t size)
 
				 				static uintptr_t last[STARPU_MAXNODES];
			
 
				 				/* Sleep for the allocation */
			
 
				 				STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
			
 
				-				MSG_process_sleep(0.000175);
			
 
				+				if (_starpu_simgrid_cuda_malloc_cost())
			
 
				+					MSG_process_sleep(0.000175);
			
 
				 				if (!last[dst_node])
			
 
				 					last[dst_node] = 1<<10;
			
 
				 				addr = last[dst_node];
			
@@ -523,7 +527,8 @@ _starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 
				 #ifdef STARPU_SIMGRID
			
 
				 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
			
 
				 			/* Sleep for the free */
			
 
				-			MSG_process_sleep(0.000750);
			
 
				+			if (_starpu_simgrid_cuda_malloc_cost())
			
 
				+				MSG_process_sleep(0.000750);
			
 
				 			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
			
 
				 #else
			
 
				 			cudaError_t err;
			
@@ -558,7 +563,8 @@ _starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 
				 #ifdef STARPU_SIMGRID
			
 
				 			STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
			
 
				 			/* Sleep for the free */
			
 
				-			MSG_process_sleep(0.000750);
			
 
				+			if (_starpu_simgrid_cuda_malloc_cost())
			
 
				+				MSG_process_sleep(0.000750);
			
 
				 			STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
			
 
				 #else
			
 
				 			cl_int err;