il y a 11 ans · 6253cb1a91
--- a/doc/doxygen/chapters/08scheduling.doxy
+++ b/doc/doxygen/chapters/08scheduling.doxy
@@ -10,6 +10,19 @@
 
				 
			
 
				 \section TaskSchedulingPolicy Task Scheduling Policy
			
 
				 
			
 
				+The basics of the scheduling policy are that
			
 
				+
			
 
				+<ul>
			
 
				+<li>The scheduler gets to schedule tasks (<c>push</c> operation) when they become
			
 
				+ready to be executed, i.e. they are not waiting for some tags, data dependencies
			
 
				+or task dependencies.</li>
			
 
				+<li>Workers pull tasks (<c>pop</c> operation) one by one from the scheduler.</li>
			
 
				+</ul>
			
 
				+
			
 
				+This means scheduling policies usually contain at least one queue of tasks to
			
 
				+store them between the time when they become available, and the time when a
			
 
				+worker gets to grab them.
			
 
				+
			
 
				 By default, StarPU uses the simple greedy scheduler <c>eager</c>. This is
			
 
				 because it provides correct load balance even if the application codelets do not
			
 
				 have performance models. If your application codelets have performance models
			
@@ -17,35 +30,38 @@ have performance models. If your application codelets have performance models
 
				 to the environment variable \ref STARPU_SCHED. For instance <c>export
			
 
				 STARPU_SCHED=dmda</c> . Use <c>help</c> to get the list of available schedulers.
			
 
				 
			
 
				-The <b>eager</b> scheduler uses a central task queue, from which workers draw tasks
			
 
				-to work on. This however does not permit to prefetch data since the scheduling
			
 
				+The <b>eager</b> scheduler uses a central task queue, from which all workers draw tasks
			
 
				+to work on concurrently. This however does not allow prefetching data, since the scheduling
			
 
				 decision is taken late. If a task has a non-0 priority, it is put at the front of the queue.
			
 
				 
			
 
				 The <b>prio</b> scheduler also uses a central task queue, but sorts tasks by
			
 
				 priority (between -5 and 5).
			
 
				 
			
 
				-The <b>random</b> scheduler distributes tasks randomly according to assumed worker
			
 
				+The <b>random</b> scheduler uses a queue per worker, and distributes tasks randomly according to assumed worker
			
 
				 overall performance.
			
 
				 
			
 
				-The <b>ws</b> (work stealing) scheduler schedules tasks on the local worker by
			
 
				+The <b>ws</b> (work stealing) scheduler uses a queue per worker, and schedules
			
 
				+a task on the worker which released it by
			
 
				 default. When a worker becomes idle, it steals a task from the most loaded
			
 
				 worker.
			
 
				 
			
 
				 The <b>dm</b> (deque model) scheduler takes task execution performance models into account to
			
 
				-perform an HEFT-similar scheduling strategy: it schedules tasks where their
			
 
				-termination time will be minimal. The difference with HEFT is that tasks are
			
 
				-scheduled in the order they become available.
			
 
				+perform a HEFT-similar scheduling strategy: it schedules tasks where their
			
 
				+termination time will be minimal. The difference with HEFT is that <b>dm</b>
			
 
				+schedules tasks as soon as they become available, and thus in the order they
			
 
				+become available, without taking priorities into account.
			
 
				 
			
 
				 The <b>dmda</b> (deque model data aware) scheduler is similar to dm, but it also takes
			
 
				 into account data transfer time.
			
 
				 
			
 
				 The <b>dmdar</b> (deque model data aware ready) scheduler is similar to dmda,
			
 
				-it also sorts tasks on per-worker queues by number of already-available data
			
 
				+but it also sorts tasks on per-worker queues by number of already-available data
			
 
				 buffers on the target device.
			
 
				 
			
 
				 The <b>dmdas</b> (deque model data aware sorted) scheduler is similar to dmdar,
			
 
				 except that it sorts tasks by priority order, which allows it to come even closer
			
 
				-to HEFT.
			
 
				+to HEFT by respecting priorities after having made the scheduling decision (but
			
 
				+it still schedules tasks in the order they become available).
			
 
				 
			
 
				 The <b>heft</b> (heterogeneous earliest finish time) scheduler is a deprecated
			
 
				 alias for <b>dmda</b>.
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -767,7 +767,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j)
 
				 		_STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, index);
			
 
				 
			
 
				 		/* If the replicate was not initialized yet, we have to do it now */
			
 
				-		if (!local_replicate->initialized)
			
 
				+		if (!(mode & STARPU_SCRATCH) && !local_replicate->initialized)
			
 
				 			_starpu_redux_init_data_replicate(handle, local_replicate, workerid);
			
 
				 	}
			
 
				 
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -697,8 +697,10 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 
				 	if (reclaim && !force)
			
 
				 	{
			
 
				 		static int warned;
			
 
				+		char name[32];
			
 
				+		_starpu_memory_node_get_name(node, name, sizeof(name));
			
 
				 		if (!warned) {
			
 
				-			_STARPU_DISP("Not enough memory left on node %u. Trying to purge %lu bytes out. This message will not be printed again for further purges\n", node, (unsigned long) reclaim);
			
 
				+			_STARPU_DISP("Not enough memory left on node %s. Your application working set is probably simply just hard to fit in the devices, but StarPU will cope with it by trying to purge %lu bytes out. This message will not be printed again for further purges\n", name, (unsigned long) reclaim);
			
 
				 			warned = 1;
			
 
				 		}
			
 
				 	}
			
--- a/src/datawizard/memory_nodes.c
+++ b/src/datawizard/memory_nodes.c
@@ -106,6 +106,37 @@ unsigned starpu_memory_nodes_get_count(void)
 
				 	return descr.nnodes;
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Write a human-readable name for memory node `node` into `name`, formatted
				+ * as "<kind prefix> <device id>" (e.g. "CUDA 0") and truncated to fit.
				+ *
				+ * node: index into descr.nodes[] / descr.devid[].
				+ * name: destination buffer; left untouched when NULL or when size <= 0.
				+ * size: capacity of `name` in bytes, including the terminating NUL.
				+ *
				+ * The result carries no trailing newline, so that callers can embed it in
				+ * the middle of a message.
				+ */
				+void _starpu_memory_node_get_name(unsigned node, char *name, int size)
				+{
				+	/* Fallback so that prefix is never read uninitialized, even when
				+	 * STARPU_ASSERT is compiled out or the kind is not one listed below. */
				+	const char *prefix = "unknown";
				+
				+	/* A negative size would be converted to a huge size_t by snprintf. */
				+	if (!name || size <= 0)
				+		return;
				+
				+	switch (descr.nodes[node]) {
				+	case STARPU_CPU_RAM:
				+		prefix = "RAM";
				+		break;
				+	case STARPU_CUDA_RAM:
				+		prefix = "CUDA";
				+		break;
				+	case STARPU_OPENCL_RAM:
				+		prefix = "OpenCL";
				+		break;
				+	case STARPU_DISK_RAM:
				+		prefix = "Disk";
				+		break;
				+	case STARPU_MIC_RAM:
				+		prefix = "MIC";
				+		break;
				+	case STARPU_SCC_RAM:
				+		prefix = "SCC_RAM";
				+		break;
				+	case STARPU_SCC_SHM:
				+		prefix = "SCC_shared";
				+		break;
				+	case STARPU_UNUSED:
				+		/* Naming an unused node is a caller bug. */
				+		STARPU_ASSERT(0);
				+		break;
				+	}
				+
				+	/* NOTE(review): devid is presumably a signed int, as in the
				+	 * _starpu_memory_node_register() and _starpu_memory_node_get_devid()
				+	 * prototypes -- confirm against the descr definition. The cast keeps
				+	 * the format specifier and the argument type in agreement either way. */
				+	snprintf(name, (size_t) size, "%s %d", prefix, (int) descr.devid[node]);
				+}
			
 
				+
			
 
				 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
			
 
				 {
			
 
				 	unsigned node;
			
--- a/src/datawizard/memory_nodes.h
+++ b/src/datawizard/memory_nodes.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2012  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2009-2012, 2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -82,6 +82,7 @@ unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid);
 
				 void _starpu_memory_node_register_condition(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, unsigned memory_node);
			
 
				 
			
 
				 int _starpu_memory_node_get_devid(unsigned node);
			
 
				+void _starpu_memory_node_get_name(unsigned node, char *name, int size);
			
 
				 
			
 
				 struct _starpu_memory_node_descr *_starpu_memory_node_get_description(void);