Browse Source

- merge trunk

Olivier Aumage 11 years ago
parent
commit
6253cb1a91

+ 25 - 9
doc/doxygen/chapters/08scheduling.doxy

@@ -10,6 +10,19 @@
 
 
 \section TaskSchedulingPolicy Task Scheduling Policy
 \section TaskSchedulingPolicy Task Scheduling Policy
 
 
+The basics of the scheduling policy are that
+
+<ul>
+<li>The scheduler gets to schedule tasks (<c>push</c> operation) when they become
+ready to be executed, i.e. they are not waiting for some tags, data dependencies
+or task dependencies.</li>
+<li>Workers pull tasks (<c>pop</c> operation) one by one from the scheduler.</li>
+</ul>
+
+This means scheduling policies usually contain at least one queue of tasks to
+store them between the time when they become available, and the time when a
+worker gets to grab them.
+
 By default, StarPU uses the simple greedy scheduler <c>eager</c>. This is
 By default, StarPU uses the simple greedy scheduler <c>eager</c>. This is
 because it provides correct load balance even if the application codelets do not
 because it provides correct load balance even if the application codelets do not
 have performance models. If your application codelets have performance models
 have performance models. If your application codelets have performance models
@@ -17,35 +30,38 @@ have performance models. If your application codelets have performance models
 to the environment variable \ref STARPU_SCHED. For instance <c>export
 to the environment variable \ref STARPU_SCHED. For instance <c>export
 STARPU_SCHED=dmda</c> . Use <c>help</c> to get the list of available schedulers.
 STARPU_SCHED=dmda</c> . Use <c>help</c> to get the list of available schedulers.
 
 
-The <b>eager</b> scheduler uses a central task queue, from which workers draw tasks
-to work on. This however does not permit to prefetch data since the scheduling
+The <b>eager</b> scheduler uses a central task queue, from which all workers draw tasks
+to work on concurrently. This however does not permit data prefetching, since the scheduling
 decision is taken late. If a task has a non-0 priority, it is put at the front of the queue.
 decision is taken late. If a task has a non-0 priority, it is put at the front of the queue.
 
 
 The <b>prio</b> scheduler also uses a central task queue, but sorts tasks by
 The <b>prio</b> scheduler also uses a central task queue, but sorts tasks by
 priority (between -5 and 5).
 priority (between -5 and 5).
 
 
-The <b>random</b> scheduler distributes tasks randomly according to assumed worker
+The <b>random</b> scheduler uses a queue per worker, and distributes tasks randomly according to assumed worker
 overall performance.
 overall performance.
 
 
-The <b>ws</b> (work stealing) scheduler schedules tasks on the local worker by
+The <b>ws</b> (work stealing) scheduler uses a queue per worker, and schedules
+a task on the worker which released it by
 default. When a worker becomes idle, it steals a task from the most loaded
 default. When a worker becomes idle, it steals a task from the most loaded
 worker.
 worker.
 
 
 The <b>dm</b> (deque model) scheduler uses task execution performance models into account to
 The <b>dm</b> (deque model) scheduler uses task execution performance models into account to
-perform an HEFT-similar scheduling strategy: it schedules tasks where their
-termination time will be minimal. The difference with HEFT is that tasks are
-scheduled in the order they become available.
+perform a HEFT-similar scheduling strategy: it schedules tasks where their
+termination time will be minimal. The difference with HEFT is that <b>dm</b>
+schedules tasks as soon as they become available, and thus in the order they
+become available, without taking priorities into account.
 
 
 The <b>dmda</b> (deque model data aware) scheduler is similar to dm, but it also takes
 The <b>dmda</b> (deque model data aware) scheduler is similar to dm, but it also takes
 into account data transfer time.
 into account data transfer time.
 
 
 The <b>dmdar</b> (deque model data aware ready) scheduler is similar to dmda,
 The <b>dmdar</b> (deque model data aware ready) scheduler is similar to dmda,
-it also sorts tasks on per-worker queues by number of already-available data
+but it also sorts tasks on per-worker queues by number of already-available data
 buffers on the target device.
 buffers on the target device.
 
 
 The <b>dmdas</b> (deque model data aware sorted) scheduler is similar to dmdar,
 The <b>dmdas</b> (deque model data aware sorted) scheduler is similar to dmdar,
 except that it sorts tasks by priority order, which allows to become even closer
 except that it sorts tasks by priority order, which allows to become even closer
-to HEFT.
+to HEFT by respecting priorities after having made the scheduling decision (but
+it still schedules tasks in the order they become available).
 
 
 The <b>heft</b> (heterogeneous earliest finish time) scheduler is a deprecated
 The <b>heft</b> (heterogeneous earliest finish time) scheduler is a deprecated
 alias for <b>dmda</b>.
 alias for <b>dmda</b>.

+ 1 - 1
src/datawizard/coherency.c

@@ -767,7 +767,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j)
 		_STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, index);
 		_STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, index);
 
 
 		/* If the replicate was not initialized yet, we have to do it now */
 		/* If the replicate was not initialized yet, we have to do it now */
-		if (!local_replicate->initialized)
+		if (!(mode & STARPU_SCRATCH) && !local_replicate->initialized)
 			_starpu_redux_init_data_replicate(handle, local_replicate, workerid);
 			_starpu_redux_init_data_replicate(handle, local_replicate, workerid);
 	}
 	}
 
 

+ 3 - 1
src/datawizard/memalloc.c

@@ -697,8 +697,10 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 	if (reclaim && !force)
 	if (reclaim && !force)
 	{
 	{
 		static int warned;
 		static int warned;
+		char name[32];
+		_starpu_memory_node_get_name(node, name, sizeof(name));
 		if (!warned) {
 		if (!warned) {
-			_STARPU_DISP("Not enough memory left on node %u. Trying to purge %lu bytes out. This message will not be printed again for further purges\n", node, (unsigned long) reclaim);
+			_STARPU_DISP("Not enough memory left on node %s. Your application working set is probably simply just hard to fit in the devices, but StarPU will cope with it by trying to purge %lu bytes out. This message will not be printed again for further purges\n", name, (unsigned long) reclaim);
 			warned = 1;
 			warned = 1;
 		}
 		}
 	}
 	}

+ 31 - 0
src/datawizard/memory_nodes.c

@@ -106,6 +106,37 @@ unsigned starpu_memory_nodes_get_count(void)
 	return descr.nnodes;
 	return descr.nnodes;
 }
 }
 
 
+void _starpu_memory_node_get_name(unsigned node, char *name, int size)
+{
+	const char *prefix;
+	switch (descr.nodes[node]) {
+	case STARPU_CPU_RAM:
+		prefix = "RAM";
+		break;
+	case STARPU_CUDA_RAM:
+		prefix = "CUDA";
+		break;
+	case STARPU_OPENCL_RAM:
+		prefix = "OpenCL";
+		break;
+	case STARPU_DISK_RAM:
+		prefix = "Disk";
+		break;
+	case STARPU_MIC_RAM:
+		prefix = "MIC";
+		break;
+	case STARPU_SCC_RAM:
+		prefix = "SCC_RAM";
+		break;
+	case STARPU_SCC_SHM:
+		prefix = "SCC_shared";
+		break;
+	case STARPU_UNUSED:
+		STARPU_ASSERT(0);
+	}
+	snprintf(name, size, "%s %u\n", prefix, descr.devid[node]);
+}
+
 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
 {
 {
 	unsigned node;
 	unsigned node;

+ 2 - 1
src/datawizard/memory_nodes.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -82,6 +82,7 @@ unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid);
 void _starpu_memory_node_register_condition(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, unsigned memory_node);
 void _starpu_memory_node_register_condition(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, unsigned memory_node);
 
 
 int _starpu_memory_node_get_devid(unsigned node);
 int _starpu_memory_node_get_devid(unsigned node);
+void _starpu_memory_node_get_name(unsigned node, char *name, int size);
 
 
 struct _starpu_memory_node_descr *_starpu_memory_node_get_description(void);
 struct _starpu_memory_node_descr *_starpu_memory_node_get_description(void);