Parcourir la source

- merge trunk

Olivier Aumage il y a 11 ans
Parent
commit
6253cb1a91

+ 25 - 9
doc/doxygen/chapters/08scheduling.doxy

@@ -10,6 +10,19 @@
 
 \section TaskSchedulingPolicy Task Scheduling Policy
 
+The basics of the scheduling policy are that
+
+<ul>
+<li>The scheduler gets to schedule tasks (<c>push</c> operation) when they become
+ready to be executed, i.e. they are not waiting for some tags, data dependencies
+or task dependencies.</li>
+<li>Workers pull tasks (<c>pop</c> operation) one by one from the scheduler.</li>
+</ul>
+
+This means scheduling policies usually contain at least one queue of tasks to
+store them between the time when they become available, and the time when a
+worker gets to grab them.
+
 By default, StarPU uses the simple greedy scheduler <c>eager</c>. This is
 because it provides correct load balance even if the application codelets do not
 have performance models. If your application codelets have performance models
@@ -17,35 +30,38 @@ have performance models. If your application codelets have performance models
 to the environment variable \ref STARPU_SCHED. For instance <c>export
 STARPU_SCHED=dmda</c> . Use <c>help</c> to get the list of available schedulers.
 
-The <b>eager</b> scheduler uses a central task queue, from which workers draw tasks
-to work on. This however does not permit to prefetch data since the scheduling
+The <b>eager</b> scheduler uses a central task queue, from which all workers draw tasks
+to work on concurrently. This however does not permit to prefetch data since the scheduling
 decision is taken late. If a task has a non-0 priority, it is put at the front of the queue.
 
 The <b>prio</b> scheduler also uses a central task queue, but sorts tasks by
 priority (between -5 and 5).
 
-The <b>random</b> scheduler distributes tasks randomly according to assumed worker
+The <b>random</b> scheduler uses a queue per worker, and distributes tasks randomly according to assumed worker
 overall performance.
 
-The <b>ws</b> (work stealing) scheduler schedules tasks on the local worker by
+The <b>ws</b> (work stealing) scheduler uses a queue per worker, and schedules
+a task on the worker which released it by
 default. When a worker becomes idle, it steals a task from the most loaded
 worker.
 
 The <b>dm</b> (deque model) scheduler takes task execution performance models into account to
-perform an HEFT-similar scheduling strategy: it schedules tasks where their
-termination time will be minimal. The difference with HEFT is that tasks are
-scheduled in the order they become available.
+perform a HEFT-similar scheduling strategy: it schedules tasks where their
+termination time will be minimal. The difference with HEFT is that <b>dm</b>
+schedules tasks as soon as they become available, and thus in the order they
+become available, without taking priorities into account.
 
 The <b>dmda</b> (deque model data aware) scheduler is similar to dm, but it also takes
 into account data transfer time.
 
 The <b>dmdar</b> (deque model data aware ready) scheduler is similar to dmda,
-it also sorts tasks on per-worker queues by number of already-available data
+but it also sorts tasks on per-worker queues by number of already-available data
 buffers on the target device.
 
 The <b>dmdas</b> (deque model data aware sorted) scheduler is similar to dmdar,
 except that it sorts tasks by priority order, which brings it even closer
-to HEFT.
+to HEFT by respecting priorities after having made the scheduling decision (but
+it still schedules tasks in the order they become available).
 
 The <b>heft</b> (heterogeneous earliest finish time) scheduler is a deprecated
 alias for <b>dmda</b>.

+ 1 - 1
src/datawizard/coherency.c

@@ -767,7 +767,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j)
 		_STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, index);
 
 		/* If the replicate was not initialized yet, we have to do it now */
-		if (!local_replicate->initialized)
+		if (!(mode & STARPU_SCRATCH) && !local_replicate->initialized)
 			_starpu_redux_init_data_replicate(handle, local_replicate, workerid);
 	}
 

+ 3 - 1
src/datawizard/memalloc.c

@@ -697,8 +697,10 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 	if (reclaim && !force)
 	{
 		static int warned;
+		char name[32];
+		_starpu_memory_node_get_name(node, name, sizeof(name));
 		if (!warned) {
-			_STARPU_DISP("Not enough memory left on node %u. Trying to purge %lu bytes out. This message will not be printed again for further purges\n", node, (unsigned long) reclaim);
+			_STARPU_DISP("Not enough memory left on node %s. Your application working set is probably simply just hard to fit in the devices, but StarPU will cope with it by trying to purge %lu bytes out. This message will not be printed again for further purges\n", name, (unsigned long) reclaim);
 			warned = 1;
 		}
 	}

+ 31 - 0
src/datawizard/memory_nodes.c

@@ -106,6 +106,37 @@ unsigned starpu_memory_nodes_get_count(void)
 	return descr.nnodes;
 }
 
+/* Write a human-readable name for memory node `node` into `name`.
+ *
+ * The name has the form "<kind> <devid>", where <kind> is chosen from
+ * descr.nodes[node] and <devid> is descr.devid[node].  At most `size` bytes
+ * are written; snprintf truncates and NUL-terminates the result.  Nothing is
+ * written when `name` is NULL or `size` is not positive.
+ *
+ * `node` is used unchecked as an index into descr.nodes and descr.devid.
+ */
+void _starpu_memory_node_get_name(unsigned node, char *name, int size)
+{
+	/* Fallback so that prefix is never read uninitialized, should
+	 * STARPU_ASSERT not abort (e.g. assertions disabled) or should the
+	 * node kind not be listed below. */
+	const char *prefix = "unknown";
+
+	/* A negative size would be converted to a huge size_t by snprintf. */
+	if (!name || size <= 0)
+		return;
+
+	switch (descr.nodes[node]) {
+	case STARPU_CPU_RAM:
+		prefix = "RAM";
+		break;
+	case STARPU_CUDA_RAM:
+		prefix = "CUDA";
+		break;
+	case STARPU_OPENCL_RAM:
+		prefix = "OpenCL";
+		break;
+	case STARPU_DISK_RAM:
+		prefix = "Disk";
+		break;
+	case STARPU_MIC_RAM:
+		prefix = "MIC";
+		break;
+	case STARPU_SCC_RAM:
+		prefix = "SCC_RAM";
+		break;
+	case STARPU_SCC_SHM:
+		prefix = "SCC_shared";
+		break;
+	case STARPU_UNUSED:
+	default:
+		/* Naming an unused or unknown node is a caller error. */
+		STARPU_ASSERT(0);
+		break;
+	}
+	/* No trailing newline: the name gets embedded in the middle of
+	 * diagnostic messages.
+	 * NOTE(review): %u assumes descr.devid[] holds unsigned values; the
+	 * related accessors use int -- confirm and switch to %d if needed. */
+	snprintf(name, (size_t) size, "%s %u", prefix, descr.devid[node]);
+}
+
 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
 {
 	unsigned node;

+ 2 - 1
src/datawizard/memory_nodes.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -82,6 +82,7 @@ unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid);
 void _starpu_memory_node_register_condition(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, unsigned memory_node);
 
 int _starpu_memory_node_get_devid(unsigned node);
+void _starpu_memory_node_get_name(unsigned node, char *name, int size);
 
 struct _starpu_memory_node_descr *_starpu_memory_node_get_description(void);