@@ -0,0 +1,200 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2015,2017 Inria
+ * Copyright (C) 2017 CNRS
+ * Copyright (C) 2014,2016-2019 Université de Bordeaux
+ * Copyright (C) 2013 Simon Archipoff
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* This scheduler runs only GEMMs on GPUs, and tries to feed them with as many
+ * GEMMs as possible. */
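+/* Select it at run time with e.g. STARPU_SCHED=modular-gemm. */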
+
+#include <starpu_sched_component.h>
+#include <starpu_scheduler.h>
+#include <stdlib.h>	/* qsort() */
+#include <string.h>	/* strcmp() */
+
+/* Optionally, it can take memory affinity into account, to avoid too many GPU
+ * data transfers */
+
+#define MEMORY_AFFINITY
+
|
|
|
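+/* Per-child scheduling estimates collected while looking for a GPU for one task. */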
+struct child_data {
+	double expected_start;
+	double predicted;
+	double predicted_transfer;
+	double expected_end;
+	unsigned child;
+};
+
|
|
|
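+/* qsort() comparator: order children by increasing expected end time. */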
+static int compar(const void *_a, const void *_b)
+{
+	const struct child_data *a = _a;
+	const struct child_data *b = _b;
+	if (a->expected_end < b->expected_end)
+		return -1;
+	if (a->expected_end == b->expected_end)
+		return 0;
+	return 1;
+}
+
|
|
|
+static int gemm_push_task(struct starpu_sched_component * component, struct starpu_task * task)
+{
+	unsigned n = component->nchildren;
+	unsigned i;
+
+	/* See if it's a GEMM task (tasks without a performance model have no name) */
+	const char *name = starpu_task_get_model_name(task);
+
+	if (name && (!strcmp(name, "gemm") ||
+		     !strcmp(name, "dgemm") ||
+		     !strcmp(name, "sgemm") ||
+		     !strcmp(name, "chol_model_22") ||
+		     !strcmp(name, "starpu_dlu_lu_model_22") ||
+		     !strcmp(name, "starpu_slu_lu_model_22")))
+	{
+		/* It's a GEMM, try to push to GPUs */
+
+		struct child_data child_data[n];
+
+		for (i = 0; i < n; i++)
+		{
+			child_data[i].expected_end = -1;
+			/* Initialize so the assignment to task->predicted_transfer
+			 * below is defined even when MEMORY_AFFINITY is disabled */
+			child_data[i].predicted_transfer = 0;
+			child_data[i].child = i;
+		}
+
+		/* Look at GPU availability time */
+		for (i = 0; i < n; i++)
+		{
+			struct starpu_sched_component *child = component->children[i];
+			double predicted;
+			if (starpu_sched_component_execute_preds(child, task, &predicted))
+			{
+				double expected_start;
+				child_data[i].expected_start =
+					expected_start = child->estimated_end(child);
+				child_data[i].predicted = predicted;
+				child_data[i].expected_end = expected_start
+					+ predicted;
+
+#ifdef MEMORY_AFFINITY
+				double predicted_transfer;
+				child_data[i].predicted_transfer =
+					predicted_transfer = starpu_sched_component_transfer_length(child, task);
+				child_data[i].expected_end += predicted_transfer;
+#endif
+			}
+		}
+
+		/* Sort by increasing expected end */
+		qsort(child_data, n, sizeof(*child_data), compar);
+
|
|
|
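+		/* Children without an execution prediction kept expected_end == -1;
+		 * they sort first and are skipped by the loop below. */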
+		/* Try to push to the GPU with minimum availability time, to balance the load. */
+		for (i = 0; i < n; i++)
+		{
+			if (child_data[i].expected_end != -1)
+			{
+				struct starpu_sched_component *child = component->children[child_data[i].child];
+
+				/* Note it in the task so that estimated_end() has it */
+				task->predicted = child_data[i].predicted;
+				task->predicted_transfer = child_data[i].predicted_transfer;
+
+				int ret = starpu_sched_component_push_task(component, child, task);
+				if (!ret)
+					/* Ok, this GPU took it */
+					return 0;
+			}
+		}
+	}
+
+	/* It's not a GEMM, or no GPU wanted to take it, find somebody else */
+	int nimpl;
+	for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+	{
+		for (i = 0; i < n; i++)
+		{
+			struct starpu_sched_component *child = component->children[i];
+			int workerid;
+			for (workerid = starpu_bitmap_first(child->workers);
+			     workerid != -1;
+			     workerid = starpu_bitmap_next(child->workers, workerid))
+			{
+				if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER
+				    && (starpu_worker_can_execute_task(workerid, task, nimpl)
+					|| starpu_combined_worker_can_execute_task(workerid, task, nimpl)))
+				{
+					int ret = starpu_sched_component_push_task(component, child, task);
+					if (!ret)
+						return 0;
+				}
+			}
+		}
+	}
+	/* FIFOs are full */
+	return 1;
+}
+
|
|
|
+struct starpu_sched_component *starpu_sched_component_gemm_create(struct starpu_sched_tree *tree, void *params STARPU_ATTRIBUTE_UNUSED)
+{
+	struct starpu_sched_component *component = starpu_sched_component_create(tree, "gemm");
+
+	component->push_task = gemm_push_task;
+
+	return component;
+}
+
|
|
|
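+/* Build the usual modular tree around the gemm component: a priority FIFO
+ * above it and per-memory-node priority FIFOs below, with implementation
+ * selection left to the generic helper. */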
+static void initialize_gemm_center_policy(unsigned sched_ctx_id)
+{
+	starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_gemm_create, NULL,
+			STARPU_SCHED_SIMPLE_DECIDE_MEMNODES |
+			STARPU_SCHED_SIMPLE_FIFO_ABOVE |
+			STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO |
+			STARPU_SCHED_SIMPLE_FIFOS_BELOW |
+			STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO |
+			STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id);
+}
+
|
|
|
+static void deinitialize_gemm_center_policy(unsigned sched_ctx_id)
+{
+	struct starpu_sched_tree *tree = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	starpu_sched_tree_destroy(tree);
+}
+
+struct starpu_sched_policy _starpu_sched_modular_gemm_policy =
+{
+	.init_sched = initialize_gemm_center_policy,
+	.deinit_sched = deinitialize_gemm_center_policy,
+	.add_workers = starpu_sched_tree_add_workers,
+	.remove_workers = starpu_sched_tree_remove_workers,
+	.push_task = starpu_sched_tree_push_task,
+	.pop_task = starpu_sched_tree_pop_task,
+	.pre_exec_hook = starpu_sched_component_worker_pre_exec_hook,
+	.post_exec_hook = starpu_sched_component_worker_post_exec_hook,
+	.pop_every_task = NULL,
+	.policy_name = "modular-gemm",
+	.policy_description = "gemm modular policy",
+	.worker_type = STARPU_WORKER_LIST,
+};
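
Usage sketch (not part of the patch): the policy registers under the name "modular-gemm", so it can be picked with the STARPU_SCHED environment variable or programmatically through struct starpu_conf, as below. Note that gemm_push_task matches tasks by their performance-model name, so GEMM codelets must carry one of the listed model names to be steered to GPUs.

/* Minimal sketch, assuming a standard StarPU installation: select the
 * modular-gemm policy programmatically instead of via STARPU_SCHED. */
#include <starpu.h>

int main(void)
{
	struct starpu_conf conf;
	starpu_conf_init(&conf);
	conf.sched_policy_name = "modular-gemm"; /* matches .policy_name above */
	if (starpu_init(&conf) != 0)
		return 1;

	/* ... build codelets whose performance model is named e.g. "gemm",
	 * submit tasks, wait for them ... */

	starpu_shutdown();
	return 0;
}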