12 years ago · 6b6461329c
--- a/configure.ac
+++ b/configure.ac
@@ -826,7 +826,7 @@ if test x$disable_asynchronous_copy = xyes ; then
 
				 fi
			
 
				 
			
 
				 AC_MSG_CHECKING(whether asynchronous CUDA copy should be disabled)
			
 
				-AC_ARG_ENABLE(asynchronous-cudacopy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy],
			
 
				+AC_ARG_ENABLE(asynchronous-cuda-copy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy],
			
 
				 			[disable asynchronous copy between CPU and CUDA devices])],
			
 
				 			enable_asynchronous_cuda_copy=$enableval, enable_asynchronous_cuda_copy=yes)
			
 
				 disable_asynchronous_cuda_copy=no
			
@@ -839,7 +839,7 @@ if test x$disable_asynchronous_cuda_copy = xyes ; then
 
				 fi
			
 
				 
			
 
				 AC_MSG_CHECKING(whether asynchronous OpenCL copy should be disabled)
			
 
				-AC_ARG_ENABLE(asynchronous-openclcopy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy],
			
 
				+AC_ARG_ENABLE(asynchronous-opencl-copy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy],
			
 
				 			[disable asynchronous copy between CPU and OPENCL devices])],
			
 
				 			enable_asynchronous_opencl_copy=$enableval, enable_asynchronous_opencl_copy=yes)
			
 
				 disable_asynchronous_opencl_copy=no
			
@@ -1353,6 +1353,7 @@ AC_MSG_RESULT($nmaxbuffers)
 
				 AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
			
 
				 		[how many buffers can be manipulated per task])
			
 
				 
			
 
				+# TODO: add option to choose maxnodes
			
 
				 if test x$enable_simgrid = xyes ; then
			
 
				 	# We still need the room for the virtual CUDA/OpenCL devices
			
 
				 	maxnodes=16
			
--- a/gcc-plugin/examples/Makefile.am
+++ b/gcc-plugin/examples/Makefile.am
@@ -65,7 +65,7 @@ AM_CFLAGS =							\
 
				   -fplugin="$(builddir)/../src/.libs/starpu.so"			\
			
 
				   -fplugin-arg-starpu-include-dir="$(top_srcdir)/include"	\
			
 
				   -fplugin-arg-starpu-verbose					\
			
 
				-  -Wall
			
 
				+  -Wall $(HWLOC_CFLAGS)
			
 
				 
			
 
				 #noinst_HEADERS =				\
			
 
				 #  cholesky/cholesky.h				\
			
--- a/gcc-plugin/src/Makefile.am
+++ b/gcc-plugin/src/Makefile.am
@@ -40,7 +40,7 @@ AM_CPPFLAGS =						\
 
				   -I$(top_srcdir)/gcc-plugin/include			\
			
 
				   -I$(top_srcdir)/include				\
			
 
				   -I$(GCC_PLUGIN_INCLUDE_DIR) -Wall -DYYERROR_VERBOSE=1	\
			
 
				-  $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
			
 
				+  $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
			
 
				 
			
 
				 AM_LDFLAGS = -module
			
 
				 
			
--- a/socl/src/Makefile.am
+++ b/socl/src/Makefile.am
@@ -14,7 +14,7 @@
 
				 #
			
 
				 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				 
			
 
				-AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
			
 
				+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
			
 
				 libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/socl/src
			
 
				 AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) -no-undefined
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -751,7 +751,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 
				 		config->workers[worker_idx].devid = devid;
			
 
				 		config->workers[worker_idx].perf_arch = arch;
			
 
				 		config->workers[worker_idx].worker_mask = STARPU_CUDA;
			
 
				-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cudagpu].workerid);
			
 
				+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
			
 
				 		config->worker_mask |= STARPU_CUDA;
			
 
				 
			
 
				 		struct handle_entry *entry;
			
@@ -826,7 +826,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 
				 		config->workers[worker_idx].devid = devid;
			
 
				 		config->workers[worker_idx].perf_arch = arch;
			
 
				 		config->workers[worker_idx].worker_mask = STARPU_OPENCL;
			
 
				-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + openclgpu].workerid);
			
 
				+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
			
 
				 		config->worker_mask |= STARPU_OPENCL;
			
 
				 	}
			
 
				 
			
@@ -951,7 +951,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 
				 		config->workers[worker_idx].devid = cpu;
			
 
				 		config->workers[worker_idx].worker_mask = STARPU_CPU;
			
 
				 		config->worker_mask |= STARPU_CPU;
			
 
				-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cpu].workerid);
			
 
				+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
			
 
				 	}
			
 
				 
			
 
				 	topology->nworkers += topology->ncpus;
			
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -467,7 +467,9 @@ int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_d
 
				 
			
 
				 int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data)
			
 
				 {
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
			
 
				 	struct _starpu_async_channel *async_channel = async_data;
			
 
				+#endif
			
 
				 	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
			
 
				 	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
			
 
				 
			
--- a/src/datawizard/memory_nodes.c
+++ b/src/datawizard/memory_nodes.c
@@ -108,19 +108,20 @@ unsigned starpu_memory_nodes_get_count(void)
 
				 
			
 
				 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
			
 
				 {
			
 
				-	unsigned nnodes;
			
 
				+	unsigned node;
			
 
				 	/* ATOMIC_ADD returns the new value ... */
			
 
				-	nnodes = STARPU_ATOMIC_ADD(&descr.nnodes, 1);
			
 
				+	node = STARPU_ATOMIC_ADD(&descr.nnodes, 1) - 1;
			
 
				+	STARPU_ASSERT_MSG(node < STARPU_MAXNODES,"Too many nodes (%u)!", node);
			
 
				 
			
 
				-	descr.nodes[nnodes-1] = kind;
			
 
				-	_STARPU_TRACE_NEW_MEM_NODE(nnodes-1);
			
 
				+	descr.nodes[node] = kind;
			
 
				+	_STARPU_TRACE_NEW_MEM_NODE(node);
			
 
				 
			
 
				-	descr.devid[nnodes-1] = devid;
			
 
				+	descr.devid[node] = devid;
			
 
				 
			
 
				 	/* for now, there is no condition associated to that newly created node */
			
 
				-	descr.condition_count[nnodes-1] = 0;
			
 
				+	descr.condition_count[node] = 0;
			
 
				 
			
 
				-	return (nnodes-1);
			
 
				+	return node;
			
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -682,10 +682,16 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 
				 	double local_task_length[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
			
 
				 	double local_data_penalty[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
			
 
				 	double local_power[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
			
 
				+
			
 
				+	/* Expected end of this task on the workers */
			
 
				 	double exp_end[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
			
 
				-	double max_exp_end = 0.0;
			
 
				+
			
 
				+	/* This is the minimum among the exp_end[] matrix */
			
 
				 	double best_exp_end;
			
 
				 
			
 
				+	/* This is the maximum termination time of already-scheduled tasks over all workers */
			
 
				+	double max_exp_end = 0.0;
			
 
				+
			
 
				 	double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS];
			
 
				 
			
 
				 	struct starpu_sched_ctx_iterator it;
			
--- a/src/sched_policies/work_stealing_policy.c
+++ b/src/sched_policies/work_stealing_policy.c
@@ -287,6 +287,8 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 
				 	starpu_pthread_mutex_t *worker_sched_mutex;
			
 
				 	starpu_pthread_cond_t *worker_sched_cond;
			
 
				 	starpu_worker_get_sched_condition(workerid, &worker_sched_mutex, &worker_sched_cond);
			
 
				+
			
 
				+	/* Note: release this mutex before taking the victim's mutex, to avoid a deadlock */
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(worker_sched_mutex);
			
 
				        
			
 
				 
			
--- a/starpufft/Makefile.am
+++ b/starpufft/Makefile.am
@@ -15,7 +15,7 @@
 
				 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				 #
			
 
				 
			
 
				-AM_CFLAGS = $(GLOBAL_AM_CFLAGS)
			
 
				+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS)
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
			
 
				 
			
 
				 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la