浏览代码

merge trunk

Samuel Thibault 12 年之前
父节点
当前提交
6b6461329c

+ 3 - 2
configure.ac

@@ -826,7 +826,7 @@ if test x$disable_asynchronous_copy = xyes ; then
 fi
 
 AC_MSG_CHECKING(whether asynchronous CUDA copy should be disabled)
-AC_ARG_ENABLE(asynchronous-cudacopy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy],
+AC_ARG_ENABLE(asynchronous-cuda-copy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy],
 			[disable asynchronous copy between CPU and CUDA devices])],
 			enable_asynchronous_cuda_copy=$enableval, enable_asynchronous_cuda_copy=yes)
 disable_asynchronous_cuda_copy=no
@@ -839,7 +839,7 @@ if test x$disable_asynchronous_cuda_copy = xyes ; then
 fi
 
 AC_MSG_CHECKING(whether asynchronous OpenCL copy should be disabled)
-AC_ARG_ENABLE(asynchronous-openclcopy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy],
+AC_ARG_ENABLE(asynchronous-opencl-copy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy],
 			[disable asynchronous copy between CPU and OPENCL devices])],
 			enable_asynchronous_opencl_copy=$enableval, enable_asynchronous_opencl_copy=yes)
 disable_asynchronous_opencl_copy=no
@@ -1353,6 +1353,7 @@ AC_MSG_RESULT($nmaxbuffers)
 AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
 		[how many buffers can be manipulated per task])
 
+# TODO: add option to choose maxnodes
 if test x$enable_simgrid = xyes ; then
 	# We still need the room for the virtual CUDA/OpenCL devices
 	maxnodes=16

+ 1 - 1
gcc-plugin/examples/Makefile.am

@@ -65,7 +65,7 @@ AM_CFLAGS =							\
   -fplugin="$(builddir)/../src/.libs/starpu.so"			\
   -fplugin-arg-starpu-include-dir="$(top_srcdir)/include"	\
   -fplugin-arg-starpu-verbose					\
-  -Wall
+  -Wall $(HWLOC_CFLAGS)
 
 #noinst_HEADERS =				\
 #  cholesky/cholesky.h				\

+ 1 - 1
gcc-plugin/src/Makefile.am

@@ -40,7 +40,7 @@ AM_CPPFLAGS =						\
   -I$(top_srcdir)/gcc-plugin/include			\
   -I$(top_srcdir)/include				\
   -I$(GCC_PLUGIN_INCLUDE_DIR) -Wall -DYYERROR_VERBOSE=1	\
-  $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+  $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
 
 AM_LDFLAGS = -module
 

+ 1 - 1
socl/src/Makefile.am

@@ -14,7 +14,7 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
 libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/socl/src
 AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) -no-undefined

+ 3 - 3
src/core/topology.c

@@ -751,7 +751,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].worker_mask = STARPU_CUDA;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cudagpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 		config->worker_mask |= STARPU_CUDA;
 
 		struct handle_entry *entry;
@@ -826,7 +826,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].worker_mask = STARPU_OPENCL;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + openclgpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 		config->worker_mask |= STARPU_OPENCL;
 	}
 
@@ -951,7 +951,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 		config->workers[worker_idx].devid = cpu;
 		config->workers[worker_idx].worker_mask = STARPU_CPU;
 		config->worker_mask |= STARPU_CPU;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 	}
 
 	topology->nworkers += topology->ncpus;

+ 2 - 0
src/datawizard/copy_driver.c

@@ -467,7 +467,9 @@ int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_d
 
 int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data)
 {
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
 	struct _starpu_async_channel *async_channel = async_data;
+#endif
 	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
 	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
 

+ 8 - 7
src/datawizard/memory_nodes.c

@@ -108,19 +108,20 @@ unsigned starpu_memory_nodes_get_count(void)
 
 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
 {
-	unsigned nnodes;
+	unsigned node;
 	/* ATOMIC_ADD returns the new value ... */
-	nnodes = STARPU_ATOMIC_ADD(&descr.nnodes, 1);
+	node = STARPU_ATOMIC_ADD(&descr.nnodes, 1) - 1;
+	STARPU_ASSERT_MSG(node < STARPU_MAXNODES,"Too many nodes (%u)!", node);
 
-	descr.nodes[nnodes-1] = kind;
-	_STARPU_TRACE_NEW_MEM_NODE(nnodes-1);
+	descr.nodes[node] = kind;
+	_STARPU_TRACE_NEW_MEM_NODE(node);
 
-	descr.devid[nnodes-1] = devid;
+	descr.devid[node] = devid;
 
 	/* for now, there is no condition associated to that newly created node */
-	descr.condition_count[nnodes-1] = 0;
+	descr.condition_count[node] = 0;
 
-	return (nnodes-1);
+	return node;
 }
 
 #ifdef STARPU_SIMGRID

+ 7 - 1
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -682,10 +682,16 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 	double local_task_length[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_data_penalty[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_power[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
+
+	/* Expected end of this task on the workers */
 	double exp_end[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
-	double max_exp_end = 0.0;
+
+	/* This is the minimum among the exp_end[] matrix */
 	double best_exp_end;
 
+	/* This is the maximum termination time of already-scheduled tasks over all workers */
+	double max_exp_end = 0.0;
+
 	double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS];
 
 	struct starpu_sched_ctx_iterator it;

+ 2 - 0
src/sched_policies/work_stealing_policy.c

@@ -287,6 +287,8 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 	starpu_pthread_mutex_t *worker_sched_mutex;
 	starpu_pthread_cond_t *worker_sched_cond;
 	starpu_worker_get_sched_condition(workerid, &worker_sched_mutex, &worker_sched_cond);
+
+	/* Note: Releasing this mutex before taking the victim mutex, to avoid interlock*/
 	STARPU_PTHREAD_MUTEX_UNLOCK(worker_sched_mutex);
        
 

+ 1 - 1
starpufft/Makefile.am

@@ -15,7 +15,7 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 
-AM_CFLAGS = $(GLOBAL_AM_CFLAGS)
+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
 
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la