Browse Source

merge trunk

Corentin Salingue 12 years ago
parent
commit
1abb2dfa5d

+ 4 - 0
ChangeLog

@@ -29,6 +29,10 @@ New features:
 	  allocate data correctly, and to submit the matching receive of
 	  the envelope.
 
+Small features:
+  * Add cl_arg_free field to enable automatic free(cl_arg) on task
+    destroy.
+
 StarPU 1.1.0 (svn revision xxxx)
 ==============================================
 

+ 5 - 3
STARPU-VERSION

@@ -17,19 +17,21 @@
 # - If any interfaces have been removed or changed since the last
 #   public release, then set age to 0. change
 
+# This is the tarball version, major.minor
+STARPU_EFFECTIVE_VERSION=1.2
+
 # Note for StarPU 1.1: we have changed ABI
 
 # Libtool interface versioning (info "(libtool) Versioning").
-LIBSTARPU_INTERFACE_CURRENT=1	# increment upon ABI change
+LIBSTARPU_INTERFACE_CURRENT=2	# increment upon ABI change
 LIBSTARPU_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPU_INTERFACE_AGE=0	# set to CURRENT - PREVIOUS interface
-STARPU_EFFECTIVE_VERSION=1.0
 
 LIBSTARPUFFT_INTERFACE_CURRENT=1	# increment upon ABI change
 LIBSTARPUFFT_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPUFFT_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface
 
-LIBSTARPUMPI_INTERFACE_CURRENT=1	# increment upon ABI change
+LIBSTARPUMPI_INTERFACE_CURRENT=2	# increment upon ABI change
 LIBSTARPUMPI_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPUMPI_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface
 

+ 6 - 4
configure.ac

@@ -16,8 +16,7 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-AC_INIT([StarPU], [1.2.0], [starpu-devel@lists.gforge.inria.fr],
-  [starpu], [http://runtime.bordeaux.inria.fr/StarPU/])
+AC_INIT([StarPU], [1.2.0], [starpu-devel@lists.gforge.inria.fr], [starpu], [http://runtime.bordeaux.inria.fr/StarPU/])
 AC_CONFIG_SRCDIR(include/starpu.h)
 AC_CONFIG_AUX_DIR([build-aux])
 
@@ -26,6 +25,8 @@ dnl Versioning.
 STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`"
 STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`"
 STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`"
+STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3| sed 's/rc.*//'`"
+dnl We do not want the rcXX suffix in the release version. We would like to use sed -r 's/[a-z]+.*//' to remove any string, but the -r option is not portable.
 AC_SUBST([STARPU_MAJOR_VERSION])
 AC_SUBST([STARPU_MINOR_VERSION])
 AC_SUBST([STARPU_RELEASE_VERSION])
@@ -800,7 +801,7 @@ if test x$disable_asynchronous_copy = xyes ; then
 fi
 
 AC_MSG_CHECKING(whether asynchronous CUDA copy should be disabled)
-AC_ARG_ENABLE(asynchronous-cudacopy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy],
+AC_ARG_ENABLE(asynchronous-cuda-copy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy],
 			[disable asynchronous copy between CPU and CUDA devices])],
 			enable_asynchronous_cuda_copy=$enableval, enable_asynchronous_cuda_copy=yes)
 disable_asynchronous_cuda_copy=no
@@ -813,7 +814,7 @@ if test x$disable_asynchronous_cuda_copy = xyes ; then
 fi
 
 AC_MSG_CHECKING(whether asynchronous OpenCL copy should be disabled)
-AC_ARG_ENABLE(asynchronous-openclcopy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy],
+AC_ARG_ENABLE(asynchronous-opencl-copy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy],
 			[disable asynchronous copy between CPU and OPENCL devices])],
 			enable_asynchronous_opencl_copy=$enableval, enable_asynchronous_opencl_copy=yes)
 disable_asynchronous_opencl_copy=no
@@ -1068,6 +1069,7 @@ AC_MSG_RESULT($nmaxbuffers)
 AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
 		[how many buffers can be manipulated per task])
 
+# TODO: add option to choose maxnodes
 if test x$enable_simgrid = xyes ; then
 	# We still need the room for the virtual CUDA/OpenCL devices
 	maxnodes=16

+ 8 - 0
doc/chapters/api.texi

@@ -2038,6 +2038,14 @@ this case, the argument given to the codelet is therefore not the
 This field is ignored for CPU, CUDA and OpenCL codelets, where the
 @code{cl_arg} pointer is given as such.
 
+@item @code{unsigned cl_arg_free} (optional)
+In case @code{cl_arg} was allocated by the application through @code{malloc},
+setting @code{cl_arg_free} to 1 makes StarPU automatically call
+@code{free(cl_arg)} when destroying the task. This saves the user from
+defining a callback just for that. This is mostly useful when targeting MIC or
+SCC, where the codelet does not execute in the same memory space as the main
+thread.
+
 @item @code{void (*callback_func)(void *)} (optional) (default: @code{NULL})
 This is a function pointer of prototype @code{void (*f)(void *)} which
 specifies a possible callback. If this pointer is non-null, the callback

+ 3 - 0
doc/chapters/installing.texi

@@ -114,6 +114,9 @@ configuration}.
 $ ./configure
 @end example
 
+If @code{configure} does not detect some software or produces errors, please
+make sure to post the content of @code{config.log} when reporting the issue.
+
 By default, the files produced during the compilation are placed in
 the source directory. As the compilation generates a lot of files, it
 is advised to put them all in a separate directory. It is then

+ 4 - 4
gcc-plugin/examples/Makefile.am

@@ -28,8 +28,8 @@ endif
 if !STARPU_HAVE_WINDOWS
 ## test loader program
 LOADER			=	loader
-loader_CPPFLAGS =  $(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
-LOADER_BIN		=	$(abs_top_builddir)/examples/$(LOADER)
+loader_CPPFLAGS		=	$(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
+LOADER_BIN		=	$(abs_top_builddir)/gcc-plugin/examples/$(LOADER)
 loader_SOURCES		=	../../tests/loader.c
 
 if STARPU_HAVE_AM111
@@ -54,7 +54,7 @@ examplebin_PROGRAMS =			\
 endif STARPU_USE_CPU
 
 AM_LDFLAGS = $(top_builddir)/src/@LIBSTARPU_LINK@
-AM_LDFLAGS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) 
+AM_LDFLAGS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS)
 
 AM_CPPFLAGS =						\
   -I$(top_srcdir)/include				\
@@ -65,7 +65,7 @@ AM_CFLAGS =							\
   -fplugin="$(builddir)/../src/.libs/starpu.so"			\
   -fplugin-arg-starpu-include-dir="$(top_srcdir)/include"	\
   -fplugin-arg-starpu-verbose					\
-  -Wall
+  -Wall $(HWLOC_CFLAGS)
 
 #noinst_HEADERS =				\
 #  cholesky/cholesky.h				\

+ 1 - 1
gcc-plugin/src/Makefile.am

@@ -40,7 +40,7 @@ AM_CPPFLAGS =						\
   -I$(top_srcdir)/gcc-plugin/include			\
   -I$(top_srcdir)/include				\
   -I$(GCC_PLUGIN_INCLUDE_DIR) -Wall -DYYERROR_VERBOSE=1	\
-  $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+  $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
 
 AM_LDFLAGS = -module
 

+ 1 - 1
gcc-plugin/tests/pointer-tasks.c

@@ -14,7 +14,7 @@
    You should have received a copy of the GNU General Public License
    along with GCC-StarPU.  If not, see <http://www.gnu.org/licenses/>.  */
 
-/* (instructions run (ldflags "-lstarpu-1.0")) */
+/* (instructions run (ldflags "-lstarpu-1.2")) */
 
 #undef NDEBUG
 

+ 1 - 1
gcc-plugin/tests/scalar-tasks.c

@@ -14,7 +14,7 @@
    You should have received a copy of the GNU General Public License
    along with GCC-StarPU.  If not, see <http://www.gnu.org/licenses/>.  */
 
-/* (instructions run (ldflags "-lstarpu-1.0")) */
+/* (instructions run (ldflags "-lstarpu-1.2")) */
 
 #undef NDEBUG
 

+ 0 - 16
include/starpu_data.h

@@ -98,7 +98,6 @@ int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, uns
 
 #define STARPU_MAIN_RAM 0
 
-<<<<<<< .courant
 enum starpu_node_kind
 {
 	STARPU_UNUSED     = 0x00,
@@ -113,21 +112,6 @@ unsigned starpu_memory_nodes_get_count(void);
 enum starpu_node_kind starpu_node_get_kind(unsigned node);
 
 
-=======
-enum starpu_node_kind
-{
-	STARPU_UNUSED     = 0x00,
-	STARPU_CPU_RAM    = 0x01,
-	STARPU_CUDA_RAM   = 0x02,
-	STARPU_OPENCL_RAM = 0x03
-};
-
-unsigned starpu_worker_get_memory_node(unsigned workerid);
-unsigned starpu_memory_nodes_get_count(void);
-enum starpu_node_kind starpu_node_get_kind(unsigned node);
-
-
->>>>>>> .fusion-droit.r9881
 /* It is possible to associate a mask to a piece of data (and its children) so
  * that when it is modified, it is automatically transferred into those memory
  * node. For instance a (1<<0) write-through mask means that the CUDA workers will

+ 3 - 3
include/starpu_hash.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -29,7 +29,7 @@ extern "C"
 /* Compute the CRC of a byte buffer seeded by the inputcrc "current state".
  * The return value should be considered as the new "current state" for future
  * CRC computation. */
-uint32_t starpu_hash_crc32c_be_n(void *input, size_t n, uint32_t inputcrc);
+uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc);
 
 /* Compute the CRC of a 32bit number seeded by the inputcrc "current state".
  * The return value should be considered as the new "current state" for future
@@ -39,7 +39,7 @@ uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc);
 /* Compute the CRC of a string seeded by the inputcrc "current state".  The
  * return value should be considered as the new "current state" for future CRC
  * computation. */
-uint32_t starpu_hash_crc32c_string(char *str, uint32_t inputcrc);
+uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc);
 
 #ifdef __cplusplus
 }

+ 2 - 0
include/starpu_task.h

@@ -128,6 +128,8 @@ struct starpu_task
 	void *cl_arg;
 	/* in case the argument buffer has to be uploaded explicitly */
 	size_t cl_arg_size;
+	/* whether StarPU must free cl_arg when the task is destroyed - 0 by default */
+	unsigned cl_arg_free;
 
 	/* when the task is done, callback_func(callback_arg) is called */
 	void (*callback_func)(void *);

+ 12 - 0
include/starpu_worker.h

@@ -101,6 +101,18 @@ struct starpu_worker_collection
 	void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
 };
 
+enum starpu_node_kind
+{
+	STARPU_UNUSED     = 0x00,
+	STARPU_CPU_RAM    = 0x01,
+	STARPU_CUDA_RAM   = 0x02,
+	STARPU_OPENCL_RAM = 0x03
+};
+
+unsigned starpu_worker_get_memory_node(unsigned workerid);
+unsigned starpu_memory_nodes_get_count(void);
+enum starpu_node_kind starpu_node_get_kind(unsigned node);
+
 /* types of structures the worker collection can implement */
 #define STARPU_WORKER_LIST 0
 

+ 1 - 1
socl/src/Makefile.am

@@ -14,7 +14,7 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
 libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/socl/src
 AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) -no-undefined

+ 3 - 3
src/common/hash.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2011, 2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -33,7 +33,7 @@ static inline uint32_t __attribute__ ((pure)) starpu_crc32c_be_8(uint8_t inputby
 	return crc;
 }
 
-uint32_t starpu_hash_crc32c_be_n(void *input, size_t n, uint32_t inputcrc)
+uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc)
 {
 	uint8_t *p = (uint8_t *)input;
 	size_t i;
@@ -60,7 +60,7 @@ uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc)
 	return crc;
 }
 
-uint32_t starpu_hash_crc32c_string(char *str, uint32_t inputcrc)
+uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc)
 {
 	uint32_t hash = inputcrc;
 

+ 5 - 0
src/core/task.c

@@ -155,6 +155,11 @@ void _starpu_task_destroy(struct starpu_task *task)
 		starpu_task_clean(task);
 		/* TODO handle the case of task with detach = 1 and destroy = 1 */
 		/* TODO handle the case of non terminated tasks -> return -EINVAL */
+
+		/* Free cl_arg on behalf of the user if cl_arg_free is set */
+		if (task->cl_arg_free)
+			free(task->cl_arg);
+
 		free(task);
 	}
 }

+ 3 - 3
src/core/topology.c

@@ -501,7 +501,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].worker_mask = STARPU_CUDA;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cudagpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 		config->worker_mask |= STARPU_CUDA;
 
 		struct handle_entry *entry;
@@ -575,7 +575,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].worker_mask = STARPU_OPENCL;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + openclgpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 		config->worker_mask |= STARPU_OPENCL;
 	}
 
@@ -620,7 +620,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 		config->workers[worker_idx].devid = cpu;
 		config->workers[worker_idx].worker_mask = STARPU_CPU;
 		config->worker_mask |= STARPU_CPU;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 	}
 
 	topology->nworkers += topology->ncpus;

+ 2 - 0
src/datawizard/copy_driver.c

@@ -390,7 +390,9 @@ int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_d
 
 int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data)
 {
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
 	struct _starpu_async_channel *async_channel = async_data;
+#endif
 	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
 	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
 

+ 8 - 7
src/datawizard/memory_nodes.c

@@ -108,20 +108,21 @@ unsigned starpu_memory_nodes_get_count(void)
 
 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
 {
-	unsigned nnodes;
+	unsigned node;
 	/* ATOMIC_ADD returns the new value ... */
-	nnodes = STARPU_ATOMIC_ADD(&descr.nnodes, 1);
+	node = STARPU_ATOMIC_ADD(&descr.nnodes, 1) - 1;
+	STARPU_ASSERT_MSG(node < STARPU_MAXNODES,"Too many nodes (%u)!", node);
 	STARPU_ASSERT_MSG(nnodes < STARPU_MAXNODES,"Too many nodes !");
 
-	descr.nodes[nnodes-1] = kind;
-	_STARPU_TRACE_NEW_MEM_NODE(nnodes-1);
+	descr.nodes[node] = kind;
+	_STARPU_TRACE_NEW_MEM_NODE(node);
 
-	descr.devid[nnodes-1] = devid;
+	descr.devid[node] = devid;
 
 	/* for now, there is no condition associated to that newly created node */
-	descr.condition_count[nnodes-1] = 0;
+	descr.condition_count[node] = 0;
 
-	return (nnodes-1);
+	return node;
 
 }
 

+ 7 - 1
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -682,10 +682,16 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 	double local_task_length[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_data_penalty[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_power[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
+
+	/* Expected end of this task on the workers */
 	double exp_end[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
-	double max_exp_end = 0.0;
+
+	/* This is the minimum among the exp_end[] matrix */
 	double best_exp_end;
 
+	/* This is the maximum termination time of already-scheduled tasks over all workers */
+	double max_exp_end = 0.0;
+
 	double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS];
 
 	struct starpu_sched_ctx_iterator it;

+ 2 - 0
src/sched_policies/work_stealing_policy.c

@@ -287,6 +287,8 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 	starpu_pthread_mutex_t *worker_sched_mutex;
 	starpu_pthread_cond_t *worker_sched_cond;
 	starpu_worker_get_sched_condition(workerid, &worker_sched_mutex, &worker_sched_cond);
+
+	/* Note: this mutex is released before taking the victim mutex, to avoid interlock */
 	STARPU_PTHREAD_MUTEX_UNLOCK(worker_sched_mutex);
        
 

+ 1 - 1
src/util/starpu_insert_task.c

@@ -60,7 +60,6 @@ void starpu_codelet_unpack_args(void *_cl_arg, ...)
 	}
 
 	va_end(varg_list);
-	free(cl_arg);
 }
 
 int starpu_insert_task(struct starpu_codelet *cl, ...)
@@ -80,6 +79,7 @@ int starpu_insert_task(struct starpu_codelet *cl, ...)
 	}
 
 	struct starpu_task *task = starpu_task_create();
+	task->cl_arg_free = 1;
 
 	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
 	{

+ 0 - 4
src/util/starpu_insert_task_utils.c

@@ -29,7 +29,6 @@ struct insert_task_cb_wrapper
 {
 	_starpu_callback_func_t callback_func;
 	void *callback_arg;
-	void *arg_stack;
 };
 
 static
@@ -41,7 +40,6 @@ void starpu_task_insert_callback_wrapper(void *_cl_arg_wrapper)
 	if (cl_arg_wrapper->callback_func)
 		cl_arg_wrapper->callback_func(cl_arg_wrapper->callback_arg);
 
-	// cl_arg_wrapper->arg_stack is freed by starpu_codelet_unpack_args()
 	free(cl_arg_wrapper);
 }
 
@@ -229,7 +227,6 @@ int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_si
 	STARPU_ASSERT(cl_arg_wrapper);
 
 	cl_arg_wrapper->callback_func = NULL;
-	cl_arg_wrapper->arg_stack = arg_buffer;
 
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
@@ -358,7 +355,6 @@ int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_si
 			(cl == NULL) ? "none" :
 			(*task)->cl->name ? (*task)->cl->name :
 			((*task)->cl->model && (*task)->cl->model->symbol)?(*task)->cl->model->symbol:"none");
-		free(cl_arg_wrapper->arg_stack);
 		free(cl_arg_wrapper);
 	}
 

+ 1 - 1
starpufft/Makefile.am

@@ -15,7 +15,7 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 
-AM_CFLAGS = $(GLOBAL_AM_CFLAGS)
+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
 
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la