Corentin Salingue 12 gadi atpakaļ
vecāks
revīzija
1abb2dfa5d

+ 4 - 0
ChangeLog

@@ -29,6 +29,10 @@ New features:
 	  allocate data correctly, and to submit the matching receive of
 	  allocate data correctly, and to submit the matching receive of
 	  the envelope.
 	  the envelope.
 
 
+Small features:
+  * Add cl_arg_free field to enable automatic free(cl_arg) on task
+    destroy.
+
 StarPU 1.1.0 (svn revision xxxx)
 StarPU 1.1.0 (svn revision xxxx)
 ==============================================
 ==============================================
 
 

+ 5 - 3
STARPU-VERSION

@@ -17,19 +17,21 @@
 # - If any interfaces have been removed or changed since the last
 # - If any interfaces have been removed or changed since the last
 #   public release, then set age to 0. change
 #   public release, then set age to 0. change
 
 
+# This is the tarball version, major.minor
+STARPU_EFFECTIVE_VERSION=1.2
+
 # Note for StarPU 1.1: we have changed ABI
 # Note for StarPU 1.1: we have changed ABI
 
 
 # Libtool interface versioning (info "(libtool) Versioning").
 # Libtool interface versioning (info "(libtool) Versioning").
-LIBSTARPU_INTERFACE_CURRENT=1	# increment upon ABI change
+LIBSTARPU_INTERFACE_CURRENT=2	# increment upon ABI change
 LIBSTARPU_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPU_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPU_INTERFACE_AGE=0	# set to CURRENT - PREVIOUS interface
 LIBSTARPU_INTERFACE_AGE=0	# set to CURRENT - PREVIOUS interface
-STARPU_EFFECTIVE_VERSION=1.0
 
 
 LIBSTARPUFFT_INTERFACE_CURRENT=1	# increment upon ABI change
 LIBSTARPUFFT_INTERFACE_CURRENT=1	# increment upon ABI change
 LIBSTARPUFFT_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPUFFT_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPUFFT_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface
 LIBSTARPUFFT_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface
 
 
-LIBSTARPUMPI_INTERFACE_CURRENT=1	# increment upon ABI change
+LIBSTARPUMPI_INTERFACE_CURRENT=2	# increment upon ABI change
 LIBSTARPUMPI_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPUMPI_INTERFACE_REVISION=0	# increment upon implementation change
 LIBSTARPUMPI_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface
 LIBSTARPUMPI_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface
 
 

+ 6 - 4
configure.ac

@@ -16,8 +16,7 @@
 #
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 
-AC_INIT([StarPU], [1.2.0], [starpu-devel@lists.gforge.inria.fr],
-  [starpu], [http://runtime.bordeaux.inria.fr/StarPU/])
+AC_INIT([StarPU], [1.2.0], [starpu-devel@lists.gforge.inria.fr], [starpu], [http://runtime.bordeaux.inria.fr/StarPU/])
 AC_CONFIG_SRCDIR(include/starpu.h)
 AC_CONFIG_SRCDIR(include/starpu.h)
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_AUX_DIR([build-aux])
 
 
@@ -26,6 +25,8 @@ dnl Versioning.
 STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`"
 STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`"
 STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`"
 STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`"
 STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`"
 STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`"
+STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3| sed 's/rc.*//'`"
+dnl we do not want the rcXX in the release version. we would like to use sed -r 's/[a-z]+.*//' to remove any string but the -r option is not portable
 AC_SUBST([STARPU_MAJOR_VERSION])
 AC_SUBST([STARPU_MAJOR_VERSION])
 AC_SUBST([STARPU_MINOR_VERSION])
 AC_SUBST([STARPU_MINOR_VERSION])
 AC_SUBST([STARPU_RELEASE_VERSION])
 AC_SUBST([STARPU_RELEASE_VERSION])
@@ -800,7 +801,7 @@ if test x$disable_asynchronous_copy = xyes ; then
 fi
 fi
 
 
 AC_MSG_CHECKING(whether asynchronous CUDA copy should be disabled)
 AC_MSG_CHECKING(whether asynchronous CUDA copy should be disabled)
-AC_ARG_ENABLE(asynchronous-cudacopy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy],
+AC_ARG_ENABLE(asynchronous-cuda-copy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy],
 			[disable asynchronous copy between CPU and CUDA devices])],
 			[disable asynchronous copy between CPU and CUDA devices])],
 			enable_asynchronous_cuda_copy=$enableval, enable_asynchronous_cuda_copy=yes)
 			enable_asynchronous_cuda_copy=$enableval, enable_asynchronous_cuda_copy=yes)
 disable_asynchronous_cuda_copy=no
 disable_asynchronous_cuda_copy=no
@@ -813,7 +814,7 @@ if test x$disable_asynchronous_cuda_copy = xyes ; then
 fi
 fi
 
 
 AC_MSG_CHECKING(whether asynchronous OpenCL copy should be disabled)
 AC_MSG_CHECKING(whether asynchronous OpenCL copy should be disabled)
-AC_ARG_ENABLE(asynchronous-openclcopy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy],
+AC_ARG_ENABLE(asynchronous-opencl-copy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy],
 			[disable asynchronous copy between CPU and OPENCL devices])],
 			[disable asynchronous copy between CPU and OPENCL devices])],
 			enable_asynchronous_opencl_copy=$enableval, enable_asynchronous_opencl_copy=yes)
 			enable_asynchronous_opencl_copy=$enableval, enable_asynchronous_opencl_copy=yes)
 disable_asynchronous_opencl_copy=no
 disable_asynchronous_opencl_copy=no
@@ -1068,6 +1069,7 @@ AC_MSG_RESULT($nmaxbuffers)
 AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
 AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
 		[how many buffers can be manipulated per task])
 		[how many buffers can be manipulated per task])
 
 
+# TODO: add option to choose maxnodes
 if test x$enable_simgrid = xyes ; then
 if test x$enable_simgrid = xyes ; then
 	# We still need the room for the virtual CUDA/OpenCL devices
 	# We still need the room for the virtual CUDA/OpenCL devices
 	maxnodes=16
 	maxnodes=16

+ 8 - 0
doc/chapters/api.texi

@@ -2038,6 +2038,14 @@ this case, the argument given to the codelet is therefore not the
 This field is ignored for CPU, CUDA and OpenCL codelets, where the
 This field is ignored for CPU, CUDA and OpenCL codelets, where the
 @code{cl_arg} pointer is given as such.
 @code{cl_arg} pointer is given as such.
 
 
+@item @code{unsigned cl_arg_free} (optional)
+In case @code{cl_arg} was allocated by the application through @code{malloc},
+setting @code{cl_arg_free} to 1 makes StarPU automatically call
+@code{free(cl_arg)} when destroying the task. This saves the user from
+defining a callback just for that. This is mostly useful when targeting MIC or
+SCC, where the codelet does not execute in the same memory space as the main
+thread.
+
 @item @code{void (*callback_func)(void *)} (optional) (default: @code{NULL})
 @item @code{void (*callback_func)(void *)} (optional) (default: @code{NULL})
 This is a function pointer of prototype @code{void (*f)(void *)} which
 This is a function pointer of prototype @code{void (*f)(void *)} which
 specifies a possible callback. If this pointer is non-null, the callback
 specifies a possible callback. If this pointer is non-null, the callback

+ 3 - 0
doc/chapters/installing.texi

@@ -114,6 +114,9 @@ configuration}.
 $ ./configure
 $ ./configure
 @end example
 @end example
 
 
+If @code{configure} does not detect some software or produces errors, please
+make sure to post the content of @code{config.log} when reporting the issue.
+
 By default, the files produced during the compilation are placed in
 By default, the files produced during the compilation are placed in
 the source directory. As the compilation generates a lot of files, it
 the source directory. As the compilation generates a lot of files, it
 is advised to put them all in a separate directory. It is then
 is advised to put them all in a separate directory. It is then

+ 4 - 4
gcc-plugin/examples/Makefile.am

@@ -28,8 +28,8 @@ endif
 if !STARPU_HAVE_WINDOWS
 if !STARPU_HAVE_WINDOWS
 ## test loader program
 ## test loader program
 LOADER			=	loader
 LOADER			=	loader
-loader_CPPFLAGS =  $(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
-LOADER_BIN		=	$(abs_top_builddir)/examples/$(LOADER)
+loader_CPPFLAGS		=	$(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
+LOADER_BIN		=	$(abs_top_builddir)/gcc-plugin/examples/$(LOADER)
 loader_SOURCES		=	../../tests/loader.c
 loader_SOURCES		=	../../tests/loader.c
 
 
 if STARPU_HAVE_AM111
 if STARPU_HAVE_AM111
@@ -54,7 +54,7 @@ examplebin_PROGRAMS =			\
 endif STARPU_USE_CPU
 endif STARPU_USE_CPU
 
 
 AM_LDFLAGS = $(top_builddir)/src/@LIBSTARPU_LINK@
 AM_LDFLAGS = $(top_builddir)/src/@LIBSTARPU_LINK@
-AM_LDFLAGS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) 
+AM_LDFLAGS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS)
 
 
 AM_CPPFLAGS =						\
 AM_CPPFLAGS =						\
   -I$(top_srcdir)/include				\
   -I$(top_srcdir)/include				\
@@ -65,7 +65,7 @@ AM_CFLAGS =							\
   -fplugin="$(builddir)/../src/.libs/starpu.so"			\
   -fplugin="$(builddir)/../src/.libs/starpu.so"			\
   -fplugin-arg-starpu-include-dir="$(top_srcdir)/include"	\
   -fplugin-arg-starpu-include-dir="$(top_srcdir)/include"	\
   -fplugin-arg-starpu-verbose					\
   -fplugin-arg-starpu-verbose					\
-  -Wall
+  -Wall $(HWLOC_CFLAGS)
 
 
 #noinst_HEADERS =				\
 #noinst_HEADERS =				\
 #  cholesky/cholesky.h				\
 #  cholesky/cholesky.h				\

+ 1 - 1
gcc-plugin/src/Makefile.am

@@ -40,7 +40,7 @@ AM_CPPFLAGS =						\
   -I$(top_srcdir)/gcc-plugin/include			\
   -I$(top_srcdir)/gcc-plugin/include			\
   -I$(top_srcdir)/include				\
   -I$(top_srcdir)/include				\
   -I$(GCC_PLUGIN_INCLUDE_DIR) -Wall -DYYERROR_VERBOSE=1	\
   -I$(GCC_PLUGIN_INCLUDE_DIR) -Wall -DYYERROR_VERBOSE=1	\
-  $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+  $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
 
 
 AM_LDFLAGS = -module
 AM_LDFLAGS = -module
 
 

+ 1 - 1
gcc-plugin/tests/pointer-tasks.c

@@ -14,7 +14,7 @@
    You should have received a copy of the GNU General Public License
    You should have received a copy of the GNU General Public License
    along with GCC-StarPU.  If not, see <http://www.gnu.org/licenses/>.  */
    along with GCC-StarPU.  If not, see <http://www.gnu.org/licenses/>.  */
 
 
-/* (instructions run (ldflags "-lstarpu-1.0")) */
+/* (instructions run (ldflags "-lstarpu-1.2")) */
 
 
 #undef NDEBUG
 #undef NDEBUG
 
 

+ 1 - 1
gcc-plugin/tests/scalar-tasks.c

@@ -14,7 +14,7 @@
    You should have received a copy of the GNU General Public License
    You should have received a copy of the GNU General Public License
    along with GCC-StarPU.  If not, see <http://www.gnu.org/licenses/>.  */
    along with GCC-StarPU.  If not, see <http://www.gnu.org/licenses/>.  */
 
 
-/* (instructions run (ldflags "-lstarpu-1.0")) */
+/* (instructions run (ldflags "-lstarpu-1.2")) */
 
 
 #undef NDEBUG
 #undef NDEBUG
 
 

+ 0 - 16
include/starpu_data.h

@@ -98,7 +98,6 @@ int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, uns
 
 
 #define STARPU_MAIN_RAM 0
 #define STARPU_MAIN_RAM 0
 
 
-<<<<<<< .courant
 enum starpu_node_kind
 enum starpu_node_kind
 {
 {
 	STARPU_UNUSED     = 0x00,
 	STARPU_UNUSED     = 0x00,
@@ -113,21 +112,6 @@ unsigned starpu_memory_nodes_get_count(void);
 enum starpu_node_kind starpu_node_get_kind(unsigned node);
 enum starpu_node_kind starpu_node_get_kind(unsigned node);
 
 
 
 
-=======
-enum starpu_node_kind
-{
-	STARPU_UNUSED     = 0x00,
-	STARPU_CPU_RAM    = 0x01,
-	STARPU_CUDA_RAM   = 0x02,
-	STARPU_OPENCL_RAM = 0x03
-};
-
-unsigned starpu_worker_get_memory_node(unsigned workerid);
-unsigned starpu_memory_nodes_get_count(void);
-enum starpu_node_kind starpu_node_get_kind(unsigned node);
-
-
->>>>>>> .fusion-droit.r9881
 /* It is possible to associate a mask to a piece of data (and its children) so
 /* It is possible to associate a mask to a piece of data (and its children) so
  * that when it is modified, it is automatically transferred into those memory
  * that when it is modified, it is automatically transferred into those memory
  * node. For instance a (1<<0) write-through mask means that the CUDA workers will
  * node. For instance a (1<<0) write-through mask means that the CUDA workers will

+ 3 - 3
include/starpu_hash.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -29,7 +29,7 @@ extern "C"
 /* Compute the CRC of a byte buffer seeded by the inputcrc "current state".
 /* Compute the CRC of a byte buffer seeded by the inputcrc "current state".
  * The return value should be considered as the new "current state" for future
  * The return value should be considered as the new "current state" for future
  * CRC computation. */
  * CRC computation. */
-uint32_t starpu_hash_crc32c_be_n(void *input, size_t n, uint32_t inputcrc);
+uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc);
 
 
 /* Compute the CRC of a 32bit number seeded by the inputcrc "current state".
 /* Compute the CRC of a 32bit number seeded by the inputcrc "current state".
  * The return value should be considered as the new "current state" for future
  * The return value should be considered as the new "current state" for future
@@ -39,7 +39,7 @@ uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc);
 /* Compute the CRC of a string seeded by the inputcrc "current state".  The
 /* Compute the CRC of a string seeded by the inputcrc "current state".  The
  * return value should be considered as the new "current state" for future CRC
  * return value should be considered as the new "current state" for future CRC
  * computation. */
  * computation. */
-uint32_t starpu_hash_crc32c_string(char *str, uint32_t inputcrc);
+uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }

+ 2 - 0
include/starpu_task.h

@@ -128,6 +128,8 @@ struct starpu_task
 	void *cl_arg;
 	void *cl_arg;
 	/* in case the argument buffer has to be uploaded explicitly */
 	/* in case the argument buffer has to be uploaded explicitly */
 	size_t cl_arg_size;
 	size_t cl_arg_size;
+	/* must StarPU release cl_arg ? - 0 by default */
+	unsigned cl_arg_free;
 
 
 	/* when the task is done, callback_func(callback_arg) is called */
 	/* when the task is done, callback_func(callback_arg) is called */
 	void (*callback_func)(void *);
 	void (*callback_func)(void *);

+ 12 - 0
include/starpu_worker.h

@@ -101,6 +101,18 @@ struct starpu_worker_collection
 	void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
 	void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
 };
 };
 
 
+enum starpu_node_kind
+{
+	STARPU_UNUSED     = 0x00,
+	STARPU_CPU_RAM    = 0x01,
+	STARPU_CUDA_RAM   = 0x02,
+	STARPU_OPENCL_RAM = 0x03
+};
+
+unsigned starpu_worker_get_memory_node(unsigned workerid);
+unsigned starpu_memory_nodes_get_count(void);
+enum starpu_node_kind starpu_node_get_kind(unsigned node);
+
 /* types of structures the worker collection can implement */
 /* types of structures the worker collection can implement */
 #define STARPU_WORKER_LIST 0
 #define STARPU_WORKER_LIST 0
 
 

+ 1 - 1
socl/src/Makefile.am

@@ -14,7 +14,7 @@
 #
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 
-AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
 libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/socl/src
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/socl/src
 AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) -no-undefined
 AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) -no-undefined

+ 3 - 3
src/common/hash.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2011, 2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -33,7 +33,7 @@ static inline uint32_t __attribute__ ((pure)) starpu_crc32c_be_8(uint8_t inputby
 	return crc;
 	return crc;
 }
 }
 
 
-uint32_t starpu_hash_crc32c_be_n(void *input, size_t n, uint32_t inputcrc)
+uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc)
 {
 {
 	uint8_t *p = (uint8_t *)input;
 	uint8_t *p = (uint8_t *)input;
 	size_t i;
 	size_t i;
@@ -60,7 +60,7 @@ uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc)
 	return crc;
 	return crc;
 }
 }
 
 
-uint32_t starpu_hash_crc32c_string(char *str, uint32_t inputcrc)
+uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc)
 {
 {
 	uint32_t hash = inputcrc;
 	uint32_t hash = inputcrc;
 
 

+ 5 - 0
src/core/task.c

@@ -155,6 +155,11 @@ void _starpu_task_destroy(struct starpu_task *task)
 		starpu_task_clean(task);
 		starpu_task_clean(task);
 		/* TODO handle the case of task with detach = 1 and destroy = 1 */
 		/* TODO handle the case of task with detach = 1 and destroy = 1 */
 		/* TODO handle the case of non terminated tasks -> return -EINVAL */
 		/* TODO handle the case of non terminated tasks -> return -EINVAL */
+
+		/* Does the user want StarPU to release cl_arg? */
+		if (task->cl_arg_free)
+			free(task->cl_arg);
+
 		free(task);
 		free(task);
 	}
 	}
 }
 }

+ 3 - 3
src/core/topology.c

@@ -501,7 +501,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].worker_mask = STARPU_CUDA;
 		config->workers[worker_idx].worker_mask = STARPU_CUDA;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cudagpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 		config->worker_mask |= STARPU_CUDA;
 		config->worker_mask |= STARPU_CUDA;
 
 
 		struct handle_entry *entry;
 		struct handle_entry *entry;
@@ -575,7 +575,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].devid = devid;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].perf_arch = arch;
 		config->workers[worker_idx].worker_mask = STARPU_OPENCL;
 		config->workers[worker_idx].worker_mask = STARPU_OPENCL;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + openclgpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 		config->worker_mask |= STARPU_OPENCL;
 		config->worker_mask |= STARPU_OPENCL;
 	}
 	}
 
 
@@ -620,7 +620,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 		config->workers[worker_idx].devid = cpu;
 		config->workers[worker_idx].devid = cpu;
 		config->workers[worker_idx].worker_mask = STARPU_CPU;
 		config->workers[worker_idx].worker_mask = STARPU_CPU;
 		config->worker_mask |= STARPU_CPU;
 		config->worker_mask |= STARPU_CPU;
-		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cpu].workerid);
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
 	}
 	}
 
 
 	topology->nworkers += topology->ncpus;
 	topology->nworkers += topology->ncpus;

+ 2 - 0
src/datawizard/copy_driver.c

@@ -390,7 +390,9 @@ int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_d
 
 
 int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data)
 int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data)
 {
 {
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
 	struct _starpu_async_channel *async_channel = async_data;
 	struct _starpu_async_channel *async_channel = async_data;
+#endif
 	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
 	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
 	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
 	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
 
 

+ 8 - 7
src/datawizard/memory_nodes.c

@@ -108,20 +108,21 @@ unsigned starpu_memory_nodes_get_count(void)
 
 
 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
 unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid)
 {
 {
-	unsigned nnodes;
+	unsigned node;
 	/* ATOMIC_ADD returns the new value ... */
 	/* ATOMIC_ADD returns the new value ... */
-	nnodes = STARPU_ATOMIC_ADD(&descr.nnodes, 1);
+	node = STARPU_ATOMIC_ADD(&descr.nnodes, 1) - 1;
+	STARPU_ASSERT_MSG(node < STARPU_MAXNODES,"Too many nodes (%u)!", node);
 	STARPU_ASSERT_MSG(nnodes < STARPU_MAXNODES,"Too many nodes !");
 	STARPU_ASSERT_MSG(nnodes < STARPU_MAXNODES,"Too many nodes !");
 
 
-	descr.nodes[nnodes-1] = kind;
-	_STARPU_TRACE_NEW_MEM_NODE(nnodes-1);
+	descr.nodes[node] = kind;
+	_STARPU_TRACE_NEW_MEM_NODE(node);
 
 
-	descr.devid[nnodes-1] = devid;
+	descr.devid[node] = devid;
 
 
 	/* for now, there is no condition associated to that newly created node */
 	/* for now, there is no condition associated to that newly created node */
-	descr.condition_count[nnodes-1] = 0;
+	descr.condition_count[node] = 0;
 
 
-	return (nnodes-1);
+	return node;
 
 
 }
 }
 
 

+ 7 - 1
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -682,10 +682,16 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 	double local_task_length[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_task_length[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_data_penalty[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_data_penalty[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_power[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_power[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
+
+	/* Expected end of this task on the workers */
 	double exp_end[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double exp_end[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
-	double max_exp_end = 0.0;
+
+	/* This is the minimum among the exp_end[] matrix */
 	double best_exp_end;
 	double best_exp_end;
 
 
+	/* This is the maximum termination time of already-scheduled tasks over all workers */
+	double max_exp_end = 0.0;
+
 	double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS];
 	double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS];
 
 
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;

+ 2 - 0
src/sched_policies/work_stealing_policy.c

@@ -287,6 +287,8 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
 	starpu_pthread_mutex_t *worker_sched_mutex;
 	starpu_pthread_mutex_t *worker_sched_mutex;
 	starpu_pthread_cond_t *worker_sched_cond;
 	starpu_pthread_cond_t *worker_sched_cond;
 	starpu_worker_get_sched_condition(workerid, &worker_sched_mutex, &worker_sched_cond);
 	starpu_worker_get_sched_condition(workerid, &worker_sched_mutex, &worker_sched_cond);
+
+	/* Note: this mutex is released before taking the victim mutex, to avoid a deadlock */
 	STARPU_PTHREAD_MUTEX_UNLOCK(worker_sched_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(worker_sched_mutex);
        
        
 
 

+ 1 - 1
src/util/starpu_insert_task.c

@@ -60,7 +60,6 @@ void starpu_codelet_unpack_args(void *_cl_arg, ...)
 	}
 	}
 
 
 	va_end(varg_list);
 	va_end(varg_list);
-	free(cl_arg);
 }
 }
 
 
 int starpu_insert_task(struct starpu_codelet *cl, ...)
 int starpu_insert_task(struct starpu_codelet *cl, ...)
@@ -80,6 +79,7 @@ int starpu_insert_task(struct starpu_codelet *cl, ...)
 	}
 	}
 
 
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
+	task->cl_arg_free = 1;
 
 
 	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
 	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
 	{
 	{

+ 0 - 4
src/util/starpu_insert_task_utils.c

@@ -29,7 +29,6 @@ struct insert_task_cb_wrapper
 {
 {
 	_starpu_callback_func_t callback_func;
 	_starpu_callback_func_t callback_func;
 	void *callback_arg;
 	void *callback_arg;
-	void *arg_stack;
 };
 };
 
 
 static
 static
@@ -41,7 +40,6 @@ void starpu_task_insert_callback_wrapper(void *_cl_arg_wrapper)
 	if (cl_arg_wrapper->callback_func)
 	if (cl_arg_wrapper->callback_func)
 		cl_arg_wrapper->callback_func(cl_arg_wrapper->callback_arg);
 		cl_arg_wrapper->callback_func(cl_arg_wrapper->callback_arg);
 
 
-	// cl_arg_wrapper->arg_stack is freed by starpu_codelet_unpack_args()
 	free(cl_arg_wrapper);
 	free(cl_arg_wrapper);
 }
 }
 
 
@@ -229,7 +227,6 @@ int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_si
 	STARPU_ASSERT(cl_arg_wrapper);
 	STARPU_ASSERT(cl_arg_wrapper);
 
 
 	cl_arg_wrapper->callback_func = NULL;
 	cl_arg_wrapper->callback_func = NULL;
-	cl_arg_wrapper->arg_stack = arg_buffer;
 
 
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
 	{
@@ -358,7 +355,6 @@ int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_si
 			(cl == NULL) ? "none" :
 			(cl == NULL) ? "none" :
 			(*task)->cl->name ? (*task)->cl->name :
 			(*task)->cl->name ? (*task)->cl->name :
 			((*task)->cl->model && (*task)->cl->model->symbol)?(*task)->cl->model->symbol:"none");
 			((*task)->cl->model && (*task)->cl->model->symbol)?(*task)->cl->model->symbol:"none");
-		free(cl_arg_wrapper->arg_stack);
 		free(cl_arg_wrapper);
 		free(cl_arg_wrapper);
 	}
 	}
 
 

+ 1 - 1
starpufft/Makefile.am

@@ -15,7 +15,7 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 #
 
 
-AM_CFLAGS = $(GLOBAL_AM_CFLAGS)
+AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
 
 
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la