Browse Source

Merge @ 9066:9105

Marc Sergent 12 years ago
parent
commit
458663783b
44 changed files with 502 additions and 161 deletions
  1. 2 2
      Makefile.am
  2. 4 0
      configure.ac
  3. 12 12
      doc/chapters/advanced-api.texi
  4. 6 0
      doc/chapters/basic-api.texi
  5. 2 0
      doc/chapters/tips-tricks.texi
  6. 5 1
      examples/sched_ctx/sched_ctx.c
  7. 2 2
      examples/scheduler/dummy_sched.c
  8. 2 37
      include/starpu_sched_ctx.h
  9. 2 0
      include/starpu_stdlib.h
  10. 8 8
      include/starpu_util.h
  11. 34 0
      include/starpu_worker.h
  12. 2 2
      mpi/Makefile.am
  13. 29 0
      mpi/starpumpi-1.1.pc.in
  14. 1 1
      sched_ctx_hypervisor/src/hypervisor_policies/ispeed_policy.c
  15. 6 6
      sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c
  16. 3 3
      sched_ctx_hypervisor/src/hypervisor_policies/simple_policy.c
  17. 3 3
      sched_ctx_hypervisor/src/sched_ctx_hypervisor.c
  18. 1 1
      socl/Makefile.am
  19. 29 0
      socl/socl-1.1.pc.in
  20. 3 1
      src/common/starpu_spinlock.h
  21. 18 3
      src/core/dependencies/implicit_data_deps.c
  22. 9 2
      src/core/jobs.c
  23. 13 13
      src/core/sched_ctx.c
  24. 1 1
      src/core/sched_ctx.h
  25. 1 1
      src/core/sched_policy.c
  26. 1 1
      src/core/workers.c
  27. 8 0
      src/datawizard/memory_manager.c
  28. 4 0
      src/drivers/cuda/driver_cuda.c
  29. 4 4
      src/sched_policies/deque_modeling_policy_data_aware.c
  30. 4 4
      src/sched_policies/detect_combined_workers.c
  31. 2 2
      src/sched_policies/eager_central_policy.c
  32. 3 3
      src/sched_policies/eager_central_priority_policy.c
  33. 2 2
      src/sched_policies/parallel_eager.c
  34. 43 5
      src/sched_policies/parallel_heft.c
  35. 2 2
      src/sched_policies/random_policy.c
  36. 20 5
      src/sched_policies/work_stealing_policy.c
  37. 10 10
      src/worker_collection/worker_list.c
  38. 34 0
      starpu-1.1.pc.in
  39. 2 2
      starpufft/Makefile.am
  40. 27 0
      starpufft/starpufft-1.1.pc.in
  41. 126 19
      tests/datawizard/allocate.c
  42. 4 1
      tests/errorcheck/invalid_blocking_calls.c
  43. 2 0
      tests/microbenchs/tasks_overhead.c
  44. 6 2
      tests/sched_policies/simple_cpu_gpu_sched.c

+ 2 - 2
Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2009-2013  Université de Bordeaux 1
 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -49,7 +49,7 @@ SUBDIRS += sched_ctx_hypervisor
 endif
 
 pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = libstarpu.pc starpu-1.0.pc
+pkgconfig_DATA = libstarpu.pc starpu-1.0.pc starpu-1.1.pc
 
 versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
 versinclude_HEADERS = 				\

+ 4 - 0
configure.ac

@@ -1877,14 +1877,18 @@ AC_OUTPUT([
 	socl/src/Makefile
 	socl/examples/Makefile
         socl/socl-1.0.pc
+	socl/socl-1.1.pc
 	socl/vendors/socl.icd
 	libstarpu.pc
 	starpu-1.0.pc
+	starpu-1.1.pc
 	mpi/libstarpumpi.pc
 	mpi/starpumpi-1.0.pc
+	mpi/starpumpi-1.1.pc
 	starpufft/Makefile
 	starpufft/libstarpufft.pc
 	starpufft/starpufft-1.0.pc
+	starpufft/starpufft-1.1.pc
 	examples/Makefile
 	examples/stencil/Makefile
 	tests/Makefile

+ 12 - 12
doc/chapters/advanced-api.texi

@@ -826,7 +826,7 @@ This function removes the workers indicated in the first argument from the conte
 A scheduling context manages a collection of workers that can be memorized using different data structures. Thus, a generic structure is available in order to simplify the choice of its type.
 Only the list data structure is available, but implementations of further data structures (such as trees) are foreseen.
 
-@deftp {Data Type} {struct starpu_sched_ctx_worker_collection}
+@deftp {Data Type} {struct starpu_worker_collection}
 @table @asis
 @item @code{void *workerids}
 The workerids managed by the collection
@@ -835,28 +835,28 @@ The number of workerids
 @item @code{pthread_key_t cursor_key} (optional)
 The cursor needed to iterate the collection (depending on the data structure)
 @item @code{int type}
-The type of structure (currently STARPU_SCHED_CTX_WORKER_LIST is the only one available)
-@item @code{unsigned (*has_next)(struct starpu_sched_ctx_worker_collection *workers)}
+The type of structure (currently STARPU_WORKER_LIST is the only one available)
+@item @code{unsigned (*has_next)(struct starpu_worker_collection *workers)}
 Checks if there is a next worker
-@item @code{int (*get_next)(struct starpu_sched_ctx_worker_collection *workers)}
+@item @code{int (*get_next)(struct starpu_worker_collection *workers)}
 Gets the next worker
-@item @code{int (*add)(struct starpu_sched_ctx_worker_collection *workers, int worker)}
+@item @code{int (*add)(struct starpu_worker_collection *workers, int worker)}
 Adds a worker to the collection
-@item @code{int (*remove)(struct starpu_sched_ctx_worker_collection *workers, int worker)}
+@item @code{int (*remove)(struct starpu_worker_collection *workers, int worker)}
 Removes a worker from the collection
-@item @code{void* (*init)(struct starpu_sched_ctx_worker_collection *workers)}
+@item @code{void* (*init)(struct starpu_worker_collection *workers)}
 Initialize the collection
-@item @code{void (*deinit)(struct starpu_sched_ctx_worker_collection *workers)}
+@item @code{void (*deinit)(struct starpu_worker_collection *workers)}
 Deinitialize the collection
-@item @code{void (*init_cursor)(struct starpu_sched_ctx_worker_collection *workers)} (optional)
+@item @code{void (*init_cursor)(struct starpu_worker_collection *workers)} (optional)
 Initialize the cursor if there is one
-@item @code{void (*deinit_cursor)(struct starpu_sched_ctx_worker_collection *workers)} (optional)
+@item @code{void (*deinit_cursor)(struct starpu_worker_collection *workers)} (optional)
 Deinitialize the cursor if there is one
 
 @end table
 @end deftp
 
-@deftypefun struct starpu_sched_ctx_worker_collection* starpu_sched_ctx_create_worker_collection (unsigned @var{sched_ctx_id}, int @var{type})
+@deftypefun struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection (unsigned @var{sched_ctx_id}, int @var{type})
 Create a worker collection of the type indicated by the last parameter for the context specified through the first parameter.
 @end deftypefun
 
@@ -864,7 +864,7 @@ Create a worker collection of the type indicated by the last parameter for the c
 Delete the worker collection of the specified scheduling context
 @end deftypefun
 
-@deftypefun struct starpu_sched_ctx_worker_collection* starpu_sched_ctx_get_worker_collection (unsigned @var{sched_ctx_id})
+@deftypefun struct starpu_worker_collection* starpu_sched_ctx_get_worker_collection (unsigned @var{sched_ctx_id})
 Return the worker collection managed by the indicated context
 @end deftypefun
 

+ 6 - 0
doc/chapters/basic-api.texi

@@ -284,6 +284,12 @@ This function frees memory by specifying its size. The given
 @code{starpu_malloc_flags} when allocating the memory.
 @end deftypefun
 
+@deftypefun ssize_t starpu_memory_get_available (unsigned @var{node})
+If a memory limit is defined on the given node (@pxref{Limit memory}),
+return the amount of available memory on the node. Otherwise return
+@code{-1}.
+@end deftypefun
+
 @node Workers' Properties
 @section Workers' Properties
 

+ 2 - 0
doc/chapters/tips-tricks.texi

@@ -89,3 +89,5 @@ Talk about
 @code{STARPU_LIMIT_OPENCL_devid_MEM}, @code{STARPU_LIMIT_OPENCL_MEM}
 and @code{STARPU_LIMIT_CPU_MEM}
 
+@code{starpu_memory_get_available}
+

+ 5 - 1
examples/sched_ctx/sched_ctx.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,7 +18,11 @@
 #include <starpu.h>
 #include <pthread.h>
 
+#ifdef STARPU_QUICK_CHECK
+#define NTASKS 64
+#else
 #define NTASKS 1000
+#endif
 
 int tasks_executed = 0;
 pthread_mutex_t mut;

+ 2 - 2
examples/scheduler/dummy_sched.c

@@ -28,7 +28,7 @@ typedef struct dummy_sched_data {
 
 static void init_dummy_sched(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_SCHED_CTX_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 
 	struct dummy_sched_data *data = (struct dummy_sched_data*)malloc(sizeof(struct dummy_sched_data));
 	
@@ -79,7 +79,7 @@ static int push_task_dummy(struct starpu_task *task)
         /*if there are no tasks block */
         /* wake people waiting for a task */
         unsigned worker = 0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
         struct starpu_sched_ctx_iterator it;
         if(workers->init_iterator)

+ 2 - 37
include/starpu_sched_ctx.h

@@ -24,41 +24,6 @@ extern "C"
 {
 #endif
 
-//struct starpu_sched_ctx_iterator;
-struct starpu_sched_ctx_iterator
-{
-	int cursor;
-};
-
-
-/* generic structure used by the scheduling contexts to iterate the workers */
-struct starpu_sched_ctx_worker_collection
-{
-	/* hidden data structure used to memorize the workers */
-	void *workerids;
-	/* the number of workers in the collection */
-	unsigned nworkers;
-	/* the type of structure (STARPU_SCHED_CTX_WORKER_LIST,...) */
-	int type;
-	/* checks if there is another element in collection */
-	unsigned (*has_next)(struct starpu_sched_ctx_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
-	/* return the next element in the collection */
-	int (*get_next)(struct starpu_sched_ctx_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
-	/* add a new element in the collection */
-	int (*add)(struct starpu_sched_ctx_worker_collection *workers, int worker);
-	/* remove an element from the collection */
-	int (*remove)(struct starpu_sched_ctx_worker_collection *workers, int worker);
-	/* initialize the structure */
-	void (*init)(struct starpu_sched_ctx_worker_collection *workers);
-	/* free the structure */
-	void (*deinit)(struct starpu_sched_ctx_worker_collection *workers);
-	/* initialize the cursor if there is one */
-	void (*init_iterator)(struct starpu_sched_ctx_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
-};
-
-/* types of structures the worker collection can implement */
-#define STARPU_SCHED_CTX_WORKER_LIST 0
-
 struct starpu_sched_ctx_performance_counters
 {
 	void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time);
@@ -91,11 +56,11 @@ void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data);
 
 void* starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id);
 
-struct starpu_sched_ctx_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, int type);
+struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, int type);
 
 void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id);
 
-struct starpu_sched_ctx_worker_collection* starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id);
+struct starpu_worker_collection* starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id);
 
 #if !defined(_MSC_VER) && !defined(STARPU_SIMGRID)
 pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id);

+ 2 - 0
include/starpu_stdlib.h

@@ -36,6 +36,8 @@ int starpu_free(void *A);
 int starpu_malloc_flags(void **A, size_t dim, int flags);
 int starpu_free_flags(void *A, size_t dim, int flags);
 
+ssize_t starpu_memory_get_available(unsigned node);
+
 #ifdef __cplusplus
 }
 #endif

+ 8 - 8
include/starpu_util.h

@@ -87,20 +87,20 @@ extern "C"
 } while(0)
 
 #if defined(STARPU_HAVE_STRERROR_R)
-#  define STARPU_CHECK_RETURN_VALUE(err, message) {if (STARPU_UNLIKELY(err != 0)) { \
+#  define STARPU_CHECK_RETURN_VALUE(err, message, ...) {if (STARPU_UNLIKELY(err != 0)) { \
 			char xmessage[256]; strerror_r(-err, xmessage, 256); \
-			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d:%s>\n", message, err, xmessage); \
+			fprintf(stderr, "[starpu] Unexpected value: <%d:%s> returned for " message "\n", err, xmessage, ## __VA_ARGS__); \
 			STARPU_ABORT(); }}
-#  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message) {if (STARPU_UNLIKELY(err != value)) { \
+#  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) {if (STARPU_UNLIKELY(err != value)) { \
 			char xmessage[256]; strerror_r(-err, xmessage, 256); \
-			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d:%s>\n", message, err, xmessage); \
+			fprintf(stderr, "[starpu] Unexpected value: <%d!=%d:%s> returned for " message "\n", err, value, xmessage, ## __VA_ARGS__); \
 			STARPU_ABORT(); }}
 #else
-#  define STARPU_CHECK_RETURN_VALUE(err, message) {if (STARPU_UNLIKELY(err != 0)) {		\
-			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d>\n", message, err); \
+#  define STARPU_CHECK_RETURN_VALUE(err, message, ...) {if (STARPU_UNLIKELY(err != 0)) { \
+			fprintf(stderr, "[starpu] Unexpected value: <%d> returned for " message "\n", err, ## __VA_ARGS__); \
 			STARPU_ABORT(); }}
-#  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message) {if (STARPU_UNLIKELY(err != value)) { \
-			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d>\n", message, err); \
+#  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) {if (STARPU_UNLIKELY(err != value)) { \
+	       		fprintf(stderr, "[starpu] Unexpected value: <%d != %d> returned for " message "\n", err, value, ## __VA_ARGS__); \
 			STARPU_ABORT(); }}
 #endif /* STARPU_HAVE_STRERROR_R */
 

+ 34 - 0
include/starpu_worker.h

@@ -34,6 +34,40 @@ enum starpu_archtype
 	STARPU_OPENCL_WORKER  /* OpenCL device */
 };
 
+struct starpu_sched_ctx_iterator
+{
+	int cursor;
+};
+
+
+/* generic structure used by the scheduling contexts to iterate the workers */
+struct starpu_worker_collection
+{
+	/* hidden data structure used to memorize the workers */
+	void *workerids;
+	/* the number of workers in the collection */
+	unsigned nworkers;
+	/* the type of structure (STARPU_WORKER_LIST,...) */
+	int type;
+	/* checks if there is another element in collection */
+	unsigned (*has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
+	/* return the next element in the collection */
+	int (*get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
+	/* add a new element in the collection */
+	int (*add)(struct starpu_worker_collection *workers, int worker);
+	/* remove an element from the collection */
+	int (*remove)(struct starpu_worker_collection *workers, int worker);
+	/* initialize the structure */
+	void (*init)(struct starpu_worker_collection *workers);
+	/* free the structure */
+	void (*deinit)(struct starpu_worker_collection *workers);
+	/* initialize the cursor if there is one */
+	void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
+};
+
+/* types of structures the worker collection can implement */
+#define STARPU_WORKER_LIST 0
+
 /* This function returns the number of workers (ie. processing units executing
  * StarPU tasks). The returned value should be at most STARPU_NMAXWORKERS. */
 unsigned starpu_worker_get_count(void);

+ 2 - 2
mpi/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2009-2013  Université de Bordeaux 1
 # Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -17,7 +17,7 @@
 SUBDIRS=src tests examples
 
 pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = libstarpumpi.pc starpumpi-1.0.pc
+pkgconfig_DATA = libstarpumpi.pc starpumpi-1.0.pc starpumpi-1.1.pc
 
 versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
 versinclude_HEADERS = 					\

+ 29 - 0
mpi/starpumpi-1.1.pc.in

@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: starpumpi
+Description: offers MPI support for heterogeneous multicore architecture
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@
+Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@
+Libs.private: @LDFLAGS@ @LIBS@
+Requires: starpu-1.0
+Requires.private:

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_policy.c

@@ -72,7 +72,7 @@ static int* _get_slowest_workers(unsigned sched_ctx, int *nworkers, enum starpu_
 	for(i = 0; i < *nworkers; i++)
 		curr_workers[i] = -1;
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 	int index;
 	int worker;
 	int considered = 0;

+ 6 - 6
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -25,7 +25,7 @@ static int _compute_priority(unsigned sched_ctx)
 
 	int total_priority = 0;
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
@@ -113,7 +113,7 @@ int* _get_first_workers(unsigned sched_ctx, int *nworkers, enum starpu_archtype
 	for(i = 0; i < *nworkers; i++)
 		curr_workers[i] = -1;
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 	int index;
 	int worker;
 	int considered = 0;
@@ -180,7 +180,7 @@ int* _get_first_workers(unsigned sched_ctx, int *nworkers, enum starpu_archtype
 /* get the number of workers in the context that are allowed to be moved (that are not fixed) */
 unsigned _get_potential_nworkers(struct sched_ctx_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_archtype arch)
 {
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 
 	unsigned potential_workers = 0;
 	int worker;
@@ -304,7 +304,7 @@ unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now)
 
 static double _get_ispeed_sample_for_type_of_worker(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype req_arch)
 {
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
         int worker;
 
 	double avg = 0.0;
@@ -330,7 +330,7 @@ static double _get_ispeed_sample_for_type_of_worker(struct sched_ctx_hypervisor_
 
 static double _get_ispeed_sample_for_sched_ctx(unsigned sched_ctx)
 {
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sched_ctx);
         
 	int worker;
@@ -485,7 +485,7 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 static double _get_best_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
 {
 	double ret_val = 0.0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
         int worker;
 
 	struct starpu_sched_ctx_iterator it;

+ 3 - 3
sched_ctx_hypervisor/src/hypervisor_policies/simple_policy.c

@@ -23,7 +23,7 @@ static int _compute_priority(unsigned sched_ctx)
 
 	int total_priority = 0;
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 	int worker;
 	
 	starpu_iterator it;
@@ -80,7 +80,7 @@ int* _get_first_workers(unsigned sched_ctx, unsigned *nworkers, enum starpu_arch
 	for(i = 0; i < *nworkers; i++)
 		curr_workers[i] = -1;
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 	int index;
 	int worker;
 	int considered = 0;
@@ -146,7 +146,7 @@ int* _get_first_workers(unsigned sched_ctx, unsigned *nworkers, enum starpu_arch
 
 static unsigned _get_potential_nworkers(struct sched_ctx_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_archtype arch)
 {
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 
 	unsigned potential_workers = 0;
 	int worker;

+ 3 - 3
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -341,7 +341,7 @@ void sched_ctx_hypervisor_unregister_ctx(unsigned sched_ctx)
 static double _get_best_total_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
 {
 	double ret_val = 0.0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
         int worker;
 
 	struct starpu_sched_ctx_iterator it;
@@ -388,7 +388,7 @@ double _get_ref_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc
 	double ref_velocity = 0.0;
 	unsigned nw = 0;
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
@@ -439,7 +439,7 @@ static void _get_cpus(int *workers, int nworkers, int *cpus, int *ncpus)
 int sched_ctx_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_archtype arch)
 {
 	int nworkers_ctx = 0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;

+ 1 - 1
socl/Makefile.am

@@ -20,7 +20,7 @@ EXTRA_DIST = README
 libsocl_la_includedir=$(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)/socl/CL
 
 pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = socl-1.0.pc
+pkgconfig_DATA = socl-1.0.pc socl-1.1.pc
 
 showcheck:
 	for i in $(SUBDIRS) ; do \

+ 29 - 0
socl/socl-1.1.pc.in

@@ -0,0 +1,29 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: socl
+Description: offers OpenCL implementation on top of StarPU
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@/socl
+Libs: -L${libdir} -lsocl-@STARPU_EFFECTIVE_VERSION@
+Libs.private: @LDFLAGS@ @LIBS@
+Requires: starpu-1.0
+Requires.private:

+ 3 - 1
src/common/starpu_spinlock.h

@@ -30,13 +30,15 @@ struct _starpu_spinlock
 #elif defined(STARPU_SPINLOCK_CHECK)
 	pthread_mutexattr_t errcheck_attr;
 	_starpu_pthread_mutex_t errcheck_lock;
-	const char *last_taker;
 #elif defined(HAVE_PTHREAD_SPIN_LOCK)
 	_starpu_pthread_spinlock_t lock;
 #else
 	/* we only have a trivial implementation yet ! */
 	uint32_t taken __attribute__ ((aligned(16)));
 #endif
+#ifdef STARPU_SPINLOCK_CHECK
+	const char *last_taker;
+#endif
 };
 
 int _starpu_spin_init(struct _starpu_spinlock *lock);

+ 18 - 3
src/core/dependencies/implicit_data_deps.c

@@ -90,8 +90,13 @@ static void _starpu_add_reader_after_writer(starpu_data_handle_t handle, struct
 		_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_writer_id, pre_sync_task);
 	}
 
-	if (!pre_sync_task->cl)
+	if (!pre_sync_task->cl) {
+		/* Add a reference to be released in _starpu_handle_job_termination */
+		_starpu_spin_lock(&handle->header_lock);
+		handle->busy_count++;
+		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_get_job_associated_to_task(pre_sync_task)->implicit_dep_handle = handle;
+	}
 }
 
 /* Write after Read (WAR) */
@@ -155,8 +160,13 @@ static void _starpu_add_writer_after_readers(starpu_data_handle_t handle, struct
 	handle->last_submitted_readers = NULL;
 	handle->last_submitted_writer = post_sync_task;
 
-	if (!post_sync_task->cl)
+	if (!post_sync_task->cl) {
+		/* Add a reference to be released in _starpu_handle_job_termination */
+		_starpu_spin_lock(&handle->header_lock);
+		handle->busy_count++;
+		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_get_job_associated_to_task(post_sync_task)->implicit_dep_handle = handle;
+	}
 }
 
 /* Write after Write (WAW) */
@@ -199,8 +209,13 @@ static void _starpu_add_writer_after_writer(starpu_data_handle_t handle, struct
 
 	handle->last_submitted_writer = post_sync_task;
 
-	if (!post_sync_task->cl)
+	if (!post_sync_task->cl) {
+		/* Add a reference to be released in _starpu_handle_job_termination */
+		_starpu_spin_lock(&handle->header_lock);
+		handle->busy_count++;
+		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_get_job_associated_to_task(post_sync_task)->implicit_dep_handle = handle;
+	}
 }
 
 /* This function adds the implicit task dependencies introduced by data

+ 9 - 2
src/core/jobs.c

@@ -173,8 +173,15 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	/* Task does not have a cl, but has explicit data dependencies, we need
 	 * to tell them that we will not exist any more before notifying the
 	 * tasks waiting for us */
-	if (j->implicit_dep_handle)
-		_starpu_release_data_enforce_sequential_consistency(j->task, j->implicit_dep_handle);
+	if (j->implicit_dep_handle) {
+		starpu_data_handle_t handle = j->implicit_dep_handle;
+		_starpu_release_data_enforce_sequential_consistency(j->task, handle);
+		/* Release reference taken while setting implicit_dep_handle */
+		_starpu_spin_lock(&handle->header_lock);
+		handle->busy_count--;
+		if (!_starpu_data_check_not_busy(handle))
+			_starpu_spin_unlock(&handle->header_lock);
+	}
 
 	/* in case there are dependencies, wake up the proper tasks */
 	_starpu_notify_dependencies(j);

+ 13 - 13
src/core/sched_ctx.c

@@ -20,7 +20,7 @@
 
 _starpu_pthread_mutex_t changing_ctx_mutex[STARPU_NMAX_SCHED_CTXS];
 
-extern struct starpu_sched_ctx_worker_collection worker_list;
+extern struct starpu_worker_collection worker_list;
 static _starpu_pthread_mutex_t sched_ctx_manag = _STARPU_PTHREAD_MUTEX_INITIALIZER;
 static _starpu_pthread_mutex_t finished_submit_mutex = _STARPU_PTHREAD_MUTEX_INITIALIZER;
 struct starpu_task stop_submission_task = STARPU_TASK_INITIALIZER;
@@ -133,7 +133,7 @@ void starpu_sched_ctx_stop_task_submission()
 static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers,
 				       int *added_workers, int *n_added_workers)
 {
-	struct starpu_sched_ctx_worker_collection *workers = sched_ctx->workers;
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
 
 	int nworkers_to_add = nworkers == -1 ? (int)config->topology.nworkers : nworkers;
@@ -181,7 +181,7 @@ static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx
 static void _starpu_remove_workers_from_sched_ctx(struct _starpu_sched_ctx *sched_ctx, int *workerids,
 						  int nworkers, int *removed_workers, int *n_removed_workers)
 {
-	struct starpu_sched_ctx_worker_collection *workers = sched_ctx->workers;
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 
 	int i = 0;
 
@@ -807,14 +807,14 @@ void* starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id)
 	return sched_ctx->policy_data;
 }
 
-struct starpu_sched_ctx_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, int worker_collection_type)
+struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, int worker_collection_type)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	sched_ctx->workers = (struct starpu_sched_ctx_worker_collection*)malloc(sizeof(struct starpu_sched_ctx_worker_collection));
+	sched_ctx->workers = (struct starpu_worker_collection*)malloc(sizeof(struct starpu_worker_collection));
 
 	switch(worker_collection_type)
 	{
-	case STARPU_SCHED_CTX_WORKER_LIST:
+	case STARPU_WORKER_LIST:
 		sched_ctx->workers->has_next = worker_list.has_next;
 		sched_ctx->workers->get_next = worker_list.get_next;
 		sched_ctx->workers->add = worker_list.add;
@@ -822,7 +822,7 @@ struct starpu_sched_ctx_worker_collection* starpu_sched_ctx_create_worker_collec
 		sched_ctx->workers->init = worker_list.init;
 		sched_ctx->workers->deinit = worker_list.deinit;
 		sched_ctx->workers->init_iterator = worker_list.init_iterator;
-		sched_ctx->workers->type = STARPU_SCHED_CTX_WORKER_LIST;
+		sched_ctx->workers->type = STARPU_WORKER_LIST;
 		break;
 	}
 
@@ -831,7 +831,7 @@ struct starpu_sched_ctx_worker_collection* starpu_sched_ctx_create_worker_collec
 
 static unsigned _get_workers_list(struct _starpu_sched_ctx *sched_ctx, int **workerids)
 {
-	struct starpu_sched_ctx_worker_collection *workers = sched_ctx->workers;
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 	*workerids = (int*)malloc(workers->nworkers*sizeof(int));
 	int worker;
 	unsigned nworkers = 0;
@@ -854,7 +854,7 @@ void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id)
 	free(sched_ctx->workers);
 }
 
-struct starpu_sched_ctx_worker_collection* starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id)
+struct starpu_worker_collection* starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	return sched_ctx->workers;
@@ -864,7 +864,7 @@ int starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
-	struct starpu_sched_ctx_worker_collection *workers = sched_ctx->workers;
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 	int worker;
 
 	int npus = 0;
@@ -903,8 +903,8 @@ unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sc
         struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
         struct _starpu_sched_ctx *sched_ctx2 = _starpu_get_sched_ctx_struct(sched_ctx_id2);
 
-        struct starpu_sched_ctx_worker_collection *workers = sched_ctx->workers;
-        struct starpu_sched_ctx_worker_collection *workers2 = sched_ctx2->workers;
+        struct starpu_worker_collection *workers = sched_ctx->workers;
+        struct starpu_worker_collection *workers2 = sched_ctx2->workers;
         int worker, worker2;
         int shared_workers = 0;
 
@@ -940,7 +940,7 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
 /* 	} */
         struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
-        struct starpu_sched_ctx_worker_collection *workers = sched_ctx->workers;
+        struct starpu_worker_collection *workers = sched_ctx->workers;
         int worker;
 
 	struct starpu_sched_ctx_iterator it;

+ 1 - 1
src/core/sched_ctx.h

@@ -46,7 +46,7 @@ struct _starpu_sched_ctx
 	/* data necessary for the policy */
 	void *policy_data;
 
-	struct starpu_sched_ctx_worker_collection *workers;
+	struct starpu_worker_collection *workers;
 
 	/* we keep an initial sched which we never delete */
 	unsigned is_initial_sched;

+ 1 - 1
src/core/sched_policy.c

@@ -296,7 +296,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 static int _starpu_nworkers_able_to_execute_task(struct starpu_task *task, struct _starpu_sched_ctx *sched_ctx)
 {
 	int worker = -1, nworkers = 0;
-	struct starpu_sched_ctx_worker_collection *workers = sched_ctx->workers;
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 
 	struct starpu_sched_ctx_iterator it;
 	if(workers->init_iterator)

+ 1 - 1
src/core/workers.c

@@ -1310,7 +1310,7 @@ int starpu_worker_get_nids_ctx_free_by_type(enum starpu_archtype type, int *work
 			{
 				if(config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
 				{
-					struct starpu_sched_ctx_worker_collection *workers = config.sched_ctxs[s].workers;
+					struct starpu_worker_collection *workers = config.sched_ctxs[s].workers;
 					struct starpu_sched_ctx_iterator it;
 					if(workers->init_iterator)
 						workers->init_iterator(workers, &it);

+ 8 - 0
src/datawizard/memory_manager.c

@@ -68,3 +68,11 @@ void _starpu_memory_manager_deallocate_size(size_t size, unsigned node)
 {
 	used_size[node] -= size;
 }
+
+ssize_t starpu_memory_get_available(unsigned node)
+{	/* Report the memory still available on `node`: recorded global size minus recorded used size. */
+	if (global_size[node] == 0)
+		return -1;	/* no global size recorded for this node: -1 is the "unavailable" sentinel callers test for */
+	else
+		return global_size[node] - used_size[node];	/* NOTE(review): result is converted to ssize_t on return — confirm the array element types */
+}

+ 4 - 0
src/drivers/cuda/driver_cuda.c

@@ -78,6 +78,10 @@ static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid)
 	size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0;
 	char name[30];
 
+#ifdef STARPU_USE_CUDA
+	global_mem[devid] = props[devid].totalGlobalMem;
+#endif
+
 	limit = starpu_get_env_number("STARPU_LIMIT_CUDA_MEM");
 	if (limit == -1)
 	{

+ 4 - 4
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -375,7 +375,7 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 
 	unsigned best_impl = 0;
 	unsigned nimpl;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
 	struct starpu_sched_ctx_iterator it;
 	if(workers->init_iterator)
@@ -496,7 +496,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
 	starpu_task_bundle_t bundle = task->bundle;
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
 	struct starpu_sched_ctx_iterator it;
 	if(workers->init_iterator)
@@ -615,7 +615,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 	int forced_impl = -1;
 
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	unsigned nworkers_ctx = workers->nworkers;
 	double local_task_length[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
 	double local_data_penalty[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS];
@@ -817,7 +817,7 @@ static void dmda_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned
 
 static void initialize_dmda_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_SCHED_CTX_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)malloc(sizeof(struct _starpu_dmda_data));
 	dt->alpha = _STARPU_DEFAULT_ALPHA;

+ 4 - 4
src/sched_policies/detect_combined_workers.c

@@ -85,7 +85,7 @@ static void synthesize_intermediate_workers(hwloc_obj_t *children, unsigned min,
 				unsigned sched_ctx_id  = starpu_sched_ctx_get_context();
 				if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
 					sched_ctx_id = 0;
-				struct starpu_sched_ctx_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+				struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
 				_STARPU_DEBUG("Adding it\n");
 				ret = starpu_combined_worker_assign_workerid(nworkers, cpu_workers);
@@ -138,7 +138,7 @@ static void find_and_assign_combinations(hwloc_obj_t obj, unsigned min, unsigned
 		if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
 			sched_ctx_id = 0;
 
-		struct starpu_sched_ctx_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+		struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
 		int newworkerid = starpu_combined_worker_assign_workerid(nworkers, cpu_workers);
 		STARPU_ASSERT(newworkerid >= 0);
@@ -200,7 +200,7 @@ static void find_and_assign_combinations_without_hwloc(int *workerids, int nwork
 	int min;
 	int max;
 
-	struct starpu_sched_ctx_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
 	/* We put the id of all CPU workers in this array */
 	int cpu_workers[STARPU_NMAXWORKERS];
@@ -253,7 +253,7 @@ static void combine_all_cpu_workers(int *workerids, int nworkers)
 	unsigned sched_ctx_id  = starpu_sched_ctx_get_context();
 	if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
 		sched_ctx_id = 0;
-	struct starpu_sched_ctx_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	int cpu_workers[STARPU_NMAXWORKERS];
 	int ncpus = 0;
 	struct _starpu_worker *worker;

+ 2 - 2
src/sched_policies/eager_central_policy.c

@@ -32,7 +32,7 @@ struct _starpu_eager_center_policy_data
 
 static void initialize_eager_center_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_SCHED_CTX_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)malloc(sizeof(struct _starpu_eager_center_policy_data));
 
@@ -86,7 +86,7 @@ static int push_task_eager_policy(struct starpu_task *task)
 	/*if there are no tasks block */
 	/* wake people waiting for a task */
 	unsigned worker = 0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	
 	struct starpu_sched_ctx_iterator it;
 	if(workers->init_iterator)

+ 3 - 3
src/sched_policies/eager_central_priority_policy.c

@@ -76,7 +76,7 @@ static void _starpu_destroy_priority_taskq(struct _starpu_priority_taskq *priori
 
 static void initialize_eager_center_priority_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_SCHED_CTX_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 	struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)malloc(sizeof(struct _starpu_eager_central_prio_data));
 
 	/* In this policy, we support more than two levels of priority. */
@@ -135,7 +135,7 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 	/*if there are no tasks block */
 	/* wake people waiting for a task */
 	unsigned worker = 0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	
 	struct starpu_sched_ctx_iterator it;
 	if(workers->init_iterator)
@@ -223,7 +223,7 @@ static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 	{
 		/* Notify another worker to do that task */
 		unsigned worker = 0;
-		struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+		struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
 		struct starpu_sched_ctx_iterator it;
 		if(workers->init_iterator)

+ 2 - 2
src/sched_policies/parallel_eager.c

@@ -128,7 +128,7 @@ static void peager_remove_workers(unsigned sched_ctx_id, int *workerids, unsigne
 
 static void initialize_peager_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_SCHED_CTX_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 
 	struct _starpu_peager_data *data = (struct _starpu_peager_data*)malloc(sizeof(struct _starpu_peager_data));
 	/* masters pick tasks from that queue */
@@ -168,7 +168,7 @@ static int push_task_peager_policy(struct starpu_task *task)
 	}
 	struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	int worker = 0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	
 	struct starpu_sched_ctx_iterator it;
 	if(workers->init_iterator)

+ 43 - 5
src/sched_policies/parallel_heft.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012 inria
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2011  Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -185,10 +185,23 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
 static double compute_expected_end(int workerid, double length)
 {
+	_starpu_pthread_mutex_t *sched_mutex;
+	_starpu_pthread_cond_t *sched_cond;
+
+	starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond);
+
 	if (!starpu_worker_is_combined_worker(workerid))
 	{
+		double res;
 		/* This is a basic worker */
-		return worker_exp_start[workerid] + worker_exp_len[workerid] + length;
+
+		VALGRIND_HG_MUTEX_LOCK_PRE(sched_mutex, 0);
+		VALGRIND_HG_MUTEX_LOCK_POST(sched_mutex);
+		res = worker_exp_start[workerid] + worker_exp_len[workerid] + length;
+		VALGRIND_HG_MUTEX_UNLOCK_PRE(sched_mutex);
+		VALGRIND_HG_MUTEX_UNLOCK_POST(sched_mutex);
+
+		return res;
 	}
 	else
 	{
@@ -199,6 +212,9 @@ static double compute_expected_end(int workerid, double length)
 
 		double exp_end = DBL_MIN;
 
+		VALGRIND_HG_MUTEX_LOCK_PRE(sched_mutex, 0);
+		VALGRIND_HG_MUTEX_LOCK_POST(sched_mutex);
+
 		int i;
 		for (i = 0; i < worker_size; i++)
 		{
@@ -208,6 +224,9 @@ static double compute_expected_end(int workerid, double length)
 			exp_end = STARPU_MAX(exp_end, local_exp_end);
 		}
 
+		VALGRIND_HG_MUTEX_UNLOCK_PRE(sched_mutex);
+		VALGRIND_HG_MUTEX_UNLOCK_POST(sched_mutex);
+
 		return exp_end;
 	}
 }
@@ -215,10 +234,23 @@ static double compute_expected_end(int workerid, double length)
 static double compute_ntasks_end(int workerid)
 {
 	enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(workerid);
+	_starpu_pthread_mutex_t *sched_mutex;
+	_starpu_pthread_cond_t *sched_cond;
+
+	starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond);
+
 	if (!starpu_worker_is_combined_worker(workerid))
 	{
+		double res;
 		/* This is a basic worker */
-		return ntasks[workerid] / starpu_worker_get_relative_speedup(perf_arch);
+
+		VALGRIND_HG_MUTEX_LOCK_PRE(sched_mutex, 0);
+		VALGRIND_HG_MUTEX_LOCK_POST(sched_mutex);
+		res = ntasks[workerid] / starpu_worker_get_relative_speedup(perf_arch);
+		VALGRIND_HG_MUTEX_UNLOCK_PRE(sched_mutex);
+		VALGRIND_HG_MUTEX_UNLOCK_POST(sched_mutex);
+
+		return res;
 	}
 	else
 	{
@@ -229,6 +261,9 @@ static double compute_ntasks_end(int workerid)
 
 		int ntasks_end=0;
 
+		VALGRIND_HG_MUTEX_LOCK_PRE(sched_mutex, 0);
+		VALGRIND_HG_MUTEX_LOCK_POST(sched_mutex);
+
 		int i;
 		for (i = 0; i < worker_size; i++)
 		{
@@ -236,6 +271,9 @@ static double compute_ntasks_end(int workerid)
 			ntasks_end = STARPU_MAX(ntasks_end, (int) ((double) ntasks[combined_workerid[i]] / starpu_worker_get_relative_speedup(perf_arch)));
 		}
 
+		VALGRIND_HG_MUTEX_UNLOCK_PRE(sched_mutex);
+		VALGRIND_HG_MUTEX_UNLOCK_POST(sched_mutex);
+
 		return ntasks_end;
 	}
 }
@@ -244,7 +282,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 {
 	struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	unsigned nworkers_ctx = workers->nworkers;
 
 	unsigned worker, worker_ctx = 0;
@@ -535,7 +573,7 @@ static void parallel_heft_add_workers(unsigned sched_ctx_id, int *workerids, uns
 
 static void initialize_parallel_heft_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_SCHED_CTX_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 	struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)malloc(sizeof(struct _starpu_pheft_data));
 	hd->alpha = _STARPU_DEFAULT_ALPHA;
 	hd->beta = _STARPU_DEFAULT_BETA;

+ 2 - 2
src/sched_policies/random_policy.c

@@ -34,7 +34,7 @@ static int _random_push_task(struct starpu_task *task, unsigned prio)
 	double alpha_sum = 0.0;
 
 	unsigned sched_ctx_id = task->sched_ctx;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
         int worker;
 	struct starpu_sched_ctx_iterator it;
         if(workers->init_iterator)
@@ -103,7 +103,7 @@ static int random_push_task(struct starpu_task *task)
 
 static void initialize_random_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_SCHED_CTX_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 	starpu_srand48(time(NULL));
 }
 

+ 20 - 5
src/sched_policies/work_stealing_policy.c

@@ -59,10 +59,25 @@ static unsigned select_victim_round_robin(unsigned sched_ctx_id)
 	unsigned worker = ws->last_pop_worker;
 	unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
 
+	_starpu_pthread_mutex_t *victim_sched_mutex;
+	_starpu_pthread_cond_t *victim_sched_cond;
+
 	/* If the worker's queue is empty, let's try
 	 * the next ones */
-	while (!ws->queue_array[worker]->njobs)
+	while (1)
 	{
+		unsigned njobs;
+
+		starpu_worker_get_sched_condition(worker, &victim_sched_mutex, &victim_sched_cond);
+		VALGRIND_HG_MUTEX_LOCK_PRE(victim_sched_mutex, 0);
+		VALGRIND_HG_MUTEX_LOCK_POST(victim_sched_mutex);
+		njobs = ws->queue_array[worker]->njobs;
+		VALGRIND_HG_MUTEX_UNLOCK_PRE(victim_sched_mutex);
+		VALGRIND_HG_MUTEX_UNLOCK_POST(victim_sched_mutex);
+
+		if (njobs)
+			break;
+
 		worker = (worker + 1) % nworkers;
 		if (worker == ws->last_pop_worker)
 		{
@@ -146,7 +161,7 @@ static unsigned select_victim_overload(unsigned sched_ctx_id)
 	if (performed_total < calibration_value)
 		return select_victim_round_robin(sched_ctx_id);
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
 	struct starpu_sched_ctx_iterator it;
         if(workers->init_iterator)
@@ -186,7 +201,7 @@ static unsigned select_worker_overload(unsigned sched_ctx_id)
 	if (performed_total < calibration_value)
 		return select_worker_round_robin(sched_ctx_id);
 
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 
 	struct starpu_sched_ctx_iterator it;
         if(workers->init_iterator)
@@ -335,7 +350,7 @@ int ws_push_task(struct starpu_task *task)
         }
 
 	unsigned worker = 0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	struct starpu_sched_ctx_iterator it;
 	if(workers->init_iterator)
 		workers->init_iterator(workers, &it);
@@ -420,7 +435,7 @@ static void ws_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nw
 
 static void initialize_ws_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_SCHED_CTX_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 
 	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)malloc(sizeof(struct _starpu_work_stealing_data));
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)ws);

+ 10 - 10
src/worker_collection/worker_list.c

@@ -19,7 +19,7 @@
 #include <starpu.h>
 #include <pthread.h>
 
-static unsigned list_has_next(struct starpu_sched_ctx_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+static unsigned list_has_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
 {
 	int nworkers = (int)workers->nworkers;
 	STARPU_ASSERT(it != NULL);
@@ -31,7 +31,7 @@ static unsigned list_has_next(struct starpu_sched_ctx_worker_collection *workers
 	return ret;
 }
 
-static int list_get_next(struct starpu_sched_ctx_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
+static int list_get_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
 {
 	int *workerids = (int *)workers->workerids;
 	int nworkers = (int)workers->nworkers;
@@ -43,7 +43,7 @@ static int list_get_next(struct starpu_sched_ctx_worker_collection *workers, str
 	return ret;
 }
 
-static unsigned _worker_belongs_to_ctx(struct starpu_sched_ctx_worker_collection *workers, int workerid)
+static unsigned _worker_belongs_to_ctx(struct starpu_worker_collection *workers, int workerid)
 {
 	int *workerids = (int *)workers->workerids;
 	unsigned nworkers = workers->nworkers;
@@ -57,7 +57,7 @@ static unsigned _worker_belongs_to_ctx(struct starpu_sched_ctx_worker_collection
 	return 0;
 }
 
-static int list_add(struct starpu_sched_ctx_worker_collection *workers, int worker)
+static int list_add(struct starpu_worker_collection *workers, int worker)
 {
 	int *workerids = (int *)workers->workerids;
 	unsigned *nworkers = &workers->nworkers;
@@ -105,7 +105,7 @@ static void _rearange_workerids(int *workerids, int old_nworkers)
 	  }
 }
 
-static int list_remove(struct starpu_sched_ctx_worker_collection *workers, int worker)
+static int list_remove(struct starpu_worker_collection *workers, int worker)
 {
 	int *workerids = (int *)workers->workerids;
 	unsigned nworkers = workers->nworkers;
@@ -137,7 +137,7 @@ static void _init_workers(int *workerids)
 	return;
 }
 
-static void list_init(struct starpu_sched_ctx_worker_collection *workers)
+static void list_init(struct starpu_worker_collection *workers)
 {
 	int *workerids = (int*)malloc(STARPU_NMAXWORKERS * sizeof(int));
 	_init_workers(workerids);
@@ -148,17 +148,17 @@ static void list_init(struct starpu_sched_ctx_worker_collection *workers)
 	return;
 }
 
-static void list_deinit(struct starpu_sched_ctx_worker_collection *workers)
+static void list_deinit(struct starpu_worker_collection *workers)
 {
 	free(workers->workerids);	/* releases only the worker-id array; the collection structure `workers` itself is not freed here */
 }
 
-static void list_init_iterator(struct starpu_sched_ctx_worker_collection *workers STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_ctx_iterator *it)
+static void list_init_iterator(struct starpu_worker_collection *workers STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_ctx_iterator *it)
 {
 	*((int*)it) = 0;	/* stores 0 through `it` reinterpreted as int*; NOTE(review): presumably the cursor is the first member of struct starpu_sched_ctx_iterator — confirm */
 }
 
-struct starpu_sched_ctx_worker_collection worker_list =
+struct starpu_worker_collection worker_list =
 {
 	.has_next = list_has_next,
 	.get_next = list_get_next,
@@ -167,6 +167,6 @@ struct starpu_sched_ctx_worker_collection worker_list =
 	.init = list_init,
 	.deinit = list_deinit,
 	.init_iterator = list_init_iterator,
-	.type = STARPU_SCHED_CTX_WORKER_LIST
+	.type = STARPU_WORKER_LIST
 };
 

+ 34 - 0
starpu-1.1.pc.in

@@ -0,0 +1,34 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+pkglibdir=@pkglibdir@
+includedir=@includedir@
+
+# When the GCC plug-in is available, the following lines indicate
+# where it is installed.
+@GCC_PLUGIN_DIR_PKGCONFIG@
+@GCC_PLUGIN_PKGCONFIG@
+
+Name: starpu
+Description: offers support for heterogeneous multicore architecture
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@
+Libs: -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_OPENCL_LDFLAGS@ @STARPU_CUDA_LDFLAGS@ @STARPU_SCHED_CTX_HYPERVISOR@
+Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
+Requires: @HWLOC_REQUIRES@

+ 2 - 2
starpufft/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2009-2013  Université de Bordeaux 1
 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -40,7 +40,7 @@ versinclude_HEADERS = 				\
 	starpufft.h
 
 pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = libstarpufft.pc starpufft-1.0.pc
+pkgconfig_DATA = libstarpufft.pc starpufft-1.0.pc starpufft-1.1.pc
 
 libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpufft.c starpufftf.c starpufft_common.c
 libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(FFTW_LIBS) $(FFTWF_LIBS) $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_CUFFT_LDFLAGS)

+ 27 - 0
starpufft/starpufft-1.1.pc.in

@@ -0,0 +1,27 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: starpufft
+Description: offers support for heterogeneous multicore architecture
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@
+Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ 
+Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@

+ 126 - 19
tests/datawizard/allocate.c

@@ -27,31 +27,105 @@ int main(int argc, char **argv)
 }
 #else
 
-int main(int argc, char **argv)
+int test_prefetch(unsigned memnodes)
 {
 	int ret;
-	float *buffer;
-	float *buffer2;
-	float *buffer3;
-	size_t global_size;
+	float *buffers[4];
+	starpu_data_handle_t handles[4];
+	unsigned i;
+	ssize_t available_size;
 
-	setenv("STARPU_LIMIT_CUDA_MEM", "1", 1);
-	setenv("STARPU_LIMIT_OPENCL_MEM", "1", 1);
-	setenv("STARPU_LIMIT_CPU_MEM", "1", 1);
+	buffers[0] = malloc(1*1024*512);
+	STARPU_ASSERT(buffers[0]);
 
-        ret = starpu_init(NULL);
-	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	starpu_variable_data_register(&handles[0], 0, (uintptr_t)buffers[0], 1*1024*512);
+	for(i=1 ; i<memnodes ; i++)
+	{
+		starpu_data_prefetch_on_node(handles[0], i, 0);
+	}
+
+	for(i=1 ; i<memnodes ; i++)
+	{
+		available_size = starpu_memory_get_available(i);
+		FPRINTF(stderr, "Available memory size on node %u: %ld\n", i, available_size);
+		STARPU_CHECK_RETURN_VALUE_IS((int) available_size, 1*1024*512, "starpu_memory_get_available (node %u)", i);
+	}
+
+	buffers[1] = malloc(1*1024*256);
+	STARPU_ASSERT(buffers[1]);
+
+	starpu_variable_data_register(&handles[1], 0, (uintptr_t)buffers[1], 1*1024*256);
+	for(i=1 ; i<memnodes ; i++)
+	{
+		starpu_data_prefetch_on_node(handles[1], i, 0);
+	}
+
+	for(i=1 ; i<memnodes ; i++)
+	{
+		available_size = starpu_memory_get_available(i);
+		FPRINTF(stderr, "Available memory size on node %u: %ld\n", i, available_size);
+		STARPU_CHECK_RETURN_VALUE_IS((int)available_size, 1*1024*256, "starpu_memory_get_available (node %u)", i);
+	}
+
+	buffers[2] = malloc(1*1024*600);
+	STARPU_ASSERT(buffers[2]);
+
+	starpu_variable_data_register(&handles[2], 0, (uintptr_t)buffers[2], 1*1024*600);
+	for(i=1 ; i<memnodes ; i++)
+	{
+		starpu_data_prefetch_on_node(handles[2], i, 0);
+	}
+
+	for(i=1 ; i<memnodes ; i++)
+	{
+		available_size = starpu_memory_get_available(i);
+		FPRINTF(stderr, "Available memory size on node %u: %ld\n", i, available_size);
+		// here, we do not know which data has been cleaned, we cannot test the exact amount of available memory
+		STARPU_CHECK_RETURN_VALUE((available_size == 0), "starpu_memory_get_available (node %u)", i);
+	}
+
+	buffers[3] = malloc(1*1024*512);
+	STARPU_ASSERT(buffers[3]);
+
+	starpu_variable_data_register(&handles[3], 0, (uintptr_t)buffers[3], 1*1024*512);
+	for(i=0 ; i<memnodes ; i++)
+	{
+		starpu_data_prefetch_on_node(handles[3], i, 0);
+	}
 
-	global_size = _starpu_memory_manager_get_global_memory_size(0);
-	if (global_size == 0)
+	for(i=1 ; i<memnodes ; i++)
 	{
-		FPRINTF(stderr, "Global memory size unavailable, skip the test\n");
-		starpu_shutdown();
-		return STARPU_TEST_SKIPPED;
+		available_size = starpu_memory_get_available(i);
+		FPRINTF(stderr, "Available memory size on node %u: %ld\n", i, available_size);
+		STARPU_CHECK_RETURN_VALUE_IS((int)available_size, 1*1024*512, "starpu_memory_get_available (node %u)", i);
 	}
-	STARPU_CHECK_RETURN_VALUE_IS((int)global_size, 1*1024*1024, "_starpu_memory_manager_get_global_memory_size");
-	FPRINTF(stderr, "Available memory size on node 0: %ld\n", global_size);
+
+	for(i=0 ; i<4 ; i++)
+	{
+		free(buffers[i]);
+		starpu_data_unregister(handles[i]);
+	}
+
+#ifdef STARPU_DEVEL
+#warning is is normal that all memory has not been cleaned here? i was assuming the available memory to be 1G
+#endif
+//	for(i=1 ; i<memnodes ; i++)
+//	{
+//		available_size = starpu_memory_get_available(i);
+//		FPRINTF(stderr, "Available memory size on node %u: %ld\n", i, available_size);
+//		STARPU_CHECK_RETURN_VALUE_IS((int)available_size, 1*1024*1024, "starpu_memory_get_available (node %u)", i);
+//	}
+
+	return 0;
+}
+
+void test_malloc()
+{
+	int ret;
+	float *buffer;
+	float *buffer2;
+	float *buffer3;
+	size_t global_size;
 
 	ret = starpu_malloc_flags((void **)&buffer, 1, STARPU_MALLOC_COUNT);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc_flags");
@@ -79,9 +153,42 @@ int main(int argc, char **argv)
 
 	starpu_free_flags(buffer3, 1*1024*512, STARPU_MALLOC_COUNT);
 	starpu_free_flags(buffer, 1, STARPU_MALLOC_COUNT);
+}
+
+int main(int argc, char **argv)
+{	/* Test driver: limit memory via the environment, check every node reports the expected size, then run test_malloc() and test_prefetch(). */
+	int ret;
+	unsigned memnodes, i;
+	ssize_t available_size;
+
+	setenv("STARPU_LIMIT_CUDA_MEM", "1", 1);	/* limits are set before starpu_init(); the loop below expects 1*1024*1024 as the available size */
+	setenv("STARPU_LIMIT_OPENCL_MEM", "1", 1);
+	setenv("STARPU_LIMIT_CPU_MEM", "1", 1);
+
+        ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	memnodes = starpu_memory_nodes_get_count();
+	for(i=0 ; i<memnodes ; i++)
+	{
+		available_size = starpu_memory_get_available(i);
+		if (available_size == -1)	/* -1 means the size of this node is unavailable: shut down and skip the whole test */
+		{
+			FPRINTF(stderr, "Global memory size for node %u unavailable, skip the test\n", i);
+			starpu_shutdown();
+			return STARPU_TEST_SKIPPED;
+		}
+		FPRINTF(stderr, "Available memory size on node %u: %ld\n", i, available_size);	/* NOTE(review): %ld is paired with an ssize_t — confirm this matches on every supported target */
+		STARPU_CHECK_RETURN_VALUE_IS((int)available_size, 1*1024*1024, "starpu_memory_get_available (node %u)", i);
+	}
+
+	test_malloc();
+	ret = test_prefetch(memnodes);	/* the result of test_prefetch() becomes the exit status */
+
 	starpu_shutdown();
-	return 0;
+	return ret;
 }
 
  #endif
+

+ 4 - 1
tests/errorcheck/invalid_blocking_calls.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010, 2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -86,6 +86,7 @@ int main(int argc, char **argv)
 	task->tag_id = TAG;
 
 	task->callback_func = wrong_callback;
+	task->detach = 0;
 
 	ret = starpu_task_submit(task);
 	if (ret == -ENODEV) goto enodev;
@@ -100,6 +101,8 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
 
 	starpu_data_release(handle);
+
+	starpu_task_wait(task);
 	starpu_data_unregister(handle);
 	starpu_shutdown();
 

+ 2 - 0
tests/microbenchs/tasks_overhead.c

@@ -155,6 +155,8 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait");
 	gettimeofday(&end_exec, NULL);
 
+	starpu_task_wait_for_all();
+
 	for (i = 0; i < ntasks; i++)
 		starpu_task_clean(&tasks[i]);
 

+ 6 - 2
tests/sched_policies/simple_cpu_gpu_sched.c

@@ -143,11 +143,15 @@ run(struct starpu_sched_policy *policy)
 		exit(STARPU_TEST_SKIPPED);
 
 	/* At least 1 CPU and 1 GPU are needed. */
-	if (starpu_cpu_worker_get_count() == 0)
+	if (starpu_cpu_worker_get_count() == 0) {
+		starpu_shutdown();
 		exit(STARPU_TEST_SKIPPED);
+	}
 	if (starpu_cuda_worker_get_count() == 0 &&
-	    starpu_opencl_worker_get_count() == 0)
+	    starpu_opencl_worker_get_count() == 0) {
+		starpu_shutdown();
 		exit(STARPU_TEST_SKIPPED);
+	}
 
 	starpu_profiling_status_set(1);
 	init_perfmodels();