浏览代码

merge trunk up to 10397

Simon Archipoff 12 年之前
父节点
当前提交
71f958a95b

+ 14 - 13
doc/doxygen/chapters/api/modularized_scheduler.doxy

@@ -1,19 +1,19 @@
-!\defgroup API_Modularized_Scheduler
+/* !\defgroup API_Modularized_Scheduler
 
 
 \struct _starpu_sched_node
 \struct _starpu_sched_node
 \ingroup API_Modularized_Scheduler
 \ingroup API_Modularized_Scheduler
 This structure represent a scheduler module.
 This structure represent a scheduler module.
 
 
 \var _starpu_sched_node::push_task
 \var _starpu_sched_node::push_task
-	this function push a task in the scheduler module.
+	push a task in the scheduler module.
 \var _starpu_sched_node::pop_task
 \var _starpu_sched_node::pop_task
-	this function pop a task from the scheduler module, the task returned by this function is executable by the caller if its a worker
+	pop a task from the scheduler module, the task returned by this function is executable by the caller if its a worker
 \var _starpu_sched_node::available
 \var _starpu_sched_node::available
-	this function notify workers downstairs that a task is waiting for a pop
+	notify workers downstairs that a task is waiting for a pop
 \var _starpu_sched_node::estimated_load
 \var _starpu_sched_node::estimated_load
-	this function is an heuristic to compute load of scheduler module
+	is a heuristic to compute the load of the scheduler module
 \var _starpu_sched_node::estimated_execute_preds
 \var _starpu_sched_node::estimated_execute_preds
-	this function compute executions prediction for a task
+	compute executions prediction for a task
 \var _starpu_sched_node::nchilds
 \var _starpu_sched_node::nchilds
 	the number of modules downstairs
 	the number of modules downstairs
 \var _starpu_sched_node::childs
 \var _starpu_sched_node::childs
@@ -27,9 +27,9 @@ This structure represent a scheduler module.
 \var _starpu_sched_node::fathers
 \var _starpu_sched_node::fathers
 	the array of scheduler module above indexed by scheduling context index
 	the array of scheduler module above indexed by scheduling context index
 \var _starpu_sched_node::init_data
 \var _starpu_sched_node::init_data
-	this function is called after all the scheduler is created and should init data member
+	is called after all the scheduler is created and should init data member
 \var _starpu_sched_node::deinit_data
 \var _starpu_sched_node::deinit_data
-	this function is called just before _starpu_sched_node_destroy
+	is called just before _starpu_sched_node_destroy
 \var _starpu_sched_node::obj
 \var _starpu_sched_node::obj
 	the hwloc object associed to scheduler module
 	the hwloc object associed to scheduler module
 
 
@@ -60,13 +60,14 @@ this structure containt predictions for a task and is filled by _starpu_sched_no
 	this lock protect the worker member
 	this lock protect the worker member
 
 
 \fn struct _starpu_sched_node * _starpu_sched_node_create(void)
 \fn struct _starpu_sched_node * _starpu_sched_node_create(void)
-    	   this function return an initialised scheduler module with default values
+    	   return an initialised scheduler module with default values
 \fn void _starpu_sched_node_destroy(struct _starpu_sched_node * node)
 \fn void _starpu_sched_node_destroy(struct _starpu_sched_node * node)
-    	 this function free data allocated by _starpu_sched_node_create, but dont call node->deinit_data(node)
+    	 free data allocated by _starpu_sched_node_create, but dont call node->deinit_data(node)
 \fn void _starpu_sched_node_set_father(struct _starpu_sched_node * node, struct _starpu_sched_node * father_node, unsigned sched_ctx_id)
 \fn void _starpu_sched_node_set_father(struct _starpu_sched_node * node, struct _starpu_sched_node * father_node, unsigned sched_ctx_id)
-    	 this function set node->fathers[sched_ctx_id] to father_node
+    	 set node->fathers[sched_ctx_id] to father_node
 \fn void _starpu_sched_node_add_child(struct _starpu_sched_node* node, struct _starpu_sched_node * child)
 \fn void _starpu_sched_node_add_child(struct _starpu_sched_node* node, struct _starpu_sched_node * child)
-    	 this function add child to node->childs and increment nchilds as well
+    	 add child to node->childs and increment nchilds as well
 	 and dont modify child->fathers
 	 and dont modify child->fathers
 \fn void _starpu_sched_node_remove_child(struct _starpu_sched_node * node, struct _starpu_sched_node * child)
 \fn void _starpu_sched_node_remove_child(struct _starpu_sched_node * node, struct _starpu_sched_node * child)
-    	 this function remove child from node->childs and decrement nchilds
+    	 remove child from node->childs and decrement nchilds
+*/

+ 174 - 89
doc/doxygen/chapters/basic_examples.doxy

@@ -60,64 +60,143 @@ The header starpu.h should be included in any code using StarPU.
 
 
 \subsection DefiningACodelet Defining A Codelet
 \subsection DefiningACodelet Defining A Codelet
 
 
+A codelet is a structure that represents a computational kernel. Such a codelet
+may contain an implementation of the same kernel on different architectures
+(e.g. CUDA, x86, ...). For compatibility, make sure that the whole
+structure is properly initialized to zero, either by using the
+function starpu_codelet_init(), or by letting the
+compiler implicitly do it as exemplified below.
+
+The field starpu_codelet::nbuffers specifies the number of data buffers that are
+manipulated by the codelet: here the codelet does not access or modify any data
+that is controlled by our data management library.
+
+We create a codelet which may only be executed on the CPUs. When a CPU
+core executes a codelet, it calls the function
+<c>cpu_func</c>, which \em must have the following prototype:
+
+\code{.c}
+void (*cpu_func)(void *buffers[], void *cl_arg);
+\endcode
+
+In this example, we can ignore the first argument of this function which gives a
+description of the input and output buffers (e.g. the size and the location of
+the matrices) since there is none. We also ignore the second argument
+which is a pointer to optional arguments for the codelet.
+
 \code{.c}
 \code{.c}
-struct params
-{
-    int i;
-    float f;
-};
 void cpu_func(void *buffers[], void *cl_arg)
 void cpu_func(void *buffers[], void *cl_arg)
 {
 {
-    struct params *params = cl_arg;
-
-    printf("Hello world (params = {%i, %f} )\n", params->i, params->f);
+    printf("Hello world\n");
 }
 }
 
 
 struct starpu_codelet cl =
 struct starpu_codelet cl =
 {
 {
-    .where = STARPU_CPU,
     .cpu_funcs = { cpu_func, NULL },
     .cpu_funcs = { cpu_func, NULL },
-    .cpu_funcs_name = { "cpu_func", NULL },
     .nbuffers = 0
     .nbuffers = 0
 };
 };
 \endcode
 \endcode
 
 
-A codelet is a structure that represents a computational kernel. Such a codelet
-may contain an implementation of the same kernel on different architectures
-(e.g. CUDA, x86, ...). For compatibility, make sure that the whole
-structure is properly initialized to zero, either by using the
-function starpu_codelet_init(), or by letting the
-compiler implicitly do it as examplified above.
+\subsection SubmittingATask Submitting A Task
 
 
-The field starpu_codelet::nbuffers specifies the number of data buffers that are
-manipulated by the codelet: here the codelet does not access or modify any data
-that is controlled by our data management library. Note that the argument
-passed to the codelet (the parameter <c>cl_arg</c> of the function
-<c>cpu_func</c>) does not count as a buffer since it is not managed by
-our data management library, but just contain trivial parameters.
+Before submitting any tasks to StarPU, starpu_init() must be called. The
+<c>NULL</c> argument specifies that we use the default configuration. Tasks cannot
+be submitted after the termination of StarPU by a call to
+starpu_shutdown().
+
+In the example above, a task structure is allocated by a call to
+starpu_task_create(). This function only allocates and fills the
+corresponding structure with the default settings, but it does not
+submit the task to StarPU.
 
 
 \internal
 \internal
-TODO need a crossref to the proper description of "where" see bla for more ...
+not really clear ;)
 \endinternal
 \endinternal
 
 
-We create a codelet which may only be executed on the CPUs. The field
-starpu_codelet::where is a bitmask that defines where the codelet may
-be executed. Here, the value ::STARPU_CPU means that only CPUs can
-execute this codelet. Note that field starpu_codelet::where is
-optional, when unset its value is automatically set based on the
-availability of the different fields <c>XXX_funcs</c>.
-When a CPU core executes a codelet, it calls the function
-<c>cpu_func</c>, which \em must have the following prototype:
+The field starpu_task::cl is a pointer to the codelet which the task will
+execute: in other words, the codelet structure describes which computational
+kernel should be offloaded on the different architectures, and the task
+structure is a wrapper containing a codelet and the piece of data on which the
+codelet should operate.
+
+If the field starpu_task::synchronous is non-zero, task submission
+will be synchronous: the function starpu_task_submit() will not return
+until the task has been executed. Note that the function starpu_shutdown()
+does not guarantee that asynchronous tasks have been executed before
+it returns; starpu_task_wait_for_all() can be used to that effect, or
+data can be unregistered (starpu_data_unregister()), which will
+implicitly wait for all the tasks scheduled to work on it, unless
+explicitly disabled thanks to
+starpu_data_set_default_sequential_consistency_flag() or
+starpu_data_set_sequential_consistency_flag().
 
 
 \code{.c}
 \code{.c}
-void (*cpu_func)(void *buffers[], void *cl_arg);
+int main(int argc, char **argv)
+{
+    /* initialize StarPU */
+    starpu_init(NULL);
+
+    struct starpu_task *task = starpu_task_create();
+
+    task->cl = &cl; /* Pointer to the codelet defined above */
+
+    /* starpu_task_submit will be a blocking call. If unset,
+    starpu_task_wait() needs to be called after submitting the task. */
+    task->synchronous = 1;
+
+    /* submit the task to StarPU */
+    starpu_task_submit(task);
+
+    /* terminate StarPU */
+    starpu_shutdown();
+
+    return 0;
+}
 \endcode
 \endcode
 
 
-In this example, we can ignore the first argument of this function which gives a
-description of the input and output buffers (e.g. the size and the location of
-the matrices) since there is none.
-The second argument is a pointer to a buffer passed as an
-argument to the codelet by the means of the field starpu_task::cl_arg.
+\subsection ExecutionOfHelloWorld Execution Of Hello World
+
+\verbatim
+$ make hello_world
+cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world
+$ ./hello_world
+Hello world
+\endverbatim
+
+\subsection PassingArgumentsToTheCodelet Passing Arguments To The Codelet
+
+The optional field starpu_task::cl_arg is a pointer to a buffer
+(of size starpu_task::cl_arg_size) with some parameters for the kernel
+described by the codelet. For instance, if a codelet implements a
+computational kernel that multiplies its input vector by a constant,
+the constant could be specified by the means of this buffer, instead
+of registering it as a StarPU data. It must however be noted that
+StarPU avoids making copies whenever possible and rather passes the
+pointer as such, so the buffer which is pointed at must be kept allocated
+until the task terminates, and if several tasks are submitted with
+various parameters, each of them must be given a pointer to their
+own buffer.
+
+\code{.c}
+struct params
+{
+    int i;
+    float f;
+};
+
+void cpu_func(void *buffers[], void *cl_arg)
+{
+    struct params *params = cl_arg;
+
+    printf("Hello world (params = {%i, %f} )\n", params->i, params->f);
+}
+\endcode
+
+As said before, the field starpu_codelet::nbuffers specifies the
+number of data buffers that are manipulated by the codelet. It does
+not count the argument --- the parameter <c>cl_arg</c> of the function
+<c>cpu_func</c> --- since it is not managed by our data management
+library, but just contains trivial parameters.
 
 
 \internal
 \internal
 TODO rewrite so that it is a little clearer ?
 TODO rewrite so that it is a little clearer ?
@@ -130,14 +209,7 @@ buffer will be modified as well: this for instance implies that the buffer
 cannot be used as a synchronization medium. If synchronization is needed, data
 cannot be used as a synchronization medium. If synchronization is needed, data
 has to be registered to StarPU, see \ref VectorScalingUsingStarPUAPI.
 has to be registered to StarPU, see \ref VectorScalingUsingStarPUAPI.
 
 
-\subsection SubmittingATask Submitting A Task
-
 \code{.c}
 \code{.c}
-void callback_func(void *callback_arg)
-{
-    printf("Callback function (arg %x)\n", callback_arg);
-}
-
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
     /* initialize StarPU */
     /* initialize StarPU */
@@ -151,9 +223,6 @@ int main(int argc, char **argv)
     task->cl_arg = &params;
     task->cl_arg = &params;
     task->cl_arg_size = sizeof(params);
     task->cl_arg_size = sizeof(params);
 
 
-    task->callback_func = callback_func;
-    task->callback_arg = 0x42;
-
     /* starpu_task_submit will be a blocking call */
     /* starpu_task_submit will be a blocking call */
     task->synchronous = 1;
     task->synchronous = 1;
 
 
@@ -167,37 +236,14 @@ int main(int argc, char **argv)
 }
 }
 \endcode
 \endcode
 
 
-Before submitting any tasks to StarPU, starpu_init() must be called. The
-<c>NULL</c> argument specifies that we use the default configuration. Tasks cannot
-be submitted after the termination of StarPU by a call to
-starpu_shutdown().
-
-In the example above, a task structure is allocated by a call to
-starpu_task_create(). This function only allocates and fills the
-corresponding structure with the default settings, but it does not
-submit the task to StarPU.
-
-\internal
-not really clear ;)
-\endinternal
+\verbatim
+$ make hello_world
+cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world
+$ ./hello_world
+Hello world (params = {1, 2.000000} )
+\endverbatim
 
 
-The field starpu_task::cl is a pointer to the codelet which the task will
-execute: in other words, the codelet structure describes which computational
-kernel should be offloaded on the different architectures, and the task
-structure is a wrapper containing a codelet and the piece of data on which the
-codelet should operate.
-
-The optional field starpu_task::cl_arg field is a pointer to a buffer
-(of size starpu_task::cl_arg_size) with some parameters for the kernel
-described by the codelet. For instance, if a codelet implements a
-computational kernel that multiplies its input vector by a constant,
-the constant could be specified by the means of this buffer, instead
-of registering it as a StarPU data. It must however be noted that
-StarPU avoids making copy whenever possible and rather passes the
-pointer as such, so the buffer which is pointed at must be kept allocated
-until the task terminates, and if several tasks are submitted with
-various parameters, each of them must be given a pointer to their
-own buffer.
+\subsection DefiningACallback Defining A Callback
 
 
 Once a task has been executed, an optional callback function
 Once a task has been executed, an optional callback function
 starpu_task::callback_func is called when defined.
 starpu_task::callback_func is called when defined.
@@ -210,27 +256,66 @@ function. The prototype of a callback function must be:
 void (*callback_function)(void *);
 void (*callback_function)(void *);
 \endcode
 \endcode
 
 
-If the field starpu_task::synchronous is non-zero, task submission
-will be synchronous: the function starpu_task_submit() will not return
-until the task has been executed. Note that the function starpu_shutdown()
-does not guarantee that asynchronous tasks have been executed before
-it returns, starpu_task_wait_for_all() can be used to that effect, or
-data can be unregistered (starpu_data_unregister()), which will
-implicitly wait for all the tasks scheduled to work on it, unless
-explicitly disabled thanks to
-starpu_data_set_default_sequential_consistency_flag() or
-starpu_data_set_sequential_consistency_flag().
+\code{.c}
+void callback_func(void *callback_arg)
+{
+    printf("Callback function (arg %x)\n", callback_arg);
+}
 
 
-\subsection ExecutionOfHelloWorld Execution Of Hello World
+int main(int argc, char **argv)
+{
+    /* initialize StarPU */
+    starpu_init(NULL);
+
+    struct starpu_task *task = starpu_task_create();
+
+    task->cl = &cl; /* Pointer to the codelet defined above */
+
+    task->callback_func = callback_func;
+    task->callback_arg = 0x42;
+
+    /* starpu_task_submit will be a blocking call */
+    task->synchronous = 1;
+
+    /* submit the task to StarPU */
+    starpu_task_submit(task);
+
+    /* terminate StarPU */
+    starpu_shutdown();
+
+    return 0;
+}
+\endcode
 
 
 \verbatim
 \verbatim
 $ make hello_world
 $ make hello_world
 cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world
 cc $(pkg-config --cflags starpu-1.2)  $(pkg-config --libs starpu-1.2) hello_world.c -o hello_world
 $ ./hello_world
 $ ./hello_world
-Hello world (params = {1, 2.000000} )
+Hello world
 Callback function (arg 42)
 Callback function (arg 42)
 \endverbatim
 \endverbatim
 
 
+\subsection WhereToExecuteACodelet Where To Execute A Codelet
+
+\code{.c}
+struct starpu_codelet cl =
+{
+    .where = STARPU_CPU,
+    .cpu_funcs = { cpu_func, NULL },
+    .cpu_funcs_name = { "cpu_func", NULL },
+     .nbuffers = 0
+};
+\endcode
+
+We create a codelet which may only be executed on the CPUs. The
+optional field starpu_codelet::where is a bitmask that defines where
+the codelet may be executed. Here, the value ::STARPU_CPU means that
+only CPUs can execute this codelet. When the optional field
+starpu_codelet::where is unset, its value is automatically set based
+on the availability of the different fields <c>XXX_funcs</c>.
+
+TODO: explain starpu_codelet::cpu_funcs_name
+
 \section VectorScalingUsingTheCExtension Vector Scaling Using the C Extension
 \section VectorScalingUsingTheCExtension Vector Scaling Using the C Extension
 
 
 The previous example has shown how to submit tasks. In this section,
 The previous example has shown how to submit tasks. In this section,

+ 34 - 35
include/starpu_deprecated_api.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -27,40 +27,6 @@ extern "C"
 #warning Your application is using former types. You may want to update to use the latest API, by using tools/dev/rename.sh.
 #warning Your application is using former types. You may want to update to use the latest API, by using tools/dev/rename.sh.
 #endif /* defined(STARPU_USE_DEPRECATED_API) || defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) */
 #endif /* defined(STARPU_USE_DEPRECATED_API) || defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) */
 
 
-#ifdef STARPU_USE_DEPRECATED_API
-typedef starpu_data_handle_t starpu_data_handle;
-typedef struct starpu_block_interface starpu_block_interface_t;
-typedef struct starpu_matrix_interface starpu_matrix_interface_t;
-typedef struct starpu_vector_interface starpu_vector_interface_t;
-typedef struct starpu_variable_interface starpu_variable_interface_t;
-typedef struct starpu_csr_interface starpu_csr_interface_t;
-typedef struct starpu_bcsr_interface starpu_bcsr_interface_t;
-typedef struct starpu_multiformat_interface starpu_multiformat_interface_t;
-#define starpu_machine_topology_s starpu_machine_topology
-#define starpu_htbl32_node_s starpu_htbl32_node
-#define starpu_history_list_t starpu_history_list
-#define starpu_buffer_descr_t starpu_buffer_descr
-#define starpu_history_list_t starpu_history_list
-#define starpu_regression_model_t starpu_regression_model
-#define starpu_per_arch_perfmodel_t starpu_per_arch_perfmodel
-#define starpu_perfmodel_t starpu_perfmodel
-#define starpu_sched_policy_s starpu_sched_policy
-#define starpu_data_interface_ops_t starpu_data_interface_ops
-
-typedef struct starpu_buffer_descr starpu_buffer_descr;
-typedef struct starpu_codelet starpu_codelet;
-typedef struct starpu_codelet starpu_codelet_t;
-typedef enum starpu_access_mode starpu_access_mode;
-
-#define starpu_print_bus_bandwidth     starpu_bus_print_bandwidth
-#define starpu_get_handle_interface_id starpu_handle_get_interface_id
-#define starpu_get_current_task        starpu_task_get_current
-#define starpu_unpack_cl_args          starpu_codelet_unpack_args
-#define starpu_pack_cl_args   	       starpu_codelet_pack_args
-#define starpu_task_deinit	       starpu_task_clean
-
-#endif /* STARPU_USE_DEPRECATED_API */
-
 #ifdef STARPU_USE_DEPRECATED_ONE_ZERO_API
 #ifdef STARPU_USE_DEPRECATED_ONE_ZERO_API
 
 
 #define starpu_allocate_buffer_on_node	starpu_malloc_on_node
 #define starpu_allocate_buffer_on_node	starpu_malloc_on_node
@@ -116,6 +82,39 @@ typedef enum starpu_access_mode starpu_access_mode;
 
 
 #endif /* STARPU_USE_DEPRECATED_ONE_ZERO_API */
 #endif /* STARPU_USE_DEPRECATED_ONE_ZERO_API */
 
 
+#ifdef STARPU_USE_DEPRECATED_API
+typedef starpu_data_handle_t starpu_data_handle;
+typedef struct starpu_block_interface starpu_block_interface_t;
+typedef struct starpu_matrix_interface starpu_matrix_interface_t;
+typedef struct starpu_vector_interface starpu_vector_interface_t;
+typedef struct starpu_variable_interface starpu_variable_interface_t;
+typedef struct starpu_csr_interface starpu_csr_interface_t;
+typedef struct starpu_bcsr_interface starpu_bcsr_interface_t;
+typedef struct starpu_multiformat_interface starpu_multiformat_interface_t;
+#define starpu_machine_topology_s starpu_machine_topology
+#define starpu_htbl32_node_s starpu_htbl32_node
+#define starpu_history_list_t starpu_history_list
+#define starpu_buffer_descr_t starpu_buffer_descr
+#define starpu_regression_model_t starpu_regression_model
+#define starpu_per_arch_perfmodel_t starpu_per_arch_perfmodel
+#define starpu_perfmodel_t starpu_perfmodel
+#define starpu_sched_policy_s starpu_sched_policy
+#define starpu_data_interface_ops_t starpu_data_interface_ops
+
+typedef struct starpu_buffer_descr starpu_buffer_descr;
+typedef struct starpu_codelet starpu_codelet;
+typedef struct starpu_codelet starpu_codelet_t;
+typedef enum starpu_access_mode starpu_access_mode;
+
+#define starpu_print_bus_bandwidth     starpu_bus_print_bandwidth
+#define starpu_get_handle_interface_id starpu_handle_get_interface_id
+#define starpu_get_current_task        starpu_task_get_current
+#define starpu_unpack_cl_args          starpu_codelet_unpack_args
+#define starpu_pack_cl_args   	       starpu_codelet_pack_args
+#define starpu_task_deinit	       starpu_task_clean
+
+#endif /* STARPU_USE_DEPRECATED_API */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 11 - 7
include/starpu_thread.h

@@ -83,9 +83,10 @@ typedef pthread_mutexattr_t starpu_pthread_mutexattr_t;
 
 
 #define starpu_pthread_mutex_init pthread_mutex_init
 #define starpu_pthread_mutex_init pthread_mutex_init
 #define starpu_pthread_mutex_destroy pthread_mutex_destroy
 #define starpu_pthread_mutex_destroy pthread_mutex_destroy
-#define starpu_pthread_mutex_lock pthread_mutex_lock
-#define starpu_pthread_mutex_unlock pthread_mutex_unlock
-#define starpu_pthread_mutex_trylock pthread_mutex_trylock
+
+int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex);
+int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex);
+int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex);
 
 
 #define STARPU_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
 #define STARPU_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
 
 
@@ -139,7 +140,9 @@ typedef pthread_condattr_t starpu_pthread_condattr_t;
 #define starpu_pthread_cond_init pthread_cond_init
 #define starpu_pthread_cond_init pthread_cond_init
 #define starpu_pthread_cond_signal pthread_cond_signal
 #define starpu_pthread_cond_signal pthread_cond_signal
 #define starpu_pthread_cond_broadcast pthread_cond_broadcast
 #define starpu_pthread_cond_broadcast pthread_cond_broadcast
-#define starpu_pthread_cond_wait pthread_cond_wait
+
+int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
+
 #define starpu_pthread_cond_timedwait pthread_cond_timedwait
 #define starpu_pthread_cond_timedwait pthread_cond_timedwait
 #define starpu_pthread_cond_destroy pthread_cond_destroy
 #define starpu_pthread_cond_destroy pthread_cond_destroy
 
 
@@ -167,9 +170,10 @@ typedef pthread_rwlockattr_t starpu_pthread_rwlockattr_t;
 
 
 #define starpu_pthread_rwlock_init pthread_rwlock_init
 #define starpu_pthread_rwlock_init pthread_rwlock_init
 #define starpu_pthread_rwlock_destroy pthread_rwlock_destroy
 #define starpu_pthread_rwlock_destroy pthread_rwlock_destroy
-#define starpu_pthread_rwlock_rdlock pthread_rwlock_rdlock
-#define starpu_pthread_rwlock_wrlock pthread_rwlock_wrlock
-#define starpu_pthread_rwlock_unlock pthread_rwlock_unlock
+
+int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock);
+int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock);
+int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock);
 
 
 #endif /* STARPU_SIMGRID, _MSC_VER */
 #endif /* STARPU_SIMGRID, _MSC_VER */
 
 

+ 2 - 2
libstarpu.pc.in

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+# Copyright (C) 2009, 2010, 2011, 2013  Université de Bordeaux 1
 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -22,7 +22,7 @@ includedir=@includedir@
 Name: starpu
 Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
 Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@
 Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@
 Libs.private: @LDFLAGS@ @LIBS@
 Requires: @HWLOC_REQUIRES@
 Requires: @HWLOC_REQUIRES@

+ 11 - 8
sc_hypervisor/src/policies_utils/speed.c

@@ -64,7 +64,7 @@ double sc_hypervisor_get_velocity_per_worker(struct sc_hypervisor_wrapper *sc_w,
                 double curr_time = starpu_timing_now();
                 double curr_time = starpu_timing_now();
                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
 		elapsed_time -= sc_w->idle_time[worker];
 		elapsed_time -= sc_w->idle_time[worker];
-		sc_w->idle_time[worker] = 0.0;
+		
 
 
 /* 		size_t elapsed_data_used = sc_w->elapsed_data[worker]; */
 /* 		size_t elapsed_data_used = sc_w->elapsed_data[worker]; */
 /*  		enum starpu_worker_archtype arch = starpu_worker_get_type(worker); */
 /*  		enum starpu_worker_archtype arch = starpu_worker_get_type(worker); */
@@ -84,7 +84,6 @@ double sc_hypervisor_get_velocity_per_worker(struct sc_hypervisor_wrapper *sc_w,
 /* 		} */
 /* 		} */
 			
 			
                 double vel  = (elapsed_flops/elapsed_time);/* in Gflops/s */
                 double vel  = (elapsed_flops/elapsed_time);/* in Gflops/s */
-//		printf("%d in ctx %d: vel %lf\n", worker, sc_w->sched_ctx, vel);
 		sc_w->ref_velocity[worker] = sc_w->ref_velocity[worker] > 1.0 ? (sc_w->ref_velocity[worker] + vel) / 2 : vel; 
 		sc_w->ref_velocity[worker] = sc_w->ref_velocity[worker] > 1.0 ? (sc_w->ref_velocity[worker] + vel) / 2 : vel; 
                 return vel;
                 return vel;
         }
         }
@@ -143,12 +142,9 @@ double sc_hypervisor_get_ref_velocity_per_worker_type(struct sc_hypervisor_wrapp
                 enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker);
                 enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker);
                 if(arch == req_arch)
                 if(arch == req_arch)
                 {
                 {
-		
-			if(sc_w->ref_velocity[worker] > 1.0)
-			{
-				ref_velocity += sc_w->ref_velocity[worker];
-				nw++;
-			}
+			if(sc_w->ref_velocity[worker] < 1.0) return -1.0;
+			ref_velocity += sc_w->ref_velocity[worker];
+			nw++;
 		}
 		}
 	}
 	}
 	
 	
@@ -159,10 +155,17 @@ double sc_hypervisor_get_velocity(struct sc_hypervisor_wrapper *sc_w, enum starp
 {
 {
 
 
 	double velocity = sc_hypervisor_get_velocity_per_worker_type(sc_w, arch);
 	double velocity = sc_hypervisor_get_velocity_per_worker_type(sc_w, arch);
+	printf("arch %d vel %lf\n", arch, velocity);
 	if(velocity == -1.0)
 	if(velocity == -1.0)
+	{
 		velocity = sc_hypervisor_get_ref_velocity_per_worker_type(sc_w, arch);
 		velocity = sc_hypervisor_get_ref_velocity_per_worker_type(sc_w, arch);
+		printf("arch %d ref_vel %lf\n", arch, velocity);
+	}
 	if(velocity == -1.0)
 	if(velocity == -1.0)
+	{
 		velocity = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
 		velocity = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
+		printf("arch %d default_vel %lf\n", arch, velocity);
+	}
        
        
 	return velocity;
 	return velocity;
 }
 }

+ 12 - 0
sc_hypervisor/src/sc_hypervisor.c

@@ -440,6 +440,16 @@ double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_
 	return ret_val;
 	return ret_val;
 }
 }
 
 
+static void _reset_idle_time(unsigned sched_ctx)
+{
+	int i;
+	for(i = 0; i < STARPU_NMAXWORKERS; i++)
+	{
+		hypervisor.sched_ctx_w[sched_ctx].idle_time[i] = 0.0;
+	}
+	return;
+}
+
 void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx)
 void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx)
 {
 {
 	/* info concerning only the gflops_rate strateg */
 	/* info concerning only the gflops_rate strateg */
@@ -449,9 +459,11 @@ void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sche
 	double start_time =  starpu_timing_now();
 	double start_time =  starpu_timing_now();
 	sender_sc_w->start_time = start_time;
 	sender_sc_w->start_time = start_time;
 	_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
 	_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
+	_reset_idle_time(sender_sched_ctx);
 
 
 	receiver_sc_w->start_time = start_time;
 	receiver_sc_w->start_time = start_time;
 	_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
 	_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
+	_reset_idle_time(receiver_sched_ctx);
 }
 }
 
 
 /* actually move the workers: the cpus are moved, gpus are only shared  */
 /* actually move the workers: the cpus are moved, gpus are only shared  */

+ 155 - 6
src/common/fxt.h

@@ -105,6 +105,36 @@
 
 
 #define _STARPU_FUT_TASK_WAIT_FOR_ALL	0x513b
 #define _STARPU_FUT_TASK_WAIT_FOR_ALL	0x513b
 
 
+#define _STARPU_FUT_LOCKING_MUTEX	0x5140	
+#define _STARPU_FUT_MUTEX_LOCKED	0x5141	
+
+#define _STARPU_FUT_UNLOCKING_MUTEX		0x5142	
+#define _STARPU_FUT_MUTEX_UNLOCKED		0x5143	
+
+#define _STARPU_FUT_TRYLOCK_MUTEX		0x5144	
+
+#define _STARPU_FUT_RDLOCKING_RWLOCK	0x5145	
+#define _STARPU_FUT_RWLOCK_RDLOCKED		0x5146	
+
+#define _STARPU_FUT_WRLOCKING_RWLOCK	0x5147	
+#define _STARPU_FUT_RWLOCK_WRLOCKED		0x5148	
+
+#define _STARPU_FUT_UNLOCKING_RWLOCK	0x5149	
+#define _STARPU_FUT_RWLOCK_UNLOCKED		0x514a	
+
+#define _STARPU_FUT_LOCKING_SPINLOCK	0x514b	
+#define _STARPU_FUT_SPINLOCK_LOCKED		0x514c	
+
+#define _STARPU_FUT_UNLOCKING_SPINLOCK		0x514d	
+#define _STARPU_FUT_SPINLOCK_UNLOCKED		0x514e	
+
+#define _STARPU_FUT_TRYLOCK_SPINLOCK		0x514f	
+
+#define _STARPU_FUT_COND_WAIT_BEGIN		0x5150
+#define _STARPU_FUT_COND_WAIT_END		0x5151
+
+#define _STARPU_FUT_MEMORY_FULL			0x5152
+
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 #include <fxt/fxt.h>
 #include <fxt/fxt.h>
 #include <fxt/fut.h>
 #include <fxt/fut.h>
@@ -136,6 +166,24 @@ void _starpu_fxt_register_thread(unsigned);
 /* Sometimes we need something a little more specific than the wrappers from
 /* Sometimes we need something a little more specific than the wrappers from
  * FxT: these macros make it possible to add an event with 3 (or 4) numbers followed
  * FxT: these macros make it possible to add an event with 3 (or 4) numbers followed
  * by a string. */
  * by a string. */
+#define _STARPU_FUT_DO_PROBE2STR(CODE, P1, P2, str)			\
+do {									\
+    if(fut_active) {							\
+	/* No more than FXT_MAX_PARAMS args are allowed */		\
+	/* we add a \0 just in case ... */				\
+	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 2)*sizeof(unsigned long));\
+	unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\
+	unsigned nbargs = 2 + nbargs_str;				\
+	size_t total_len = FUT_SIZE(nbargs);				\
+	unsigned long *futargs =					\
+		fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\
+	*(futargs++) = (unsigned long)(P1);				\
+	*(futargs++) = (unsigned long)(P2);				\
+	snprintf((char *)futargs, len, "%s", str);			\
+	((char *)futargs)[len - 1] = '\0';				\
+    }									\
+} while (0);
+
 #define _STARPU_FUT_DO_PROBE3STR(CODE, P1, P2, P3, str)			\
 #define _STARPU_FUT_DO_PROBE3STR(CODE, P1, P2, P3, str)			\
 do {									\
 do {									\
     if(fut_active) {							\
     if(fut_active) {							\
@@ -341,11 +389,11 @@ do {										\
 #define _STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
 #define _STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
 	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid());
 	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid());
 	
 	
-#define _STARPU_TRACE_START_MEMRECLAIM(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_START_MEMRECLAIM, memnode, _starpu_gettid());
+#define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)		\
+	FUT_DO_PROBE3(_STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
 	
 	
-#define _STARPU_TRACE_END_MEMRECLAIM(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_END_MEMRECLAIM, memnode, _starpu_gettid());
+#define _STARPU_TRACE_END_MEMRECLAIM(memnode, is_prefetch)		\
+	FUT_DO_PROBE3(_STARPU_FUT_END_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
 	
 	
 /* We skip these events because they are called so often that they cause FxT to
 /* We skip these events because they are called so often that they cause FxT to
  * fail and make the overall trace unreadable anyway. */
  * fail and make the overall trace unreadable anyway. */
@@ -366,6 +414,88 @@ do {										\
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
 	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)
 	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)
 
 
+#ifdef STARPU_FXT_LOCK_TRACES 
+
+#define _STARPU_TRACE_LOCKING_MUTEX(file,line)	\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_LOCKING_MUTEX,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_MUTEX_LOCKED(file,line)			\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_MUTEX_LOCKED,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_UNLOCKING_MUTEX(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_UNLOCKING_MUTEX,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_MUTEX_UNLOCKED(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_MUTEX_UNLOCKED,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_TRYLOCK_MUTEX(file,line)			\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_TRYLOCK_MUTEX,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_RDLOCKING_RWLOCK(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_RDLOCKING_RWLOCK,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_RWLOCK_RDLOCKED(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_RWLOCK_RDLOCKED,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_WRLOCKING_RWLOCK(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_WRLOCKING_RWLOCK,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_RWLOCK_WRLOCKED(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_RWLOCK_WRLOCKED,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_UNLOCKING_RWLOCK(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_UNLOCKING_RWLOCK,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_RWLOCK_UNLOCKED(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_RWLOCK_UNLOCKED,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_LOCKING_SPINLOCK(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_SPINLOCK_LOCKED(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file,line)	\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_SPINLOCK_UNLOCKED(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_COND_WAIT_BEGIN(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_COND_WAIT_BEGIN,line,_starpu_gettid(),file);
+
+#define _STARPU_TRACE_COND_WAIT_END(file,line)		\
+	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_COND_WAIT_END,line,_starpu_gettid(),file);
+
+#else // !STARPU_FXT_LOCK_TRACES
+
+#define _STARPU_TRACE_LOCKING_MUTEX(file,line)			do {} while(0)
+#define _STARPU_TRACE_MUTEX_LOCKED(file,line)			do {} while(0)
+#define _STARPU_TRACE_UNLOCKING_MUTEX(file,line)		do {} while(0)
+#define _STARPU_TRACE_MUTEX_UNLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_TRYLOCK_MUTEX(file,line)			do {} while(0)
+#define _STARPU_TRACE_RDLOCKING_RWLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_RWLOCK_RDLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_WRLOCKING_RWLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_RWLOCK_WRLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_UNLOCKING_RWLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_RWLOCK_UNLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_LOCKING_SPINLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_SPINLOCK_LOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file,line)	do {} while(0)
+#define _STARPU_TRACE_SPINLOCK_UNLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_COND_WAIT_BEGIN(file,line)		do {} while(0)
+#define _STARPU_TRACE_COND_WAIT_END(file,line)			do {} while(0)
+
+#endif // STARPU_FXT_LOCK_TRACES
+
+#define _STARPU_TRACE_MEMORY_FULL(size)	\
+	FUT_DO_PROBE2(_STARPU_FUT_MEMORY_FULL,size,_starpu_gettid());
+
 #else // !STARPU_USE_FXT
 #else // !STARPU_USE_FXT
 
 
 /* Dummy macros in case FxT is disabled */
 /* Dummy macros in case FxT is disabled */
@@ -405,13 +535,32 @@ do {										\
 #define _STARPU_TRACE_END_ALLOC(memnode)		do {} while(0)
 #define _STARPU_TRACE_END_ALLOC(memnode)		do {} while(0)
 #define _STARPU_TRACE_START_ALLOC_REUSE(a)	do {} while(0)
 #define _STARPU_TRACE_START_ALLOC_REUSE(a)	do {} while(0)
 #define _STARPU_TRACE_END_ALLOC_REUSE(a)		do {} while(0)
 #define _STARPU_TRACE_END_ALLOC_REUSE(a)		do {} while(0)
-#define _STARPU_TRACE_START_MEMRECLAIM(memnode)	do {} while(0)
-#define _STARPU_TRACE_END_MEMRECLAIM(memnode)	do {} while(0)
+#define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
+#define _STARPU_TRACE_END_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_START_PROGRESS(memnode)	do {} while(0)
 #define _STARPU_TRACE_START_PROGRESS(memnode)	do {} while(0)
 #define _STARPU_TRACE_END_PROGRESS(memnode)	do {} while(0)
 #define _STARPU_TRACE_END_PROGRESS(memnode)	do {} while(0)
 #define _STARPU_TRACE_USER_EVENT(code)		do {} while(0)
 #define _STARPU_TRACE_USER_EVENT(code)		do {} while(0)
 #define _STARPU_TRACE_SET_PROFILING(status)	do {} while(0)
 #define _STARPU_TRACE_SET_PROFILING(status)	do {} while(0)
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL		do {} while(0)
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL		do {} while(0)
+#define _STARPU_TRACE_LOCKING_MUTEX(file,line)			do {} while(0)
+#define _STARPU_TRACE_MUTEX_LOCKED(file,line)			do {} while(0)
+#define _STARPU_TRACE_UNLOCKING_MUTEX(file,line)		do {} while(0)
+#define _STARPU_TRACE_MUTEX_UNLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_TRYLOCK_MUTEX(file,line)			do {} while(0)
+#define _STARPU_TRACE_RDLOCKING_RWLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_RWLOCK_RDLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_WRLOCKING_RWLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_RWLOCK_WRLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_UNLOCKING_RWLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_RWLOCK_UNLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_LOCKING_SPINLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_SPINLOCK_LOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file,line)	do {} while(0)
+#define _STARPU_TRACE_SPINLOCK_UNLOCKED(file,line)		do {} while(0)
+#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file,line)		do {} while(0)
+#define _STARPU_TRACE_COND_WAIT_BEGIN(file,line)		do {} while(0)
+#define _STARPU_TRACE_COND_WAIT_END(file,line)			do {} while(0)
+#define _STARPU_TRACE_MEMORY_FULL(size)				do {} while(0)
 
 
 #endif // STARPU_USE_FXT
 #endif // STARPU_USE_FXT
 
 

+ 2 - 0
src/common/starpu_spinlock.c

@@ -18,6 +18,7 @@
 #include <common/starpu_spinlock.h>
 #include <common/starpu_spinlock.h>
 #include <common/config.h>
 #include <common/config.h>
 #include <common/utils.h>
 #include <common/utils.h>
+#include <common/fxt.h>
 #include <starpu_util.h>
 #include <starpu_util.h>
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
@@ -147,6 +148,7 @@ int _starpu_spin_trylock(struct _starpu_spinlock *lock)
 #endif
 #endif
 }
 }
 
 
+#undef _starpu_spin_unlock
 int _starpu_spin_unlock(struct _starpu_spinlock *lock STARPU_ATTRIBUTE_UNUSED)
 int _starpu_spin_unlock(struct _starpu_spinlock *lock STARPU_ATTRIBUTE_UNUSED)
 {
 {
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID

+ 52 - 7
src/common/starpu_spinlock.h

@@ -41,27 +41,72 @@ struct _starpu_spinlock
 #endif
 #endif
 };
 };
 
 
+#ifdef STARPU_SPINLOCK_CHECK 
+#define STARPU_RECORD_LOCK(lock) do { 	\
+	(lock)->last_taker = __starpu_func__; \
+} while(0) 
+#else // !STARPU_SPINLOCK_CHECK
+#define STARPU_RECORD_LOCK(lock) do {} while(0)
+#endif // STARPU_SPINLOCK_CHECK
+
 int _starpu_spin_init(struct _starpu_spinlock *lock);
 int _starpu_spin_init(struct _starpu_spinlock *lock);
 int _starpu_spin_destroy(struct _starpu_spinlock *lock);
 int _starpu_spin_destroy(struct _starpu_spinlock *lock);
 
 
 int _starpu_spin_lock(struct _starpu_spinlock *lock);
 int _starpu_spin_lock(struct _starpu_spinlock *lock);
-#if defined(STARPU_SPINLOCK_CHECK)
 #define _starpu_spin_lock(lock) ({ \
 #define _starpu_spin_lock(lock) ({ \
+	const char *file;   \
+	if (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) \
+	{ \
+		file = strrchr(__FILE__,'/'); \
+		file += sizeof(char);\
+		_STARPU_TRACE_LOCKING_SPINLOCK(file,__LINE__); \
+	}\
 	_starpu_spin_lock(lock); \
 	_starpu_spin_lock(lock); \
-	(lock)->last_taker = __starpu_func__; \
+	if (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) \
+	{ \
+		file = strrchr(__FILE__,'/'); \
+		file += sizeof(char);\
+		_STARPU_TRACE_SPINLOCK_LOCKED(file,__LINE__); \
+	}\
+	STARPU_RECORD_LOCK(lock); \
 	0; \
 	0; \
-})
-#endif
+}) 
+
 int _starpu_spin_trylock(struct _starpu_spinlock *lock);
 int _starpu_spin_trylock(struct _starpu_spinlock *lock);
-#if defined(STARPU_SPINLOCK_CHECK)
 #define _starpu_spin_trylock(lock) ({ \
 #define _starpu_spin_trylock(lock) ({ \
+	const char *file;   \
+	if (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) \
+	{ \
+		file = strrchr(__FILE__,'/'); \
+		file += sizeof(char);\
+		_STARPU_TRACE_TRYLOCK_SPINLOCK(file,__LINE__); \
+	}\
 	int err = _starpu_spin_trylock(lock); \
 	int err = _starpu_spin_trylock(lock); \
 	if (!err) \
 	if (!err) \
-		(lock)->last_taker = __starpu_func__; \
+		STARPU_RECORD_LOCK(lock); \
 	err; \
 	err; \
 })
 })
-#endif
 int _starpu_spin_checklocked(struct _starpu_spinlock *lock);
 int _starpu_spin_checklocked(struct _starpu_spinlock *lock);
 int _starpu_spin_unlock(struct _starpu_spinlock *lock);
 int _starpu_spin_unlock(struct _starpu_spinlock *lock);
+#define _starpu_spin_unlock(lock) ({ \
+	const char *file;   \
+	if (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) \
+	{ \
+		file = strrchr(__FILE__,'/'); \
+		file += sizeof(char);\
+		_STARPU_TRACE_UNLOCKING_SPINLOCK(file,__LINE__); \
+	}\
+	_starpu_spin_unlock(lock); \
+	if (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) \
+	{ \
+		file = strrchr(__FILE__,'/'); \
+		file += sizeof(char);\
+		_STARPU_TRACE_SPINLOCK_UNLOCKED(file,__LINE__); \
+	}\
+	0; \
+}) 
+
+
+#define STARPU_SPIN_MAXTRY 10 
 
 
 #endif // __STARPU_SPINLOCK_H__
 #endif // __STARPU_SPINLOCK_H__

+ 179 - 4
src/common/thread.c

@@ -84,19 +84,44 @@ int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex)
 
 
 int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex)
 int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex)
 {
 {
+	const char *file;   
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_LOCKING_MUTEX(file,__LINE__); 
+
 	if (!*mutex) STARPU_PTHREAD_MUTEX_INIT(mutex, NULL);
 	if (!*mutex) STARPU_PTHREAD_MUTEX_INIT(mutex, NULL);
+	
 	xbt_mutex_acquire(*mutex);
 	xbt_mutex_acquire(*mutex);
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_MUTEX_LOCKED(file,__LINE__); 
+	
 	return 0;
 	return 0;
 }
 }
 
 
 int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex)
 int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex)
 {
 {
+	const char *file;   
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_UNLOCKING_MUTEX(file,__LINE__); 
+
 	xbt_mutex_release(*mutex);
 	xbt_mutex_release(*mutex);
+	
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_MUTEX_UNLOCKED(file,__LINE__); 
+	
 	return 0;
 	return 0;
 }
 }
 
 
 int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex)
 int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex)
 {
 {
+	const char *file;   
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_TRYLOCK_MUTEX(file,__LINE__); 
+
 	xbt_mutex_acquire(*mutex);
 	xbt_mutex_acquire(*mutex);
 	return 0;
 	return 0;
 }
 }
@@ -162,9 +187,19 @@ int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond)
 
 
 int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
 int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
 {
 {
+	const char* file;													
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_COND_WAIT_BEGIN(file,__LINE__);			
+
 	if (!*cond)
 	if (!*cond)
 		STARPU_PTHREAD_COND_INIT(cond, NULL);
 		STARPU_PTHREAD_COND_INIT(cond, NULL);
 	xbt_cond_wait(*cond, *mutex);
 	xbt_cond_wait(*cond, *mutex);
+	
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_COND_WAIT_END(file,__LINE__);			
+	
 	return 0;
 	return 0;
 }
 }
 
 
@@ -187,19 +222,159 @@ int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock)
 
 
 int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock)
 int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock)
 {
 {
-	return starpu_pthread_mutex_lock(rwlock);
+	const char* file;													
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_RDLOCKING_RWLOCK(file,__LINE__);			
+
+ 	int p_ret = starpu_pthread_mutex_lock(rwlock);
+	
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_RWLOCK_RDLOCKED(file,__LINE__);			
+	
+	return p_ret;
 }
 }
 
 
 int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock)
 int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock)
 {
 {
-	return starpu_pthread_mutex_lock(rwlock);
+	const char* file;													
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_WRLOCKING_RWLOCK(file,__LINE__);			
+
+ 	int p_ret = starpu_pthread_mutex_lock(rwlock);
+	
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_RWLOCK_WRLOCKED(file,__LINE__);			
+	
+	return p_ret;
 }
 }
 
 
 int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock)
 int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock)
 {
 {
-	return starpu_pthread_mutex_unlock(rwlock);
+	const char* file;													
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_UNLOCKING_RWLOCK(file,__LINE__);			
+	
+ 	int p_ret = starpu_pthread_mutex_unlock(rwlock);
+	
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_RWLOCK_UNLOCKED(file,__LINE__);			
+	
+	return p_ret;
 }
 }
 
 
+#elif !defined(_MSC_VER) /* !STARPU_SIMGRID */
+
+int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex)
+{
+	const char *file;   
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_LOCKING_MUTEX(file,__LINE__); 
+
+	pthread_mutex_lock(mutex);
+
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_MUTEX_LOCKED(file,__LINE__); 
+
+	return 0;
+}
+
+int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex)
+{
+	const char *file;   
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_UNLOCKING_MUTEX(file,__LINE__); 
+
+	pthread_mutex_unlock(mutex);
+
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_MUTEX_UNLOCKED(file,__LINE__); 
+
+	return 0;
+}
+
+int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex)
+{
+	const char *file;   
+	file = strrchr(__FILE__,'/'); 
+	file += sizeof(char);
+	_STARPU_TRACE_LOCKING_MUTEX(file,__LINE__); 
 
 
+	pthread_mutex_trylock(mutex);
+	return 0;
+}
+
+int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
+{
+	const char* file;													
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_COND_WAIT_BEGIN(file,__LINE__);			
+
+ 	pthread_cond_wait(cond, mutex);
+
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_COND_WAIT_END(file,__LINE__);			
+
+	return 0;
+}
+
+int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock)
+{
+	const char* file;													
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_RDLOCKING_RWLOCK(file,__LINE__);			
+
+ 	int p_ret = pthread_rwlock_rdlock(rwlock);
+	
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_RWLOCK_RDLOCKED(file,__LINE__);			
+
+	return p_ret;
+}
+
+int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock)
+{
+	const char* file;													
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_WRLOCKING_RWLOCK(file,__LINE__);			
+
+ 	int p_ret = pthread_rwlock_wrlock(rwlock);
+	
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_RWLOCK_WRLOCKED(file,__LINE__);			
+	
+	return p_ret;
+}
+
+int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock)
+{
+	const char* file;													
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_UNLOCKING_RWLOCK(file,__LINE__);			
+
+ 	int p_ret = pthread_rwlock_unlock(rwlock);
+	
+	file = strrchr(__FILE__,'/');							
+	file += sizeof(char);										
+	_STARPU_TRACE_RWLOCK_UNLOCKED(file,__LINE__);			
+	
+	return p_ret;
+}
 
 
-#endif /* STARPU_SIMGRID */
+#endif /* STARPU_SIMGRID, _MSC_VER */

+ 1 - 0
src/common/thread.h

@@ -19,6 +19,7 @@
 #define __COMMON_THREAD_H__
 #define __COMMON_THREAD_H__
 
 
 #include <starpu.h>
 #include <starpu.h>
+#include <common/fxt.h>
 
 
 #define _starpu_pthread_barrier_t pthread_barrier_t
 #define _starpu_pthread_barrier_t pthread_barrier_t
 
 

+ 7 - 1
src/core/dependencies/data_concurrency.c

@@ -92,8 +92,14 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 	 * lock to be available. */
 	 * lock to be available. */
 	if (request_from_codelet)
 	if (request_from_codelet)
 	{
 	{
-		while (_starpu_spin_trylock(&handle->header_lock))
+		int cpt = 0;
+		while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
+		{
+			cpt++;
 			_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
 			_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
+		}
+		if (cpt == STARPU_SPIN_MAXTRY)
+			_starpu_spin_lock(&handle->header_lock);
 	}
 	}
 	else
 	else
 	{
 	{

+ 14 - 2
src/datawizard/coherency.c

@@ -494,8 +494,14 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, struct _starpu_data_
 	unsigned local_node = _starpu_memory_node_get_local_key();
 	unsigned local_node = _starpu_memory_node_get_local_key();
         _STARPU_LOG_IN();
         _STARPU_LOG_IN();
 
 
-	while (_starpu_spin_trylock(&handle->header_lock))
+	int cpt = 0;
+	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
+	{
+		cpt++;
 		_starpu_datawizard_progress(local_node, 1);
 		_starpu_datawizard_progress(local_node, 1);
+	}
+	if (cpt == STARPU_SPIN_MAXTRY)
+		_starpu_spin_lock(&handle->header_lock);
 
 
 	if (!detached)
 	if (!detached)
 	{
 	{
@@ -565,8 +571,14 @@ void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_
 		_starpu_write_through_data(handle, memory_node, wt_mask);
 		_starpu_write_through_data(handle, memory_node, wt_mask);
 
 
 	unsigned local_node = _starpu_memory_node_get_local_key();
 	unsigned local_node = _starpu_memory_node_get_local_key();
-	while (_starpu_spin_trylock(&handle->header_lock))
+	int cpt = 0;
+	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
+	{
+		cpt++;
 		_starpu_datawizard_progress(local_node, 1);
 		_starpu_datawizard_progress(local_node, 1);
+	}
+	if (cpt == STARPU_SPIN_MAXTRY)
+		_starpu_spin_lock(&handle->header_lock);
 
 
 	/* Release refcnt taken by fetch_data_on_node */
 	/* Release refcnt taken by fetch_data_on_node */
 	replicate->refcnt--;
 	replicate->refcnt--;

+ 7 - 2
src/datawizard/malloc.c

@@ -19,6 +19,7 @@
 
 
 #include <core/workers.h>
 #include <core/workers.h>
 #include <common/config.h>
 #include <common/config.h>
+#include <common/fxt.h>
 #include <starpu.h>
 #include <starpu.h>
 #include <drivers/opencl/driver_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_manager.h>
@@ -94,9 +95,9 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 			size_t freed;
 			size_t freed;
 			size_t reclaim = 2 * dim;
 			size_t reclaim = 2 * dim;
 			_STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", reclaim);
 			_STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", reclaim);
-			_STARPU_TRACE_START_MEMRECLAIM(0);
+			_STARPU_TRACE_START_MEMRECLAIM(0,0);
 			freed = _starpu_memory_reclaim_generic(0, 0, reclaim);
 			freed = _starpu_memory_reclaim_generic(0, 0, reclaim);
-			_STARPU_TRACE_END_MEMRECLAIM(0);
+			_STARPU_TRACE_END_MEMRECLAIM(0,0);
 			if (freed < dim)
 			if (freed < dim)
 			{
 			{
 				// We could not reclaim enough memory
 				// We could not reclaim enough memory
@@ -438,6 +439,10 @@ starpu_malloc_on_node(unsigned dst_node, size_t size)
 	if (addr == 0)
 	if (addr == 0)
 	{
 	{
 		// Allocation failed, gives the memory back to the memory manager
 		// Allocation failed, gives the memory back to the memory manager
+		const char* file;					
+		file = strrchr(__FILE__,'/');							
+		file += sizeof(char);										
+		_STARPU_TRACE_MEMORY_FULL(size);
 		_starpu_memory_manager_deallocate_size(size, dst_node);
 		_starpu_memory_manager_deallocate_size(size, dst_node);
 	}
 	}
 	return addr;
 	return addr;

+ 10 - 4
src/datawizard/memalloc.c

@@ -853,17 +853,23 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			handle->busy_count++;
 			handle->busy_count++;
 			_starpu_spin_unlock(&handle->header_lock);
 			_starpu_spin_unlock(&handle->header_lock);
 
 
-			_STARPU_TRACE_START_MEMRECLAIM(dst_node);
+			_STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch);
 			if (is_prefetch)
 			if (is_prefetch)
 			{
 			{
 				flush_memchunk_cache(dst_node, reclaim);
 				flush_memchunk_cache(dst_node, reclaim);
 			}
 			}
 			else
 			else
 				_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 				_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
-			_STARPU_TRACE_END_MEMRECLAIM(dst_node);
+			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
 
 
-		        while (_starpu_spin_trylock(&handle->header_lock))
-		                _starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
+			int cpt = 0;
+			while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
+			{
+				cpt++;
+				_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
+			}
+			if (cpt == STARPU_SPIN_MAXTRY)
+				_starpu_spin_lock(&handle->header_lock);
 
 
 			replicate->refcnt--;
 			replicate->refcnt--;
 			STARPU_ASSERT(replicate->refcnt >= 0);
 			STARPU_ASSERT(replicate->refcnt >= 0);

+ 7 - 1
src/datawizard/write_back.c

@@ -46,8 +46,14 @@ void _starpu_write_through_data(starpu_data_handle_t handle, unsigned requesting
 			/* we need to commit the buffer on that node */
 			/* we need to commit the buffer on that node */
 			if (node != requesting_node)
 			if (node != requesting_node)
 			{
 			{
-				while (_starpu_spin_trylock(&handle->header_lock))
+				int cpt = 0;
+				while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
+				{
+					cpt++;
 					_starpu_datawizard_progress(requesting_node, 1);
 					_starpu_datawizard_progress(requesting_node, 1);
+				}
+				if (cpt == STARPU_SPIN_MAXTRY)
+					_starpu_spin_lock(&handle->header_lock);
 
 
 				/* We need to keep a Read lock to avoid letting writers corrupt our copy.  */
 				/* We need to keep a Read lock to avoid letting writers corrupt our copy.  */
 				STARPU_ASSERT(handle->current_mode != STARPU_REDUX);
 				STARPU_ASSERT(handle->current_mode != STARPU_REDUX);

+ 57 - 0
src/debug/traces/starpu_fxt.c

@@ -1549,6 +1549,63 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 				handle_task_wait_for_all();
 				handle_task_wait_for_all();
 				break;
 				break;
 
 
+			case _STARPU_FUT_LOCKING_MUTEX:
+				break;
+
+			case _STARPU_FUT_MUTEX_LOCKED:
+				break;
+
+			case _STARPU_FUT_UNLOCKING_MUTEX:
+				break;
+
+			case _STARPU_FUT_MUTEX_UNLOCKED:
+				break;
+
+			case _STARPU_FUT_TRYLOCK_MUTEX:
+				break;
+
+			case _STARPU_FUT_RDLOCKING_RWLOCK:
+				break;
+
+			case _STARPU_FUT_RWLOCK_RDLOCKED:
+				break;
+
+			case _STARPU_FUT_WRLOCKING_RWLOCK:
+				break;
+
+			case _STARPU_FUT_RWLOCK_WRLOCKED:
+				break;
+
+			case _STARPU_FUT_UNLOCKING_RWLOCK:
+				break;
+
+			case _STARPU_FUT_RWLOCK_UNLOCKED:
+				break;
+
+			case _STARPU_FUT_LOCKING_SPINLOCK:
+				break;
+
+			case _STARPU_FUT_SPINLOCK_LOCKED:
+				break;
+
+			case _STARPU_FUT_UNLOCKING_SPINLOCK:
+				break;
+
+			case _STARPU_FUT_SPINLOCK_UNLOCKED:
+				break;
+
+			case _STARPU_FUT_TRYLOCK_SPINLOCK:
+				break;
+
+			case _STARPU_FUT_COND_WAIT_BEGIN:
+				break;
+
+			case _STARPU_FUT_COND_WAIT_END:
+				break;
+
+			case _STARPU_FUT_MEMORY_FULL:
+				break;
+
 			default:
 			default:
 #ifdef STARPU_VERBOSE
 #ifdef STARPU_VERBOSE
 				fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",
 				fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",

+ 2 - 2
src/profiling/profiling_helpers.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011  Université de Bordeaux 1
+ * Copyright (C) 2011, 2013  Université de Bordeaux 1
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -83,7 +83,7 @@ void starpu_profiling_worker_helper_display_summary(void)
 				overall_time = total_time;
 				overall_time = total_time;
 
 
 			fprintf(stderr, "%-32s\n", name);
 			fprintf(stderr, "%-32s\n", name);
-			fprintf(stderr, "\t%d task(s)\n\ttotal: %.2lf ms executing: %.2lf ms sleeping: %.2lf\n", info.executed_tasks, total_time, executing_time, sleeping_time);
+			fprintf(stderr, "\t%d task(s)\n\ttotal: %.2lf ms executing: %.2lf ms sleeping: %.2lf ms overhead %.2lf ms\n", info.executed_tasks, total_time, executing_time, sleeping_time, total_time - executing_time - sleeping_time);
 			if (info.used_cycles || info.stall_cycles)
 			if (info.used_cycles || info.stall_cycles)
 				fprintf(stderr, "\t%lu Mcy %lu Mcy stall\n", info.used_cycles/1000000, info.stall_cycles/1000000);
 				fprintf(stderr, "\t%lu Mcy %lu Mcy stall\n", info.used_cycles/1000000, info.stall_cycles/1000000);
 			if (info.power_consumed)
 			if (info.power_consumed)

+ 24 - 30
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -179,12 +179,11 @@ static struct starpu_task *dmda_pop_ready_task(unsigned sched_ctx_id)
 	task = _starpu_fifo_pop_first_ready_task(fifo, node);
 	task = _starpu_fifo_pop_first_ready_task(fifo, node);
 	if (task)
 	if (task)
 	{
 	{
-		double model = task->predicted;
-
-		if(!isnan(model))
+		double transfer_model = task->predicted_transfer;
+		if(!isnan(transfer_model)) 
 		{
 		{
-			fifo->exp_len -= model;
-			fifo->exp_start = starpu_timing_now() + model;
+			fifo->exp_len -= transfer_model;
+			fifo->exp_start = starpu_timing_now() + transfer_model;
 			fifo->exp_end = fifo->exp_start + fifo->exp_len;
 			fifo->exp_end = fifo->exp_start + fifo->exp_len;
 		}
 		}
 
 
@@ -217,13 +216,17 @@ static struct starpu_task *dmda_pop_task(unsigned sched_ctx_id)
 	task = _starpu_fifo_pop_local_task(fifo);
 	task = _starpu_fifo_pop_local_task(fifo);
 	if (task)
 	if (task)
 	{
 	{
+		double transfer_model = task->predicted_transfer;
 		double model = task->predicted;
 		double model = task->predicted;
-		if(!isnan(model))
+
+		if(!isnan(transfer_model)) 
 		{
 		{
-			fifo->exp_len -= model;
-			fifo->exp_start = starpu_timing_now() + model;
+			fifo->exp_len -= transfer_model;
+			fifo->exp_start = starpu_timing_now() + transfer_model+model;
 			fifo->exp_end = fifo->exp_start + fifo->exp_len;
 			fifo->exp_end = fifo->exp_start + fifo->exp_len;
 		}
 		}
+
+
 		  
 		  
 #ifdef STARPU_VERBOSE
 #ifdef STARPU_VERBOSE
 		if (task->cl)
 		if (task->cl)
@@ -257,11 +260,11 @@ static struct starpu_task *dmda_pop_every_task(unsigned sched_ctx_id)
 	STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 	while (new_list)
 	while (new_list)
 	{
 	{
-		double model = new_list->predicted;
-		if(!isnan(model)) 
+		double transfer_model = new_list->predicted_transfer;
+		if(!isnan(transfer_model)) 
 		{
 		{
-			fifo->exp_len -= model;
-			fifo->exp_start = starpu_timing_now() + model;
+			fifo->exp_len -= transfer_model;
+			fifo->exp_start = starpu_timing_now() + transfer_model;
 			fifo->exp_end = fifo->exp_start + fifo->exp_len;
 			fifo->exp_end = fifo->exp_start + fifo->exp_len;
 		}
 		}
 
 
@@ -294,19 +297,19 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
 	fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 
 
-	if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
+	if ((starpu_timing_now() + predicted_transfer) < fifo->exp_end)
 	{
 	{
 		/* We may hope that the transfer will be finished by
 		/* We may hope that the transfer will be finished by
 		 * the start of the task. */
 		 * the start of the task. */
-		predicted_transfer = 0;
+		predicted_transfer = 0.0;
 	}
 	}
 	else
 	else
 	{
 	{
 		/* The transfer will not be finished by then, take the
 		/* The transfer will not be finished by then, take the
 		 * remainder into account */
 		 * remainder into account */
-		predicted_transfer += starpu_timing_now();
-		predicted_transfer -= fifo->exp_end;
+		predicted_transfer = (starpu_timing_now() + predicted_transfer) - fifo->exp_end;
 	}
 	}
+
 	if(!isnan(predicted_transfer)) 
 	if(!isnan(predicted_transfer)) 
 	{
 	{
 		fifo->exp_end += predicted_transfer;
 		fifo->exp_end += predicted_transfer;
@@ -401,15 +404,7 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 		enum starpu_perfmodel_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
 		enum starpu_perfmodel_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
 
 
 		/* Sometimes workers didn't take the tasks as early as we expected */
 		/* Sometimes workers didn't take the tasks as early as we expected */
-		starpu_pthread_mutex_t *sched_mutex;
-		starpu_pthread_cond_t *sched_cond;
-		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
-
-		STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
-		fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
-		fifo->exp_end = fifo->exp_start + fifo->exp_len;
-		STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
-
+		double exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
 
 
 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 		{
 		{
@@ -476,7 +471,7 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 			if (unknown)
 			if (unknown)
 				continue;
 				continue;
 
 
-			exp_end = fifo->exp_start + fifo->exp_len + local_length;
+			exp_end = exp_start + fifo->exp_len + local_length;
 
 
 			if (best == -1 || exp_end < best_exp_end)
 			if (best == -1 || exp_end < best_exp_end)
 			{
 			{
@@ -557,7 +552,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 			}
 			}
 
 
 			STARPU_ASSERT_MSG(fifo != NULL, "worker %d ctx %d\n", worker, sched_ctx_id);
 			STARPU_ASSERT_MSG(fifo != NULL, "worker %d ctx %d\n", worker, sched_ctx_id);
-			exp_end[worker_ctx][nimpl] = fifo->exp_start + fifo->exp_len;
+			exp_end[worker_ctx][nimpl] = exp_start + fifo->exp_len;
 			if (exp_end[worker_ctx][nimpl] > max_exp_end)
 			if (exp_end[worker_ctx][nimpl] > max_exp_end)
 				max_exp_end = exp_end[worker_ctx][nimpl];
 				max_exp_end = exp_end[worker_ctx][nimpl];
 
 
@@ -579,7 +574,6 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 				if (conversion_time > 0.0)
 				if (conversion_time > 0.0)
 					local_task_length[worker_ctx][nimpl] += conversion_time;
 					local_task_length[worker_ctx][nimpl] += conversion_time;
 			}
 			}
-			
 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
 
 
 			/*
 			/*
@@ -915,9 +909,9 @@ static void dmda_pre_exec_hook(struct starpu_task *task)
 	/* Once the task is executing, we can update the predicted amount
 	/* Once the task is executing, we can update the predicted amount
 	 * of work. */
 	 * of work. */
 	STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
-	if(!isnan(transfer_model))
+	if(!isnan(model))
 	{
 	{
-		fifo->exp_len-= transfer_model;
+		fifo->exp_len-= model;
 		fifo->exp_start = starpu_timing_now() + model;
 		fifo->exp_start = starpu_timing_now() + model;
 		fifo->exp_end= fifo->exp_start + fifo->exp_len;
 		fifo->exp_end= fifo->exp_start + fifo->exp_len;
 	}
 	}

+ 2 - 4
src/sched_policies/node_eager.c

@@ -33,8 +33,6 @@ static void deinitialize_eager_center_policy(unsigned sched_ctx_id)
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
 }
 }
 
 
-
-
 struct starpu_sched_policy _starpu_sched_tree_eager_policy =
 struct starpu_sched_policy _starpu_sched_tree_eager_policy =
 {
 {
 	.init_sched = initialize_eager_center_policy,
 	.init_sched = initialize_eager_center_policy,
@@ -43,8 +41,8 @@ struct starpu_sched_policy _starpu_sched_tree_eager_policy =
 	.remove_workers = _starpu_tree_remove_workers,
 	.remove_workers = _starpu_tree_remove_workers,
 	.push_task = _starpu_tree_push_task,
 	.push_task = _starpu_tree_push_task,
 	.pop_task = _starpu_tree_pop_task,
 	.pop_task = _starpu_tree_pop_task,
-	.pre_exec_hook = NULL,
-	.post_exec_hook = NULL,
+	.pre_exec_hook = _starpu_sched_node_worker_pre_exec_hook,
+	.post_exec_hook = _starpu_sched_node_worker_post_exec_hook,
 	.pop_every_task = NULL,//pop_every_task_eager_policy,
 	.pop_every_task = NULL,//pop_every_task_eager_policy,
 	.policy_name = "tree",
 	.policy_name = "tree",
 	.policy_description = "test tree policy"
 	.policy_description = "test tree policy"

+ 8 - 0
src/sched_policies/node_sched.h

@@ -226,4 +226,12 @@ void _starpu_tree_call_init_data(struct _starpu_sched_tree * t);
  */
  */
 int _starpu_sched_node_push_tasks_to_firsts_suitable_parent(struct _starpu_sched_node * node, struct starpu_task_list * list, int sched_ctx_id);
 int _starpu_sched_node_push_tasks_to_firsts_suitable_parent(struct _starpu_sched_node * node, struct starpu_task_list * list, int sched_ctx_id);
 
 
+
+
+
+/* Hooks meant to be plugged into the pre_exec_hook / post_exec_hook fields
+ * of a struct starpu_sched_policy; each one receives the task concerned. */
+void _starpu_sched_node_worker_pre_exec_hook(struct starpu_task * task);
+void _starpu_sched_node_worker_post_exec_hook(struct starpu_task * task);
+
+
+
 #endif
 #endif

+ 33 - 0
src/sched_policies/node_worker.c

@@ -706,3 +706,36 @@ int _starpu_sched_node_worker_get_workerid(struct _starpu_sched_node * worker_no
 	STARPU_ASSERT(1 == _starpu_bitmap_cardinal(worker_node->workers));
 	STARPU_ASSERT(1 == _starpu_bitmap_cardinal(worker_node->workers));
 	return _starpu_bitmap_first(worker_node->workers);
 	return _starpu_bitmap_first(worker_node->workers);
 }
 }
+
+
+/* Return the task list stored in the data of the worker node associated
+ * with the calling worker.
+ *
+ * Asserts that _starpu_worker_get_id() yields an index in
+ * [0, starpu_worker_get_count()), so the result is only meaningful when
+ * the caller is a worker known to the runtime. */
+static struct _starpu_worker_task_list * _worker_get_list(void)
+{
+	int workerid = _starpu_worker_get_id();
+	/* check the sign first so that the cast below cannot wrap around */
+	STARPU_ASSERT(0 <= workerid && (unsigned) workerid < starpu_worker_get_count());
+	struct _starpu_worker_node_data * d = _starpu_sched_node_worker_get(workerid)->data;
+	return d->list;
+}
+
+/* List of tasks attached to a worker node, together with timing estimates.
+ * NOTE(review): the mutex presumably protects every field below -- confirm
+ * against the push/pop code. */
+struct _starpu_worker_task_list
+{
+	/* presumably the expected start date of the next task -- TODO confirm */
+	double exp_start;
+	/* presumably the expected total duration of the queued tasks -- TODO confirm */
+	double exp_len;
+	/* presumably exp_start + exp_len -- TODO confirm */
+	double exp_end;
+	/* both ends of the chain of task grids */
+	struct _starpu_task_grid *first;
+	struct _starpu_task_grid *last;
+	/* number of tasks held in the list */
+	unsigned ntasks;
+	starpu_pthread_mutex_t mutex;
+};
+
+/* pre_exec_hook for worker nodes.
+ *
+ * When the task carries an execution prediction (task->predicted is not
+ * NaN), the task list of the calling worker is locked and released at once;
+ * no field is updated in between yet. Tasks without a prediction are
+ * ignored. */
+void _starpu_sched_node_worker_pre_exec_hook(struct starpu_task * task)
+{
+	struct _starpu_worker_task_list * worker_list;
+
+	/* no prediction, nothing to account for */
+	if(isnan(task->predicted))
+		return;
+
+	worker_list = _worker_get_list();
+	/* the critical section is intentionally left empty for now */
+	STARPU_PTHREAD_MUTEX_LOCK(&worker_list->mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&worker_list->mutex);
+}
+/* post_exec_hook for worker nodes: currently a no-op, the task is not
+ * inspected. */
+void _starpu_sched_node_worker_post_exec_hook(struct starpu_task * task)
+{
+	(void) task;
+}

+ 1 - 1
tests/Makefile.am

@@ -16,7 +16,7 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 
 AM_CFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
 AM_CFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) @LIBS@
+LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) $(FXT_LIBS) @LIBS@
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(FXT_LDFLAGS)
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(FXT_LDFLAGS)